Skip to content

Commit

Permalink
* rename class PostContent to BasePostContent
Browse files Browse the repository at this point in the history
@ crawler

* fix or suppress all violations of Roslyn analyzer rules
@ c#
  • Loading branch information
n0099 committed May 17, 2024
1 parent c129bf2 commit 8d41363
Show file tree
Hide file tree
Showing 17 changed files with 152 additions and 130 deletions.
2 changes: 1 addition & 1 deletion c#/crawler/Properties/PublishProfiles/FolderProfile.pubxml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<?xml version="1.0" encoding="utf-8"?>
<!--
https://go.microsoft.com/fwlink/?LinkID=208121.
https://go.microsoft.com/fwlink/?LinkID=208121.
-->
<Project>
<PropertyGroup>
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// ReSharper disable PropertyCanBeMadeInitOnly.Global
namespace tbm.Crawler.Db.Post.PostContent;

public abstract class PostContent : RowVersionedEntity
/// <summary>
/// Abstract base class for content entities. It derives from <see cref="RowVersionedEntity"/>
/// and contributes a single optional byte array column.
/// </summary>
public abstract class BasePostContent : RowVersionedEntity
{
/// <summary>
/// Raw content bytes, or <see langword="null"/> when absent.
/// NOTE(review): presumably a serialized protoBuf message, judging by the name - confirm against the writer.
/// </summary>
public byte[]? ProtoBufBytes { get; set; }
}
2 changes: 1 addition & 1 deletion c#/crawler/src/Db/Post/PostContent/ReplyContent.cs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// ReSharper disable PropertyCanBeMadeInitOnly.Global
namespace tbm.Crawler.Db.Post.PostContent;

public class ReplyContent : PostContent
/// <summary>
/// Content entity keyed by <see cref="Pid"/>; the content bytes themselves
/// are inherited from <see cref="BasePostContent"/>.
/// </summary>
public class ReplyContent : BasePostContent
{
    /// <summary>
    /// Primary key of the row.
    /// NOTE(review): presumably the id of the reply that owns this content - confirm.
    /// </summary>
    [Key]
    public ulong Pid { get; set; }
}
2 changes: 1 addition & 1 deletion c#/crawler/src/Db/Post/PostContent/SubReplyContent.cs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// ReSharper disable PropertyCanBeMadeInitOnly.Global
namespace tbm.Crawler.Db.Post.PostContent;

public class SubReplyContent : PostContent
/// <summary>
/// Content entity keyed by <see cref="Spid"/>; the content bytes themselves
/// are inherited from <see cref="BasePostContent"/>.
/// </summary>
public class SubReplyContent : BasePostContent
{
    /// <summary>
    /// Primary key of the row.
    /// NOTE(review): presumably the id of the sub reply that owns this content - confirm.
    /// </summary>
    [Key]
    public ulong Spid { get; set; }
}
2 changes: 1 addition & 1 deletion c#/crawler/src/Db/User.cs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ public override bool Equals(User? x, User? y) => x == y || (

public override int GetHashCode(User obj)
{
var hash = new HashCode();
var hash = default(HashCode);
hash.Add(obj.Uid);
hash.Add(obj.Name);
hash.Add(obj.DisplayName);
Expand Down
1 change: 1 addition & 0 deletions c#/crawler/src/Tieba/Crawl/Parser/Post/ReplyParser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ protected override ReplyPost Convert(Reply inPost)
o.Pid, c.OriginSrc, SharedHelper.UnescapedJsonSerialize(c));
}
}

// AuthorId is occasionally 0 in the response; Author is normally null (though that is not guaranteed), so fall back to its Uid and finally to 0
o.AuthorUid = inPost.AuthorId.NullIfZero() ?? inPost.Author?.Uid ?? 0;

Expand Down
52 changes: 26 additions & 26 deletions c#/crawler/src/Tieba/Crawl/Saver/Post/ReplySaver.cs
Original file line number Diff line number Diff line change
Expand Up @@ -13,25 +13,6 @@ public class ReplySaver(
{
public delegate ReplySaver New(ConcurrentDictionary<PostId, ReplyPost> posts);

protected override bool FieldUpdateIgnorance
(string propName, object? oldValue, object? newValue) => propName switch
{ // possible randomly respond with null
nameof(ReplyPost.SignatureId) when newValue is null && oldValue is not null => true,
_ => false
};

public override bool UserFieldUpdateIgnorance(string propName, object? oldValue, object? newValue) => propName switch
{ // FansNickname in reply response will always be null
nameof(User.FansNickname) when newValue is null && oldValue is not null => true,
_ => false
};

public override bool UserFieldRevisionIgnorance(string propName, object? oldValue, object? newValue) => propName switch
{ // user icon will be null after UserParser.ResetUsersIcon() get invoked
nameof(User.Icon) when newValue is not null && oldValue is null => true,
_ => false
};

protected override Dictionary<Type, AddRevisionDelegate>
AddRevisionDelegatesKeyBySplitEntityType { get; } = new()
{
Expand All @@ -52,13 +33,16 @@ protected override Dictionary<Type, AddRevisionDelegate>
}
};

[SuppressMessage("StyleCop.CSharp.SpacingRules", "SA1025:Code should not contain multiple whitespace in a row")]
protected override NullFieldsBitMask GetRevisionNullFieldBitMask(string fieldName) => fieldName switch
{
nameof(ReplyPost.IsFold) => 1 << 2,
nameof(ReplyPost.DisagreeCount) => 1 << 4,
nameof(ReplyPost.Geolocation) => 1 << 5,
_ => 0
/// <summary>
/// Tells whether a change to a user property seen while saving replies should be ignored.
/// </summary>
/// <param name="propName">Name of the user property being compared.</param>
/// <param name="oldValue">Value before the change.</param>
/// <param name="newValue">Value after the change.</param>
/// <returns>
/// <see langword="true"/> only when <see cref="User.FansNickname"/> goes from a non-null value to null;
/// <see langword="false"/> for everything else.
/// </returns>
public override bool UserFieldUpdateIgnorance(string propName, object? oldValue, object? newValue) =>
    // the reply response always carries a null FansNickname, so a null must not replace a known value
    propName is nameof(User.FansNickname)
    && oldValue is not null
    && newValue is null;

/// <summary>
/// Tells whether a change to a user property seen while saving replies should be left out of revisions.
/// </summary>
/// <param name="propName">Name of the user property being compared.</param>
/// <param name="oldValue">Value before the change.</param>
/// <param name="newValue">Value after the change.</param>
/// <returns>
/// <see langword="true"/> only when <see cref="User.Icon"/> goes from null to a non-null value;
/// <see langword="false"/> for everything else.
/// </returns>
public override bool UserFieldRevisionIgnorance(string propName, object? oldValue, object? newValue) =>
    // the user icon is null after UserParser.ResetUsersIcon() has been invoked
    propName is nameof(User.Icon)
    && oldValue is null
    && newValue is not null;

public override SaverChangeSet<ReplyPost> Save(CrawlerDbContext db)
Expand All @@ -73,4 +57,20 @@ public override SaverChangeSet<ReplyPost> Save(CrawlerDbContext db)

return changeSet;
}

/// <summary>
/// Tells whether a change to a reply property should be ignored.
/// </summary>
/// <param name="propName">Name of the reply property being compared.</param>
/// <param name="oldValue">Value before the change.</param>
/// <param name="newValue">Value after the change.</param>
/// <returns>
/// <see langword="true"/> only when <see cref="ReplyPost.SignatureId"/> goes from a non-null value to null;
/// <see langword="false"/> for everything else.
/// </returns>
protected override bool FieldUpdateIgnorance(string propName, object? oldValue, object? newValue) =>
    // the response may randomly contain null for SignatureId
    propName is nameof(ReplyPost.SignatureId)
    && oldValue is not null
    && newValue is null;

/// <summary>
/// Maps a reply field name to its bit in the revision null-field bit mask.
/// </summary>
/// <param name="fieldName">Name of the reply field.</param>
/// <returns>
/// A single-bit value for <see cref="ReplyPost.IsFold"/>, <see cref="ReplyPost.DisagreeCount"/>
/// and <see cref="ReplyPost.Geolocation"/>; 0 for any other name.
/// </returns>
[SuppressMessage("StyleCop.CSharp.SpacingRules", "SA1025:Code should not contain multiple whitespace in a row")]
protected override NullFieldsBitMask GetRevisionNullFieldBitMask(string fieldName)
{
    switch (fieldName)
    {
        case nameof(ReplyPost.IsFold):
            return 1 << 2;
        case nameof(ReplyPost.DisagreeCount):
            return 1 << 4;
        case nameof(ReplyPost.Geolocation):
            return 1 << 5;
        default:
            return 0;
    }
}
}
30 changes: 15 additions & 15 deletions c#/crawler/src/Tieba/Crawl/Saver/Post/SubReplySaver.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,20 +11,6 @@ public class SubReplySaver(
{
public delegate SubReplySaver New(ConcurrentDictionary<PostId, SubReplyPost> posts);

public override bool UserFieldUpdateIgnorance
(string propName, object? oldValue, object? newValue) => propName switch
{ // always ignore updates on iconinfo due to some rare user will show some extra icons
// compare to reply response in the response of sub reply
nameof(User.Icon) => true,

// FansNickname in sub reply response will always be null
nameof(User.FansNickname) when newValue is null && oldValue is not null => true,

// DisplayName in users embedded in sub replies from response will be the legacy nickname
nameof(User.DisplayName) => true,
_ => false
};

protected override Dictionary<Type, AddRevisionDelegate>
AddRevisionDelegatesKeyBySplitEntityType { get; } = new()
{
Expand All @@ -40,7 +26,19 @@ protected override Dictionary<Type, AddRevisionDelegate>
}
};

protected override NullFieldsBitMask GetRevisionNullFieldBitMask(string fieldName) => 0;
/// <summary>
/// Tells whether a change to a user property seen while saving sub replies should be ignored.
/// </summary>
/// <param name="propName">Name of the user property being compared.</param>
/// <param name="oldValue">Value before the change.</param>
/// <param name="newValue">Value after the change.</param>
/// <returns>
/// <see langword="true"/> for every change to <see cref="User.Icon"/> or <see cref="User.DisplayName"/>,
/// and for <see cref="User.FansNickname"/> going from a non-null value to null;
/// <see langword="false"/> for everything else.
/// </returns>
public override bool UserFieldUpdateIgnorance(string propName, object? oldValue, object? newValue)
{
    switch (propName)
    {
        // Icon: always ignored, because a few users show extra icons in the sub reply response
        // compared with the reply response
        case nameof(User.Icon):
        // DisplayName: users embedded in the sub reply response carry the legacy nickname
        case nameof(User.DisplayName):
            return true;

        // FansNickname: always null in the sub reply response
        case nameof(User.FansNickname):
            return newValue is null && oldValue is not null;

        default:
            return false;
    }
}

public override SaverChangeSet<SubReplyPost> Save(CrawlerDbContext db)
{
Expand All @@ -51,4 +49,6 @@ public override SaverChangeSet<SubReplyPost> Save(CrawlerDbContext db)

return changeSet;
}

/// <summary>
/// Maps a sub reply field name to its bit in the revision null-field bit mask.
/// </summary>
/// <param name="fieldName">Name of the sub reply field; not inspected.</param>
/// <returns>Always 0: no field name is assigned a bit here.</returns>
protected override NullFieldsBitMask GetRevisionNullFieldBitMask(string fieldName)
{
    return 0;
}
}
56 changes: 31 additions & 25 deletions c#/crawler/src/Tieba/Crawl/Saver/Post/ThreadSaver.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,43 @@ public class ThreadSaver(
{
public delegate ThreadSaver New(ConcurrentDictionary<Tid, ThreadPost> posts);

/// <summary>
/// Lookup from a split revision entity type to the delegate that adds revisions of that type.
/// The only entry, for <see cref="ThreadRevision.SplitViewCount"/>, filters the given revisions
/// down to that type and adds them to the matching set of the db context.
/// </summary>
protected override Dictionary<Type, AddRevisionDelegate>
    AddRevisionDelegatesKeyBySplitEntityType { get; } = new()
{
    [typeof(ThreadRevision.SplitViewCount)] = (db, revisions) =>
        db.Set<ThreadRevision.SplitViewCount>()
            .AddRange(revisions.OfType<ThreadRevision.SplitViewCount>()),
};

/// <summary>
/// Tells whether a change to a user property seen while saving threads should be ignored.
/// </summary>
/// <param name="propName">Name of the user property being compared.</param>
/// <param name="oldValue">Value before the change; not inspected.</param>
/// <param name="newValue">Value after the change; not inspected.</param>
/// <returns>
/// <see langword="true"/> for every change to <see cref="User.Icon"/>;
/// <see langword="false"/> for everything else.
/// </returns>
public override bool UserFieldUpdateIgnorance(string propName, object? oldValue, object? newValue) =>
    // Icon.SpriteInfo will be an empty array and the icon url is a smaller one,
    // so the icon is treated as null for the time being.
    // Note: as a consequence we cannot record when a user changed its iconinfo to null,
    // since these null values are also ignored by ReplySaver and SubReplySaver.
    propName is nameof(User.Icon);

/// <summary>
/// Saves the threads held by this saver by forwarding to another <c>Save</c> overload
/// together with the thread-specific selectors it needs.
/// </summary>
/// <param name="db">Db context handed through to the forwarded call.</param>
/// <returns>The change set returned by the forwarded call.</returns>
public override SaverChangeSet<ThreadPost> Save(CrawlerDbContext db) =>
// threads are identified by their Tid
Save(db, th => th.Tid,
// a revision is stamped with UpdatedAt, or with CreatedAt when UpdatedAt is null
th => new ThreadRevision {TakenAt = th.UpdatedAt ?? th.CreatedAt, Tid = th.Tid},
// predicate matching the threads whose Tid is among the keys of Posts
PredicateBuilder.New<ThreadPost>(th => Posts.Keys.Contains(th.Tid)));

protected override bool FieldUpdateIgnorance
(string propName, object? oldValue, object? newValue) => propName switch
{ // will be updated by ThreadLateCrawler and ThreadLateCrawlFacade
nameof(ThreadPost.AuthorPhoneType) => true,

// prevent overwrite existing value of field liker_id which is saved by legacy crawler
// and Zan itself is deprecated by tieba, so it shouldn't get updated
nameof(ThreadPost.Zan) => true,

// possible randomly respond with null
nameof(ThreadPost.Geolocation) when newValue is null => true,

// empty string means the author had not written a title
// its value generated from the first reply within response of reply crawler
// will be later set by ReplyCrawlFacade.SaveParentThreadTitle()
Expand All @@ -30,8 +58,10 @@ when newValue is ""
// due to the thread is a multi forum topic thread
// thus its title can be varied within the forum and within the thread
|| (newValue is not "" && oldValue is not "") => true,

// possible randomly respond with 0.NullIfZero()
nameof(ThreadPost.DisagreeCount) when newValue is null && oldValue is not null => true,

// when the latest reply post is deleted and there's no new reply after delete
// this field but not LatestReplyPostedAt will be null
nameof(ThreadPost.LatestReplierUid) when newValue is null => true,
Expand All @@ -42,31 +72,12 @@ protected override bool FieldRevisionIgnorance
(string propName, object? oldValue, object? newValue) => propName switch
{ // empty string from response has been updated by ReplyCrawlFacade.OnPostParse()
nameof(ThreadPost.Title) when oldValue is "" => true,

// null values will be later set by tieba client 6.0.2 response at ThreadParser.ParseInternal()
nameof(ThreadPost.LatestReplierUid) when oldValue is null => true,
_ => false
};

public override bool UserFieldUpdateIgnorance
(string propName, object? oldValue, object? newValue) => propName switch
{ // Icon.SpriteInfo will be an empty array and the icon url is a smaller one
// so we should mark it as null temporarily
// note this will cause we can't record when did a user update its iconinfo to null
// since these null values have been ignored in ReplySaver and SubReplySaver
nameof(User.Icon) => true,
_ => false
};

protected override Dictionary<Type, AddRevisionDelegate>
AddRevisionDelegatesKeyBySplitEntityType { get; } = new()
{
{
typeof(ThreadRevision.SplitViewCount), (db, revisions) =>
db.Set<ThreadRevision.SplitViewCount>()
.AddRange(revisions.OfType<ThreadRevision.SplitViewCount>())
}
};

[SuppressMessage("StyleCop.CSharp.SpacingRules", "SA1025:Code should not contain multiple whitespace in a row")]
protected override NullFieldsBitMask GetRevisionNullFieldBitMask(string fieldName) => fieldName switch
{
Expand All @@ -81,9 +92,4 @@ protected override Dictionary<Type, AddRevisionDelegate>
nameof(ThreadPost.Geolocation) => 1 << 10,
_ => 0
};

public override SaverChangeSet<ThreadPost> Save(CrawlerDbContext db) =>
Save(db, th => th.Tid,
th => new ThreadRevision {TakenAt = th.UpdatedAt ?? th.CreatedAt, Tid = th.Tid},
PredicateBuilder.New<ThreadPost>(th => Posts.Keys.Contains(th.Tid)));
}
8 changes: 7 additions & 1 deletion c#/crawler/src/Tieba/Crawl/Saver/ReplyContentImageSaver.cs
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,11 @@ where imagesKeyByUrlFilename.Keys.Contains(e.UrlFilename)
throw new InvalidOperationException();
alreadyLocked.ForEach(urlFilename =>
{
lock (LocksKeyByUrlFilename[urlFilename]) { }
lock (LocksKeyByUrlFilename[urlFilename])
#pragma warning disable S108 // Either remove or fill this block of code.
{
}
#pragma warning restore S108 // Either remove or fill this block of code.
});
existingImages = existingImages
.Concat((
Expand Down Expand Up @@ -69,6 +73,8 @@ on existing.UrlFilename equals newInContent.UrlFilename

if (newlyLocked.Any(urlFilename => !LocksKeyByUrlFilename.TryRemove(urlFilename, out _)))
throw new InvalidOperationException();
#pragma warning disable IDISP007 // Don't dispose injected
locks.Dispose();
#pragma warning restore IDISP007 // Don't dispose injected
}
}
16 changes: 9 additions & 7 deletions c#/crawler/src/Tieba/Crawl/Saver/SaverWithRevision.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,24 +2,26 @@

namespace tbm.Crawler.Tieba.Crawl.Saver;

public abstract class SaverWithRevision<TBaseRevision>
(ILogger<SaverWithRevision<TBaseRevision>> logger)
public abstract partial class SaverWithRevision<TBaseRevision>(
ILogger<SaverWithRevision<TBaseRevision>> logger)
: IRevisionProperties
where TBaseRevision : BaseRevisionWithSplitting
{
/// <summary>Callback that receives the db context and a sequence of revisions to add to it.</summary>
protected delegate void AddRevisionDelegate(CrawlerDbContext db, IEnumerable<TBaseRevision> revision);

/// <summary>Lookup from a split revision entity type to the <see cref="AddRevisionDelegate"/> handling that type; supplied by each concrete saver.</summary>
protected abstract IReadOnlyDictionary<Type, AddRevisionDelegate> AddRevisionDelegatesKeyBySplitEntityType { get; }

/// <summary>
/// Maps a field name to its value in the revision null-field bit mask; supplied by each concrete saver.
/// NOTE(review): presumably a single bit per nullable field and 0 for fields without one - confirm against the caller.
/// </summary>
protected abstract NullFieldsBitMask GetRevisionNullFieldBitMask(string fieldName);

/// <summary>
/// Overridable hook; the default returns <see langword="false"/>.
/// NOTE(review): presumably lets a saver suppress the revision of a whole entity based on one of its properties - confirm against the caller.
/// </summary>
protected virtual bool ShouldIgnoreEntityRevision(string propName, PropertyEntry propEntry, EntityEntry entityEntry) => false;

/// <summary>
/// Overridable hook telling whether a change to the named property should be ignored;
/// the default returns <see langword="false"/>, i.e. nothing is ignored.
/// </summary>
protected virtual bool FieldUpdateIgnorance(string propName, object? oldValue, object? newValue) => false;

/// <summary>
/// Overridable hook telling whether a change to the named property should be skipped by the revision check
/// while saving entities with revision; the default returns <see langword="false"/>, i.e. nothing is skipped.
/// </summary>
protected virtual bool FieldRevisionIgnorance(string propName, object? oldValue, object? newValue) => false;
/// <summary>
/// Update-ignorance rule that does not depend on the concrete saver.
/// </summary>
/// <param name="propName">Name of the property being compared.</param>
/// <param name="oldValue">Value before the change.</param>
/// <param name="newValue">Value after the change.</param>
/// <returns>
/// <see langword="true"/> only when <see cref="BasePost.AuthorUid"/> changes from a non-null value
/// to a boxed <see cref="long"/> 0; <see langword="false"/> for everything else.
/// </returns>
private static bool GlobalFieldUpdateIgnorance(string propName, object? oldValue, object? newValue) =>
    // the response may, rarely, carry the protoBuf default value 0
    propName is nameof(BasePost.AuthorUid)
    && newValue is 0L
    && oldValue is not null;
protected virtual bool FieldUpdateIgnorance(string propName, object? oldValue, object? newValue) => false;
protected virtual bool FieldRevisionIgnorance(string propName, object? oldValue, object? newValue) => false;
protected virtual bool ShouldIgnoreEntityRevision(string propName, PropertyEntry propEntry, EntityEntry entityEntry) => false;

}
public abstract partial class SaverWithRevision<TBaseRevision>
{
protected void SaveEntitiesWithRevision<TEntity, TRevision>(
CrawlerDbContext db,
Func<TEntity, TRevision> revisionFactory,
Expand Down Expand Up @@ -69,7 +71,7 @@ bool IsTimestampingFieldName(string name) => name is nameof(BasePost.LastSeenAt)
continue; // skip following revision check
}
if (FieldRevisionIgnorance(
pName, p.OriginalValue, p.CurrentValue)
pName, p.OriginalValue, p.CurrentValue)
|| (entityIsUser && userFieldRevisionIgnorance!(
pName, p.OriginalValue, p.CurrentValue)))
continue;
Expand Down
Loading

0 comments on commit 8d41363

Please sign in to comment.