From 54ae1be697a2af431ae0d789a8afb70c2dca65d8 Mon Sep 17 00:00:00 2001 From: n0099 Date: Sat, 30 Mar 2024 11:56:03 +0800 Subject: [PATCH] * split abstract class `CommonInSavers<>` into two abstract classes `BaseSaver<>` & `SaverWithRevision<>` to fix `AV1000: Type '' contains the word 'and', which suggests it has multiple purposes` * split abstract class `StaticCommonInSavers` into interface `IFieldChangeIgnorance` & `IRevisionProperties` to fix `AV1000` @ c#/crawler --- .../Tieba/Crawl/Facade/ThreadCrawlFacade.cs | 2 +- .../Saver/{CommonInSavers.cs => BaseSaver.cs} | 21 +++++++------------ ...onInSavers.cs => IFieldChangeIgnorance.cs} | 21 +++++++------------ .../Tieba/Crawl/Saver/IRevisionProperties.cs | 11 ++++++++++ .../Tieba/Crawl/Saver/Post/BasePostSaver.cs | 8 +++---- .../src/Tieba/Crawl/Saver/Post/ReplySaver.cs | 3 ++- .../Tieba/Crawl/Saver/Post/SubReplySaver.cs | 3 ++- .../src/Tieba/Crawl/Saver/Post/ThreadSaver.cs | 3 ++- .../Tieba/Crawl/Saver/SaverWithRevision.cs | 13 ++++++++++++ c#/crawler/src/Tieba/Crawl/Saver/UserSaver.cs | 4 ++-- 10 files changed, 51 insertions(+), 38 deletions(-) rename c#/crawler/src/Tieba/Crawl/Saver/{CommonInSavers.cs => BaseSaver.cs} (87%) rename c#/crawler/src/Tieba/Crawl/Saver/{StaticCommonInSavers.cs => IFieldChangeIgnorance.cs} (86%) create mode 100644 c#/crawler/src/Tieba/Crawl/Saver/IRevisionProperties.cs create mode 100644 c#/crawler/src/Tieba/Crawl/Saver/SaverWithRevision.cs diff --git a/c#/crawler/src/Tieba/Crawl/Facade/ThreadCrawlFacade.cs b/c#/crawler/src/Tieba/Crawl/Facade/ThreadCrawlFacade.cs index 8cffc81e..3965c216 100644 --- a/c#/crawler/src/Tieba/Crawl/Facade/ThreadCrawlFacade.cs +++ b/c#/crawler/src/Tieba/Crawl/Facade/ThreadCrawlFacade.cs @@ -21,7 +21,7 @@ public class ThreadCrawlFacade( protected override void BeforeCommitSaveHook(CrawlerDbContext db, UserSaver userSaver) { // BeforeCommitSaveHook() should get invoked after UserSaver.Save() by the base.SaveCrawled() // so only latest repliers that not exists in parsed users are being inserted - // note this will bypass user revision detection since not invoking CommonInSavers.SavePostsOrUsers() but directly DbContext.AddRange() + // note this will bypass user revision detection since not invoking BaseSaver.SavePostsOrUsers() but directly DbContext.AddRange() // users has already been added into DbContext and tracking var existingUsersId = db.ChangeTracker.Entries().Select(ee => ee.Entity.Uid); diff --git a/c#/crawler/src/Tieba/Crawl/Saver/CommonInSavers.cs b/c#/crawler/src/Tieba/Crawl/Saver/BaseSaver.cs similarity index 87% rename from c#/crawler/src/Tieba/Crawl/Saver/CommonInSavers.cs rename to c#/crawler/src/Tieba/Crawl/Saver/BaseSaver.cs index 0cbf68c0..92dbfc1e 100644 --- a/c#/crawler/src/Tieba/Crawl/Saver/CommonInSavers.cs +++ b/c#/crawler/src/Tieba/Crawl/Saver/BaseSaver.cs @@ -2,18 +2,13 @@ namespace tbm.Crawler.Tieba.Crawl.Saver; -public abstract class CommonInSavers(ILogger> logger) - : StaticCommonInSavers +public abstract class BaseSaver(ILogger> logger) + : SaverWithRevision, IFieldChangeIgnorance where TBaseRevision : class, IRevision { - protected delegate void RevisionUpsertDelegate(CrawlerDbContext db, IEnumerable revision); - - protected virtual IDictionary - RevisionUpsertDelegatesKeyBySplitEntityType => throw new NotSupportedException(); - protected void SavePostsOrUsers( CrawlerDbContext db, - FieldChangeIgnoranceDelegates userFieldChangeIgnorance, + IFieldChangeIgnorance.FieldChangeIgnoranceDelegates userFieldChangeIgnorance, Func revisionFactory, ILookup existingOrNewLookup, Func existingSelector) @@ -48,21 +43,21 @@ bool IsTimestampingFieldName(string name) => name is nameof(IPost.LastSeenAt) var pName = p.Metadata.Name; if (!p.IsModified || IsTimestampingFieldName(pName)) continue; - if (GlobalFieldChangeIgnorance.Update(whichPostType, pName, p.OriginalValue, p.CurrentValue) + if (IFieldChangeIgnorance.GlobalFieldChangeIgnorance.Update(whichPostType, pName, p.OriginalValue, p.CurrentValue) || (entryIsUser && userFieldChangeIgnorance.Update( whichPostType, pName, p.OriginalValue, p.CurrentValue))) { p.IsModified = false; continue; // skip following revision check } - if (GlobalFieldChangeIgnorance.Revision(whichPostType, pName, p.OriginalValue, p.CurrentValue) + if (IFieldChangeIgnorance.GlobalFieldChangeIgnorance.Revision(whichPostType, pName, p.OriginalValue, p.CurrentValue) || (entryIsUser && userFieldChangeIgnorance.Revision( whichPostType, pName, p.OriginalValue, p.CurrentValue))) continue; if (IsLatestReplierUser(pName, p, entry)) return null; - if (!RevisionPropertiesCache[typeof(TRevision)].TryGetValue(pName, out var revisionProp)) + if (!IRevisionProperties.Cache[typeof(TRevision)].TryGetValue(pName, out var revisionProp)) { object? ToHexWhenByteArray(object? value) => value is byte[] bytes ? $"0x{Convert.ToHexString(bytes).ToLowerInvariant()}" : value; @@ -109,14 +104,12 @@ bool IsTimestampingFieldName(string name) => name is nameof(IPost.LastSeenAt) .ForEach(g => RevisionUpsertDelegatesKeyBySplitEntityType[g.Key](db, g)); } - protected virtual NullFieldsBitMask GetRevisionNullFieldBitMask(string fieldName) => throw new NotSupportedException(); - private static bool IsLatestReplierUser(string pName, PropertyEntry p, EntityEntry entry) { // ThreadCrawlFacade.ParseLatestRepliers() will save users with empty string as portrait // they will soon be updated by (sub) reply crawler after it find out the latest reply // so we should ignore its revision update for all fields - // ignore entire record is not possible via GlobalFieldChangeIgnorance.Revision() + // ignore entire record is not possible via IFieldChangeIgnorance.GlobalFieldChangeIgnorance.Revision() // since it can only determine one field at the time if (pName != nameof(User.Portrait) || p.OriginalValue is not "") return false; diff --git a/c#/crawler/src/Tieba/Crawl/Saver/StaticCommonInSavers.cs b/c#/crawler/src/Tieba/Crawl/Saver/IFieldChangeIgnorance.cs similarity index 86% rename from c#/crawler/src/Tieba/Crawl/Saver/StaticCommonInSavers.cs rename to c#/crawler/src/Tieba/Crawl/Saver/IFieldChangeIgnorance.cs index 32a5acf4..d7e73216 100644 --- a/c#/crawler/src/Tieba/Crawl/Saver/StaticCommonInSavers.cs +++ b/c#/crawler/src/Tieba/Crawl/Saver/IFieldChangeIgnorance.cs @@ -1,14 +1,16 @@ namespace tbm.Crawler.Tieba.Crawl.Saver; -public abstract class StaticCommonInSavers +public partial interface IFieldChangeIgnorance { public delegate bool FieldChangeIgnoranceDelegate( Type whichPostType, string propName, object? oldValue, object? newValue); - // static field in this non-generic class will be shared across all reified generic derived classes - protected static IDictionary> RevisionPropertiesCache { get; } = GetPropsKeyByType( - [typeof(ThreadRevision), typeof(ReplyRevision), typeof(SubReplyRevision), typeof(UserRevision)]); - + public record FieldChangeIgnoranceDelegates( + FieldChangeIgnoranceDelegate Update, + FieldChangeIgnoranceDelegate Revision); +} +public partial interface IFieldChangeIgnorance +{ protected static FieldChangeIgnoranceDelegates GlobalFieldChangeIgnorance { get; } = new( Update: (whichPostType, propName, oldValue, newValue) => { @@ -90,13 +92,4 @@ when newValue is "" } return false; }); - - [SuppressMessage("Performance", "CA1859:Use concrete types when possible for improved performance")] - private static IDictionary> GetPropsKeyByType(IEnumerable types) => - types.ToDictionary(type => type, type => - (IDictionary)type.GetProperties().ToDictionary(prop => prop.Name)); - - public record FieldChangeIgnoranceDelegates( - FieldChangeIgnoranceDelegate Update, - FieldChangeIgnoranceDelegate Revision); } diff --git a/c#/crawler/src/Tieba/Crawl/Saver/IRevisionProperties.cs b/c#/crawler/src/Tieba/Crawl/Saver/IRevisionProperties.cs new file mode 100644 index 00000000..3e7a209b --- /dev/null +++ b/c#/crawler/src/Tieba/Crawl/Saver/IRevisionProperties.cs @@ -0,0 +1,11 @@ +namespace tbm.Crawler.Tieba.Crawl.Saver; + +public interface IRevisionProperties +{ + protected static IDictionary> Cache { get; } = GetPropsKeyByType( + [typeof(ThreadRevision), typeof(ReplyRevision), typeof(SubReplyRevision), typeof(UserRevision)]); + + private static IDictionary> GetPropsKeyByType(IEnumerable types) => + types.ToDictionary(type => type, type => + (IDictionary)type.GetProperties().ToDictionary(prop => prop.Name)); +} diff --git a/c#/crawler/src/Tieba/Crawl/Saver/Post/BasePostSaver.cs b/c#/crawler/src/Tieba/Crawl/Saver/Post/BasePostSaver.cs index f4195110..4421ddeb 100644 --- a/c#/crawler/src/Tieba/Crawl/Saver/Post/BasePostSaver.cs +++ b/c#/crawler/src/Tieba/Crawl/Saver/Post/BasePostSaver.cs @@ -7,7 +7,7 @@ public abstract class BasePostSaver( ConcurrentDictionary posts, AuthorRevisionSaver.New authorRevisionSaverFactory, string postType) - : CommonInSavers(logger) + : BaseSaver(logger) where TPost : class, IPost where TBaseRevision : class, IRevision { @@ -15,8 +15,8 @@ public abstract class BasePostSaver( [SuppressMessage("Design", "MA0046:Use EventHandler to declare events")] protected event PostSaveEventHandler PostSaveEvent = () => { }; - public virtual FieldChangeIgnoranceDelegates UserFieldChangeIgnorance => - throw new NotSupportedException(); + public virtual IFieldChangeIgnorance.FieldChangeIgnoranceDelegates + UserFieldChangeIgnorance => throw new NotSupportedException(); public string PostType { get; } = postType; protected ConcurrentDictionary Posts { get; } = posts; protected AuthorRevisionSaver AuthorRevisionSaver { get; } = authorRevisionSaverFactory(postType); @@ -35,7 +35,7 @@ protected SaverChangeSet Save( var existingPostsKeyById = dbSet.Where(existingPostPredicate).ToDictionary(postIdSelector); - // deep copy before entities get mutated by CommonInSavers.SavePostsOrUsers() + // deep copy before entities get mutated by BaseSaver.SavePostsOrUsers() var existingBeforeMerge = existingPostsKeyById.Select(pair => (TPost)pair.Value.Clone()).ToList(); SavePostsOrUsers(db, UserFieldChangeIgnorance, revisionFactory, diff --git a/c#/crawler/src/Tieba/Crawl/Saver/Post/ReplySaver.cs b/c#/crawler/src/Tieba/Crawl/Saver/Post/ReplySaver.cs index 24f250f1..a0c71108 100644 --- a/c#/crawler/src/Tieba/Crawl/Saver/Post/ReplySaver.cs +++ b/c#/crawler/src/Tieba/Crawl/Saver/Post/ReplySaver.cs @@ -12,7 +12,8 @@ public partial class ReplySaver( { public delegate ReplySaver New(ConcurrentDictionary posts); - public override FieldChangeIgnoranceDelegates UserFieldChangeIgnorance { get; } = new( + public override IFieldChangeIgnorance.FieldChangeIgnoranceDelegates + UserFieldChangeIgnorance { get; } = new( Update: (_, propName, oldValue, newValue) => propName switch { // FansNickname in reply response will always be null nameof(User.FansNickname) when oldValue is not null && newValue is null => true, diff --git a/c#/crawler/src/Tieba/Crawl/Saver/Post/SubReplySaver.cs b/c#/crawler/src/Tieba/Crawl/Saver/Post/SubReplySaver.cs index 8ed54756..400212d8 100644 --- a/c#/crawler/src/Tieba/Crawl/Saver/Post/SubReplySaver.cs +++ b/c#/crawler/src/Tieba/Crawl/Saver/Post/SubReplySaver.cs @@ -11,7 +11,8 @@ public class SubReplySaver( { public delegate SubReplySaver New(ConcurrentDictionary posts); - public override FieldChangeIgnoranceDelegates UserFieldChangeIgnorance { get; } = new( + public override IFieldChangeIgnorance.FieldChangeIgnoranceDelegates + UserFieldChangeIgnorance { get; } = new( Update: (_, propName, oldValue, newValue) => propName switch { // always ignore updates on iconinfo due to some rare user will show some extra icons // compare to reply response in the response of sub reply diff --git a/c#/crawler/src/Tieba/Crawl/Saver/Post/ThreadSaver.cs b/c#/crawler/src/Tieba/Crawl/Saver/Post/ThreadSaver.cs index 85301d0a..9d398932 100644 --- a/c#/crawler/src/Tieba/Crawl/Saver/Post/ThreadSaver.cs +++ b/c#/crawler/src/Tieba/Crawl/Saver/Post/ThreadSaver.cs @@ -11,7 +11,8 @@ public class ThreadSaver( { public delegate ThreadSaver New(ConcurrentDictionary posts); - public override FieldChangeIgnoranceDelegates UserFieldChangeIgnorance { get; } = new( + public override IFieldChangeIgnorance.FieldChangeIgnoranceDelegates + UserFieldChangeIgnorance { get; } = new( Update: (_, propName, _, _) => propName switch { // Icon.SpriteInfo will be an empty array and the icon url is a smaller one // so we should mark it as null temporarily diff --git a/c#/crawler/src/Tieba/Crawl/Saver/SaverWithRevision.cs b/c#/crawler/src/Tieba/Crawl/Saver/SaverWithRevision.cs new file mode 100644 index 00000000..0dbe91fa --- /dev/null +++ b/c#/crawler/src/Tieba/Crawl/Saver/SaverWithRevision.cs @@ -0,0 +1,13 @@ +namespace tbm.Crawler.Tieba.Crawl.Saver; + +public abstract class SaverWithRevision : IRevisionProperties + where TBaseRevision : class, IRevision +{ + protected delegate void RevisionUpsertDelegate(CrawlerDbContext db, IEnumerable revision); + + protected virtual IDictionary RevisionUpsertDelegatesKeyBySplitEntityType => + throw new NotSupportedException(); + + protected virtual NullFieldsBitMask GetRevisionNullFieldBitMask(string fieldName) => + throw new NotSupportedException(); +} diff --git a/c#/crawler/src/Tieba/Crawl/Saver/UserSaver.cs b/c#/crawler/src/Tieba/Crawl/Saver/UserSaver.cs index 202d47b7..85a81246 100644 --- a/c#/crawler/src/Tieba/Crawl/Saver/UserSaver.cs +++ b/c#/crawler/src/Tieba/Crawl/Saver/UserSaver.cs @@ -32,7 +32,7 @@ protected override Dictionary }; } public partial class UserSaver(ILogger logger, ConcurrentDictionary users) - : CommonInSavers(logger) + : BaseSaver(logger) { private static readonly HashSet UserIdLocks = []; private readonly List _savedUsersId = []; @@ -42,7 +42,7 @@ public partial class UserSaver(ILogger logger, ConcurrentDictionary