From 4c2e953065d525f4b87cc9d940b0a85262ffd639 Mon Sep 17 00:00:00 2001 From: n0099 Date: Sun, 2 Jun 2024 07:48:53 +0800 Subject: [PATCH] * prevent selecting all fields of entity `TRevision` by casting `IQueryable<TRevision>` to `IQueryable<TBaseRevision>` then projecting with `RevisionIdWithDuplicateIndexProjectionFactory()` * quick exit when param `newRevisions` is empty to prevent executing SQL with a `WHERE FALSE` clause @ `AddRevisionsWithDuplicateIndex()` + method `RevisionIdWithDuplicateIndexProjectionFactory()` & nested class `RevisionIdWithDuplicateIndexProjection` * rename method `IsRevisionEntityIdEqualsExpression()` & `RevisionEntityIdSelector()` to `IsRevisionIdEqualsExpression()` & `RevisionIdSelector()` * rename type param `TEntityId` to `TRevisionId` and constrain it to `struct` @ SaverWithRevision.cs @ c#/crawler --- .../src/Tieba/Crawl/Saver/Post/PostSaver.cs | 1 + .../src/Tieba/Crawl/Saver/Post/ReplySaver.cs | 7 +++-- .../Tieba/Crawl/Saver/Post/SubReplySaver.cs | 7 +++-- .../src/Tieba/Crawl/Saver/Post/ThreadSaver.cs | 7 +++-- .../Tieba/Crawl/Saver/SaverWithRevision.cs | 27 ++++++++++++++----- c#/crawler/src/Tieba/Crawl/Saver/UserSaver.cs | 7 +++-- 6 files changed, 41 insertions(+), 15 deletions(-) diff --git a/c#/crawler/src/Tieba/Crawl/Saver/Post/PostSaver.cs b/c#/crawler/src/Tieba/Crawl/Saver/Post/PostSaver.cs index 39c72a8e..2a04d318 100644 --- a/c#/crawler/src/Tieba/Crawl/Saver/Post/PostSaver.cs +++ b/c#/crawler/src/Tieba/Crawl/Saver/Post/PostSaver.cs @@ -10,6 +10,7 @@ public abstract class PostSaver( : SaverWithRevision(logger), IPostSaver where TPost : BasePost where TBaseRevision : BaseRevisionWithSplitting + where TPostId : struct { protected delegate void PostSaveHandler(); diff --git a/c#/crawler/src/Tieba/Crawl/Saver/Post/ReplySaver.cs b/c#/crawler/src/Tieba/Crawl/Saver/Post/ReplySaver.cs index 64898e08..24190a49 100644 --- a/c#/crawler/src/Tieba/Crawl/Saver/Post/ReplySaver.cs +++ b/c#/crawler/src/Tieba/Crawl/Saver/Post/ReplySaver.cs @@ -46,10 +46,13 @@ public override 
SaverChangeSet Save(CrawlerDbContext db) return changeSet; } - protected override Pid RevisionEntityIdSelector(BaseReplyRevision entity) => entity.Pid; + protected override Pid RevisionIdSelector(BaseReplyRevision entity) => entity.Pid; protected override Expression> - IsRevisionEntityIdEqualsExpression(BaseReplyRevision newRevision) => + IsRevisionIdEqualsExpression(BaseReplyRevision newRevision) => existingRevision => existingRevision.Pid == newRevision.Pid; + protected override Expression> + RevisionIdWithDuplicateIndexProjectionFactory() => + e => new() {RevisionId = e.Pid, DuplicateIndex = e.DuplicateIndex}; protected override bool FieldUpdateIgnorance (string propName, object? oldValue, object? newValue) => propName switch diff --git a/c#/crawler/src/Tieba/Crawl/Saver/Post/SubReplySaver.cs b/c#/crawler/src/Tieba/Crawl/Saver/Post/SubReplySaver.cs index 9cb59fb3..ed6a409e 100644 --- a/c#/crawler/src/Tieba/Crawl/Saver/Post/SubReplySaver.cs +++ b/c#/crawler/src/Tieba/Crawl/Saver/Post/SubReplySaver.cs @@ -43,10 +43,13 @@ public override SaverChangeSet Save(CrawlerDbContext db) return changeSet; } - protected override Spid RevisionEntityIdSelector(BaseSubReplyRevision entity) => entity.Spid; + protected override Spid RevisionIdSelector(BaseSubReplyRevision entity) => entity.Spid; protected override Expression> - IsRevisionEntityIdEqualsExpression(BaseSubReplyRevision newRevision) => + IsRevisionIdEqualsExpression(BaseSubReplyRevision newRevision) => existingRevision => existingRevision.Spid == newRevision.Spid; + protected override Expression> + RevisionIdWithDuplicateIndexProjectionFactory() => + e => new() {RevisionId = e.Spid, DuplicateIndex = e.DuplicateIndex}; protected override NullFieldsBitMask GetRevisionNullFieldBitMask(string fieldName) => 0; } diff --git a/c#/crawler/src/Tieba/Crawl/Saver/Post/ThreadSaver.cs b/c#/crawler/src/Tieba/Crawl/Saver/Post/ThreadSaver.cs index 4079b26e..ebf1e6c5 100644 --- a/c#/crawler/src/Tieba/Crawl/Saver/Post/ThreadSaver.cs 
+++ b/c#/crawler/src/Tieba/Crawl/Saver/Post/ThreadSaver.cs @@ -35,10 +35,13 @@ public override SaverChangeSet Save(CrawlerDbContext db) => th => new ThreadRevision {TakenAt = th.UpdatedAt ?? th.CreatedAt, Tid = th.Tid}, PredicateBuilder.New(th => Posts.Keys.Contains(th.Tid))); - protected override Tid RevisionEntityIdSelector(BaseThreadRevision entity) => entity.Tid; + protected override Tid RevisionIdSelector(BaseThreadRevision entity) => entity.Tid; protected override Expression> - IsRevisionEntityIdEqualsExpression(BaseThreadRevision newRevision) => + IsRevisionIdEqualsExpression(BaseThreadRevision newRevision) => existingRevision => existingRevision.Tid == newRevision.Tid; + protected override Expression> + RevisionIdWithDuplicateIndexProjectionFactory() => + e => new() {RevisionId = e.Tid, DuplicateIndex = e.DuplicateIndex}; protected override bool FieldUpdateIgnorance (string propName, object? oldValue, object? newValue) => propName switch diff --git a/c#/crawler/src/Tieba/Crawl/Saver/SaverWithRevision.cs b/c#/crawler/src/Tieba/Crawl/Saver/SaverWithRevision.cs index d517d29d..6efed976 100644 --- a/c#/crawler/src/Tieba/Crawl/Saver/SaverWithRevision.cs +++ b/c#/crawler/src/Tieba/Crawl/Saver/SaverWithRevision.cs @@ -2,10 +2,11 @@ namespace tbm.Crawler.Tieba.Crawl.Saver; -public abstract partial class SaverWithRevision( - ILogger> logger) +public abstract partial class SaverWithRevision( + ILogger> logger) : IRevisionProperties where TBaseRevision : BaseRevisionWithSplitting + where TRevisionId : struct { protected delegate void AddSplitRevisionsDelegate(CrawlerDbContext db, IEnumerable revisions); protected abstract Lazy> @@ -15,6 +16,7 @@ protected void AddRevisionsWithDuplicateIndex(CrawlerDbContext db, IE where TRevision : TBaseRevision { var newRevisions = revisions.OfType().ToList(); + if (newRevisions.Count == 0) return; // quick exit to prevent execute sql with WHERE FALSE clause var dbSet = db.Set(); var visitor = new ReplaceParameterTypeVisitor(); @@ 
-26,11 +28,13 @@ protected void AddRevisionsWithDuplicateIndex(CrawlerDbContext db, IE (predicate, newRevision) => predicate.Or(LinqKit.PredicateBuilder .New(existingRevision => existingRevision.TakenAt == newRevision.TakenAt) .And((Expression>)visitor - .Visit(IsRevisionEntityIdEqualsExpression(newRevision)))))) + .Visit(IsRevisionIdEqualsExpression(newRevision)))))) + .Cast() + .Select(RevisionIdWithDuplicateIndexProjectionFactory()) .ToList(); (from existingRevision in existingRevisions join newRevision in newRevisions - on RevisionEntityIdSelector(existingRevision) equals RevisionEntityIdSelector(newRevision) + on existingRevision.RevisionId equals RevisionIdSelector(newRevision) select (existingRevision, newRevision)) .ForEach(t => t.newRevision.DuplicateIndex = (ushort)(t.existingRevision.DuplicateIndex + 1)); @@ -39,9 +43,18 @@ on RevisionEntityIdSelector(existingRevision) equals RevisionEntityIdSelector(ne protected abstract NullFieldsBitMask GetRevisionNullFieldBitMask(string fieldName); - protected abstract TEntityId RevisionEntityIdSelector(TBaseRevision entity); + protected abstract TRevisionId RevisionIdSelector(TBaseRevision entity); protected abstract Expression> - IsRevisionEntityIdEqualsExpression(TBaseRevision newRevision); + IsRevisionIdEqualsExpression(TBaseRevision newRevision); + + protected abstract Expression> + RevisionIdWithDuplicateIndexProjectionFactory(); + [SuppressMessage("ReSharper", "PropertyCanBeMadeInitOnly.Global")] + protected class RevisionIdWithDuplicateIndexProjection + { + public TRevisionId RevisionId { get; set; } + public ushort DuplicateIndex { get; set; } + } protected virtual bool ShouldIgnoreEntityRevision(string propName, PropertyEntry propEntry, EntityEntry entityEntry) => false; protected virtual bool FieldUpdateIgnorance(string propName, object? oldValue, object? 
newValue) => false; @@ -52,7 +65,7 @@ protected abstract Expression> _ => false }; } -public abstract partial class SaverWithRevision +public abstract partial class SaverWithRevision { protected void SaveEntitiesWithRevision( CrawlerDbContext db, diff --git a/c#/crawler/src/Tieba/Crawl/Saver/UserSaver.cs b/c#/crawler/src/Tieba/Crawl/Saver/UserSaver.cs index cf8b3f1f..36f44f31 100644 --- a/c#/crawler/src/Tieba/Crawl/Saver/UserSaver.cs +++ b/c#/crawler/src/Tieba/Crawl/Saver/UserSaver.cs @@ -14,10 +14,13 @@ protected override Lazy> {typeof(UserRevision.SplitIpGeolocation), AddRevisionsWithDuplicateIndex} }); - protected override Uid RevisionEntityIdSelector(BaseUserRevision entity) => entity.Uid; + protected override Uid RevisionIdSelector(BaseUserRevision entity) => entity.Uid; protected override Expression> - IsRevisionEntityIdEqualsExpression(BaseUserRevision newRevision) => + IsRevisionIdEqualsExpression(BaseUserRevision newRevision) => existingRevision => existingRevision.Uid == newRevision.Uid; + protected override Expression> + RevisionIdWithDuplicateIndexProjectionFactory() => + e => new() {RevisionId = e.Uid, DuplicateIndex = e.DuplicateIndex}; protected override bool ShouldIgnoreEntityRevision(string propName, PropertyEntry propEntry, EntityEntry entityEntry) {