From 6d6d309557738de265e0d7b39bb4ae542a6eefbe Mon Sep 17 00:00:00 2001 From: n0099 Date: Thu, 11 Jul 2024 22:30:04 +0000 Subject: [PATCH] * reuse the first entity across attached entities that match the same `UniqueLatestReplier` to re-achieve c8f49206c93e7dda21f4d00b2e6dd860f04b790d * fix entities that already exist still being attached in `DbContext.ChangeTracker` + local function `DetachThenReplace()` @ `ThreadLatestReplierSaver.SaveFromThread()` - field `_latestRepliersKeyByUnique` to let `FillFromRequestingWith602()` directly assign new instances of related entity `LatestReplier` @ ThreadCrawlFacade.cs @ c#/crawler --- .../Tieba/Crawl/Facade/ThreadCrawlFacade.cs | 14 +++----- .../Saver/Related/ThreadLatestReplierSaver.cs | 35 +++++++++++++++---- 2 files changed, 32 insertions(+), 17 deletions(-) diff --git a/c#/crawler/src/Tieba/Crawl/Facade/ThreadCrawlFacade.cs b/c#/crawler/src/Tieba/Crawl/Facade/ThreadCrawlFacade.cs index 322063c0..ec9b020d 100644 --- a/c#/crawler/src/Tieba/Crawl/Facade/ThreadCrawlFacade.cs +++ b/c#/crawler/src/Tieba/Crawl/Facade/ThreadCrawlFacade.cs @@ -14,8 +14,6 @@ public class ThreadCrawlFacade( postParser, postSaverFactory.Invoke, userParserFactory.Invoke, userSaverFactory.Invoke) { - private readonly Dictionary _latestRepliersKeyByUnique = []; - public delegate ThreadCrawlFacade New(Fid fid, string forumName); protected override void OnPostParse( @@ -44,21 +42,17 @@ join parsed in Posts.Values on (Tid)inResponse.Tid equals parsed.Tid { // replace with more detailed location.name in the 6.0.2 response t.parsed.Geolocation = Helper.SerializedProtoBufOrNullIfEmpty(t.inResponse.Location); } - var name = t.inResponse.LastReplyer.Name.NullIfEmpty(); - var nameShow = t.inResponse.LastReplyer.NameShow.NullIfEmpty(); + var lastReplyer = t.inResponse.LastReplyer; + var name = lastReplyer?.Name.NullIfEmpty(); + var nameShow = lastReplyer?.NameShow.NullIfEmpty(); // LastReplyer will be null when LivePostType != "", but LastTimeInt will have 
expected timestamp value - var latestReplierEntity = t.inResponse.LastReplyer == null ? null : new LatestReplier + t.parsed.LatestReplier = lastReplyer == null ? null : new LatestReplier { Name = name, #pragma warning disable S3358 // Ternary operators should not be nested DisplayName = name == nameShow ? null : nameShow #pragma warning restore S3358 // Ternary operators should not be nested }; - var uniqueLatestReplier = ThreadLatestReplierSaver.UniqueLatestReplier.FromLatestReplier(latestReplierEntity); - - var isExists = _latestRepliersKeyByUnique.TryGetValue(uniqueLatestReplier, out var existingLatestReplier); - if (!isExists) _latestRepliersKeyByUnique[uniqueLatestReplier] = latestReplierEntity; - t.parsed.LatestReplier = isExists ? existingLatestReplier : latestReplierEntity; }); } diff --git a/c#/crawler/src/Tieba/Crawl/Saver/Related/ThreadLatestReplierSaver.cs b/c#/crawler/src/Tieba/Crawl/Saver/Related/ThreadLatestReplierSaver.cs index 2dbf598e..853bb577 100644 --- a/c#/crawler/src/Tieba/Crawl/Saver/Related/ThreadLatestReplierSaver.cs +++ b/c#/crawler/src/Tieba/Crawl/Saver/Related/ThreadLatestReplierSaver.cs @@ -1,3 +1,5 @@ +using Microsoft.EntityFrameworkCore.ChangeTracking; + namespace tbm.Crawler.Tieba.Crawl.Saver.Related; public class ThreadLatestReplierSaver( @@ -10,19 +12,38 @@ public class ThreadLatestReplierSaver( public Action SaveFromThread(CrawlerDbContext db, IReadOnlyCollection threads) { - var uniqueLatestRepliers = threads + static void DetachThenReplace( + EntityEntry entityEntry, + ThreadPost thread, + LatestReplier newLatestReplier) + { + entityEntry.State = EntityState.Detached; + thread.LatestReplier = newLatestReplier; + } + + var threadsGroupByUniqueLatestReplier = threads .Where(th => th.LatestReplier != null) - .Select(UniqueLatestReplier.FromThread).ToList(); + .GroupBy(UniqueLatestReplier.FromThread).ToList(); + threadsGroupByUniqueLatestReplier.ForEach(g => + (from thread in g.Skip(1) + join entityEntry in 
db.ChangeTracker.Entries() + on thread.LatestReplier equals entityEntry.Entity + select (thread, entityEntry)) + .ForEach(t => DetachThenReplace(t.entityEntry, t.thread, g.First().LatestReplier!))); + + var uniqueLatestRepliers = threadsGroupByUniqueLatestReplier.Select(g => g.Key).ToList(); var existingLatestRepliers = db.LatestRepliers.AsNoTracking().FilterByItems( - uniqueLatestRepliers, (latestReplier, uniqueLatestReplier) => - latestReplier.Name == uniqueLatestReplier.Name - && latestReplier.DisplayName == uniqueLatestReplier.DisplayName) + uniqueLatestRepliers, (latestReplier, uniqueLatestReplier) => + latestReplier.Name == uniqueLatestReplier.Name + && latestReplier.DisplayName == uniqueLatestReplier.DisplayName) .ToList(); (from existing in existingLatestRepliers join thread in threads on UniqueLatestReplier.FromLatestReplier(existing) equals UniqueLatestReplier.FromThread(thread) - select (existing, thread)) - .ForEach(t => t.thread.LatestReplier = t.existing); + join entityEntry in db.ChangeTracker.Entries() + on thread.LatestReplier equals entityEntry.Entity // Object.ReferenceEquals() + select (existing, thread, entityEntry)) + .ForEach(t => DetachThenReplace(t.entityEntry, t.thread, t.existing)); _ = _saverLocks.Value.Acquire(uniqueLatestRepliers .Except(existingLatestRepliers.Select(UniqueLatestReplier.FromLatestReplier))