diff --git a/c#/crawler/src/Tieba/Crawl/Facade/ThreadCrawlFacade.cs b/c#/crawler/src/Tieba/Crawl/Facade/ThreadCrawlFacade.cs index 322063c0..ec9b020d 100644 --- a/c#/crawler/src/Tieba/Crawl/Facade/ThreadCrawlFacade.cs +++ b/c#/crawler/src/Tieba/Crawl/Facade/ThreadCrawlFacade.cs @@ -14,8 +14,6 @@ public class ThreadCrawlFacade( postParser, postSaverFactory.Invoke, userParserFactory.Invoke, userSaverFactory.Invoke) { - private readonly Dictionary _latestRepliersKeyByUnique = []; - public delegate ThreadCrawlFacade New(Fid fid, string forumName); protected override void OnPostParse( @@ -44,21 +42,17 @@ join parsed in Posts.Values on (Tid)inResponse.Tid equals parsed.Tid { // replace with more detailed location.name in the 6.0.2 response t.parsed.Geolocation = Helper.SerializedProtoBufOrNullIfEmpty(t.inResponse.Location); } - var name = t.inResponse.LastReplyer.Name.NullIfEmpty(); - var nameShow = t.inResponse.LastReplyer.NameShow.NullIfEmpty(); + var lastReplyer = t.inResponse.LastReplyer; + var name = lastReplyer?.Name.NullIfEmpty(); + var nameShow = lastReplyer?.NameShow.NullIfEmpty(); // LastReplyer will be null when LivePostType != "", but LastTimeInt will have expected timestamp value - var latestReplierEntity = t.inResponse.LastReplyer == null ? null : new LatestReplier + t.parsed.LatestReplier = lastReplyer == null ? null : new LatestReplier { Name = name, #pragma warning disable S3358 // Ternary operators should not be nested DisplayName = name == nameShow ? null : nameShow #pragma warning restore S3358 // Ternary operators should not be nested }; - var uniqueLatestReplier = ThreadLatestReplierSaver.UniqueLatestReplier.FromLatestReplier(latestReplierEntity); - - var isExists = _latestRepliersKeyByUnique.TryGetValue(uniqueLatestReplier, out var existingLatestReplier); - if (!isExists) _latestRepliersKeyByUnique[uniqueLatestReplier] = latestReplierEntity; - t.parsed.LatestReplier = isExists ? existingLatestReplier : latestReplierEntity; }); } diff --git a/c#/crawler/src/Tieba/Crawl/Saver/Related/ThreadLatestReplierSaver.cs b/c#/crawler/src/Tieba/Crawl/Saver/Related/ThreadLatestReplierSaver.cs index 2dbf598e..f1cbd4de 100644 --- a/c#/crawler/src/Tieba/Crawl/Saver/Related/ThreadLatestReplierSaver.cs +++ b/c#/crawler/src/Tieba/Crawl/Saver/Related/ThreadLatestReplierSaver.cs @@ -1,3 +1,5 @@ +using Microsoft.EntityFrameworkCore.ChangeTracking; + namespace tbm.Crawler.Tieba.Crawl.Saver.Related; public class ThreadLatestReplierSaver( @@ -10,19 +12,38 @@ public class ThreadLatestReplierSaver( public Action SaveFromThread(CrawlerDbContext db, IReadOnlyCollection threads) { - var uniqueLatestRepliers = threads + static void DetachAndReplace( + EntityEntry entityEntry, + ThreadPost thread, + LatestReplier newLatestReplier) + { + entityEntry.State = EntityState.Detached; + thread.LatestReplier = newLatestReplier; + }; + + var threadsGroupByUniqueLatestReplier = threads .Where(th => th.LatestReplier != null) - .Select(UniqueLatestReplier.FromThread).ToList(); + .GroupBy(UniqueLatestReplier.FromThread).ToList(); + threadsGroupByUniqueLatestReplier.ForEach(g => + (from thread in g.Skip(1) + join entityEntry in db.ChangeTracker.Entries() + on thread.LatestReplier equals entityEntry.Entity + select (thread, entityEntry)) + .ForEach(t => DetachAndReplace(t.entityEntry, t.thread, g.First().LatestReplier!))); + + var uniqueLatestRepliers = threadsGroupByUniqueLatestReplier.Select(g => g.Key).ToList(); var existingLatestRepliers = db.LatestRepliers.AsNoTracking().FilterByItems( - uniqueLatestRepliers, (latestReplier, uniqueLatestReplier) => - latestReplier.Name == uniqueLatestReplier.Name - && latestReplier.DisplayName == uniqueLatestReplier.DisplayName) + uniqueLatestRepliers, (latestReplier, uniqueLatestReplier) => + latestReplier.Name == uniqueLatestReplier.Name + && latestReplier.DisplayName == uniqueLatestReplier.DisplayName) .ToList(); (from existing in existingLatestRepliers join thread in threads on UniqueLatestReplier.FromLatestReplier(existing) equals UniqueLatestReplier.FromThread(thread) - select (existing, thread)) - .ForEach(t => t.thread.LatestReplier = t.existing); + join entityEntry in db.ChangeTracker.Entries() + on thread.LatestReplier equals entityEntry.Entity // Object.ReferenceEquals() + select (existing, thread, entityEntry)) + .ForEach(t => DetachAndReplace(t.entityEntry, t.thread, t.existing)); _ = _saverLocks.Value.Acquire(uniqueLatestRepliers .Except(existingLatestRepliers.Select(UniqueLatestReplier.FromLatestReplier))