From b480bea61eeaf83a167cdea2c159987b381de355 Mon Sep 17 00:00:00 2001 From: n0099 Date: Mon, 3 Jun 2024 08:26:34 +0800 Subject: [PATCH] * now will try locking both `(newly|already)LockedImages` for ten seconds timeout and release locks in the returned hooks that will be invoked after `DbContext.SaveChanges()` * rename variable `imagesKeyByUrlFilename` to `images` * rename variable `(newly|already)Locked` to `(newly|already)LockedImages` @ `Save()` * rename field `LocksKeyByUrlFilename` to `GlobalLockedImagesInReplyKeyByUrlFilename` - primary ctor param `locks` @ ReplyContentImageSaver.cs @ c#/crawler --- .../src/Tieba/Crawl/Saver/Post/ReplySaver.cs | 2 +- .../Crawl/Saver/ReplyContentImageSaver.cs | 71 +++++++++++-------- 2 files changed, 44 insertions(+), 29 deletions(-) diff --git a/c#/crawler/src/Tieba/Crawl/Saver/Post/ReplySaver.cs b/c#/crawler/src/Tieba/Crawl/Saver/Post/ReplySaver.cs index 94e28523..0d53094e 100644 --- a/c#/crawler/src/Tieba/Crawl/Saver/Post/ReplySaver.cs +++ b/c#/crawler/src/Tieba/Crawl/Saver/Post/ReplySaver.cs @@ -17,7 +17,7 @@ public override SaverChangeSet Save(CrawlerDbContext db) r => new ReplyRevision {TakenAt = r.UpdatedAt ?? 
r.CreatedAt, Pid = r.Pid}, LinqKit.PredicateBuilder.New(r => Posts.Keys.Contains(r.Pid))); - replyContentImageSaver.Save(db, changeSet.NewlyAdded); + PostSaveHandlers += replyContentImageSaver.Save(db, changeSet.NewlyAdded).Invoke; PostSaveHandlers += AuthorRevisionSaver.SaveAuthorExpGradeRevisions(db, changeSet.AllAfter).Invoke; PostSaveHandlers += replySignatureSaver.Save(db, changeSet.AllAfter).Invoke; diff --git a/c#/crawler/src/Tieba/Crawl/Saver/ReplyContentImageSaver.cs b/c#/crawler/src/Tieba/Crawl/Saver/ReplyContentImageSaver.cs index a9b58062..16328b06 100644 --- a/c#/crawler/src/Tieba/Crawl/Saver/ReplyContentImageSaver.cs +++ b/c#/crawler/src/Tieba/Crawl/Saver/ReplyContentImageSaver.cs @@ -1,10 +1,11 @@ namespace tbm.Crawler.Tieba.Crawl.Saver; -public class ReplyContentImageSaver(SaverLocks locks) +public class ReplyContentImageSaver(ILogger logger) { - private static readonly ConcurrentDictionary LocksKeyByUrlFilename = new(); + private static readonly ConcurrentDictionary + GlobalLockedImagesInReplyKeyByUrlFilename = new(); - public void Save(CrawlerDbContext db, IEnumerable replies) + public Action Save(CrawlerDbContext db, IEnumerable replies) { var pidAndImageList = ( from r in replies @@ -19,39 +20,44 @@ from c in r.OriginalContents })) .DistinctBy(t => (t.Pid, t.Image.UrlFilename)) .ToList(); - if (pidAndImageList.Count == 0) return; - var imagesKeyByUrlFilename = pidAndImageList.Select(t => t.Image) + if (pidAndImageList.Count == 0) return () => { }; + var images = pidAndImageList.Select(t => t.Image) .DistinctBy(image => image.UrlFilename).ToDictionary(image => image.UrlFilename); var existingImages = ( from e in db.ImageInReplies.AsTracking() - where imagesKeyByUrlFilename.Keys.Contains(e.UrlFilename) + where images.Keys.Contains(e.UrlFilename) select e) .ToDictionary(e => e.UrlFilename); - var newImages = imagesKeyByUrlFilename.ExceptByKey(existingImages.Keys).Keys().ToList(); - var newlyLocked = locks.AcquireLocks(newImages); - var 
alreadyLocked = newImages.Except(newlyLocked).ToList(); + var newImages = images + .ExceptByKey(existingImages.Keys).ToDictionary(); - if (newlyLocked.Any(urlFilename => !LocksKeyByUrlFilename.TryAdd(urlFilename, new()))) - throw new InvalidOperationException(); - alreadyLocked.ForEach(urlFilename => - { - lock (LocksKeyByUrlFilename[urlFilename]) -#pragma warning disable S108 // Either remove or fill this block of code. - { - } -#pragma warning restore S108 // Either remove or fill this block of code. - }); + var newlyLockedImages = newImages + .Where(pair => GlobalLockedImagesInReplyKeyByUrlFilename.TryAdd(pair.Key, pair.Value)) + .ToDictionary(); + newlyLockedImages.Values() + .Where(reply => !Monitor.TryEnter(reply, TimeSpan.FromSeconds(10))) + .ForEach(image => logger.LogWarning( + "Wait for locking newly locked image {} timed out after 10s", image.UrlFilename)); + + var alreadyLockedImages = GlobalLockedImagesInReplyKeyByUrlFilename + .IntersectByKey(newImages + .Keys().Except(newlyLockedImages.Keys())) + .ToDictionary(); + alreadyLockedImages.Values() + .Where(reply => !Monitor.TryEnter(reply, TimeSpan.FromSeconds(10))) + .ForEach(image => logger.LogWarning( + "Wait for locking already locked image {} timed out after 10s", image.UrlFilename)); existingImages = existingImages .Concat(( from e in db.ImageInReplies.AsTracking() - where alreadyLocked.Contains(e.UrlFilename) + where alreadyLockedImages.Keys().Contains(e.UrlFilename) select e).ToDictionary(e => e.UrlFilename)) .ToDictionary(); (from existing in existingImages.Values where existing.ExpectedByteSize == 0 // randomly respond with 0 - join newInContent in imagesKeyByUrlFilename.Values + join newInContent in images.Values on existing.UrlFilename equals newInContent.UrlFilename select (existing, newInContent)) .ForEach(t => t.existing.ExpectedByteSize = t.newInContent.ExpectedByteSize); @@ -62,19 +68,28 @@ on existing.UrlFilename equals newInContent.UrlFilename // no need to manually invoke 
DbContext.AddRange(images) since EF Core will do these chore // https://stackoverflow.com/questions/5212751/how-can-i-retrieve-id-of-inserted-entity-using-entity-framework/41146434#41146434 - // reuse the same instance from imagesKeyByUrlFilename + // reuse the same instance from existingImages // will prevent assigning multiple different instances with the same key // which will cause EF Core to insert identify entry more than one time leading to duplicated entry error // https://github.com/dotnet/efcore/issues/30236 ImageInReply = existingImages.TryGetValue(t.Image.UrlFilename, out var e) ? e - : imagesKeyByUrlFilename[t.Image.UrlFilename] + : images[t.Image.UrlFilename] })); - if (newlyLocked.Any(urlFilename => !LocksKeyByUrlFilename.TryRemove(urlFilename, out _))) - throw new InvalidOperationException(); -#pragma warning disable IDISP007 // Don't dispose injected - locks.Dispose(); -#pragma warning restore IDISP007 // Don't dispose injected + return () => + { + try + { + if (newlyLockedImages.Any(pair => + !GlobalLockedImagesInReplyKeyByUrlFilename.TryRemove(pair))) + throw new InvalidOperationException(); + } + finally + { + newlyLockedImages.Values().ForEach(Monitor.Exit); + alreadyLockedImages.Values().ForEach(Monitor.Exit); + } + }; } }