From 18dd2c3a9b74096c7b552d59ba75d1a351207526 Mon Sep 17 00:00:00 2001 From: n0099 Date: Wed, 12 Jun 2024 20:37:02 +0000 Subject: [PATCH] * partial revert 388ddc9026ef45491d7db448a4e50f6a27024d3d since releasing locks before `DbContext.SaveChanges()` and transaction committed is too early @ c#/crawler --- .../Tieba/Crawl/Saver/AuthorRevisionSaver.cs | 5 +- .../Crawl/Saver/ReplyContentImageSaver.cs | 93 ++++++++----------- .../Tieba/Crawl/Saver/ReplySignatureSaver.cs | 5 +- 3 files changed, 44 insertions(+), 59 deletions(-) diff --git a/c#/crawler/src/Tieba/Crawl/Saver/AuthorRevisionSaver.cs b/c#/crawler/src/Tieba/Crawl/Saver/AuthorRevisionSaver.cs index ca3f3b2f..5897bf71 100644 --- a/c#/crawler/src/Tieba/Crawl/Saver/AuthorRevisionSaver.cs +++ b/c#/crawler/src/Tieba/Crawl/Saver/AuthorRevisionSaver.cs @@ -17,8 +17,7 @@ public Action SaveAuthorExpGradeRevisions (CrawlerDbContext db, IReadOnlyCollection posts) where TPostWithAuthorExpGrade : PostWithAuthorExpGrade { - using var saverLocks = _authorExpGradeLocksSaverLocks.Value; - Save(db, posts, saverLocks, + Save(db, posts, _authorExpGradeLocksSaverLocks.Value, db.AuthorExpGradeRevisions, p => p.AuthorExpGrade, (a, b) => a != b, @@ -37,7 +36,7 @@ public Action SaveAuthorExpGradeRevisions TriggeredBy = triggeredByPostType, AuthorExpGrade = t.Value }); - return saverLocks.Dispose; + return _authorExpGradeLocksSaverLocks.Value.Dispose; } private void Save( diff --git a/c#/crawler/src/Tieba/Crawl/Saver/ReplyContentImageSaver.cs b/c#/crawler/src/Tieba/Crawl/Saver/ReplyContentImageSaver.cs index f94d2de9..5c66bd3d 100644 --- a/c#/crawler/src/Tieba/Crawl/Saver/ReplyContentImageSaver.cs +++ b/c#/crawler/src/Tieba/Crawl/Saver/ReplyContentImageSaver.cs @@ -53,67 +53,54 @@ where images.Keys.Contains(e.UrlFilename) .ForEach(image => logger.LogWarning( "Wait for locking already locked image {} timed out after 10s", image.UrlFilename)); - var isGlobalLockReleased = false; - void ReleaseGlobalLocks() + if (alreadyLockedImages.Count != 0) + existingImages = existingImages + .Concat(( + from e in db.ImageInReplies.AsTracking() + where alreadyLockedImages.Keys().Contains(e.UrlFilename) + select e).ToDictionary(e => e.UrlFilename)) + .ToDictionary(); + (from existing in existingImages.Values + where existing.ExpectedByteSize == 0 // randomly respond with 0 + join newInContent in images.Values + on existing.UrlFilename equals newInContent.UrlFilename + select (existing, newInContent)) + .ForEach(t => t.existing.ExpectedByteSize = t.newInContent.ExpectedByteSize); + + (from existing in existingImages.Values + join replyContentImage in replyContentImages + on existing.UrlFilename equals replyContentImage.ImageInReply.UrlFilename + select (existing, replyContentImage)) + .ForEach(t => t.replyContentImage.ImageInReply = t.existing); + var existingReplyContentImages = db.ReplyContentImages.AsNoTracking() + .Where(replyContentImages.Aggregate( + LinqKit.PredicateBuilder.New(), + (predicate, newOrExisting) => + predicate.Or(LinqKit.PredicateBuilder + .New(existing => + existing.Pid == newOrExisting.Pid) + .And(existing => + existing.ImageInReply.UrlFilename == newOrExisting.ImageInReply.UrlFilename)))) + .Include(e => e.ImageInReply) + .Select(e => new {e.Pid, e.ImageInReply.UrlFilename}) + .ToList(); + db.ReplyContentImages.AddRange(replyContentImages + .ExceptBy(existingReplyContentImages.Select(e => (e.Pid, e.UrlFilename)), + e => (e.Pid, e.ImageInReply.UrlFilename))); + + return () => { try { - if (!isGlobalLockReleased && newlyLockedImages.Any(pair => + if (newlyLockedImages.Any(pair => !GlobalLockedImagesInReplyKeyByUrlFilename.TryRemove(pair))) throw new InvalidOperationException(); - isGlobalLockReleased = true; } finally { - if (!isGlobalLockReleased) - { - newlyLockedImages.Values().ForEach(Monitor.Exit); - alreadyLockedImages.Values().ForEach(Monitor.Exit); - } + newlyLockedImages.Values().ForEach(Monitor.Exit); + alreadyLockedImages.Values().ForEach(Monitor.Exit); } - } - try - { - if (alreadyLockedImages.Count != 0) - existingImages = existingImages - .Concat(( - from e in db.ImageInReplies.AsTracking() - where alreadyLockedImages.Keys().Contains(e.UrlFilename) - select e).ToDictionary(e => e.UrlFilename)) - .ToDictionary(); - - (from existing in existingImages.Values - where existing.ExpectedByteSize == 0 // randomly respond with 0 - join newInContent in images.Values - on existing.UrlFilename equals newInContent.UrlFilename - select (existing, newInContent)) - .ForEach(t => t.existing.ExpectedByteSize = t.newInContent.ExpectedByteSize); - - (from existing in existingImages.Values - join replyContentImage in replyContentImages - on existing.UrlFilename equals replyContentImage.ImageInReply.UrlFilename - select (existing, replyContentImage)) - .ForEach(t => t.replyContentImage.ImageInReply = t.existing); - var existingReplyContentImages = db.ReplyContentImages.AsNoTracking() - .Where(replyContentImages.Aggregate( - LinqKit.PredicateBuilder.New(), - (predicate, newOrExisting) => - predicate.Or(LinqKit.PredicateBuilder - .New(existing => - existing.Pid == newOrExisting.Pid) - .And(existing => - existing.ImageInReply.UrlFilename == newOrExisting.ImageInReply.UrlFilename)))) - .Include(e => e.ImageInReply) - .Select(e => new {e.Pid, e.ImageInReply.UrlFilename}) - .ToList(); - db.ReplyContentImages.AddRange(replyContentImages - .ExceptBy(existingReplyContentImages.Select(e => (e.Pid, e.UrlFilename)), - e => (e.Pid, e.ImageInReply.UrlFilename))); - return ReleaseGlobalLocks; - } - finally - { - ReleaseGlobalLocks(); - } + }; } } diff --git a/c#/crawler/src/Tieba/Crawl/Saver/ReplySignatureSaver.cs b/c#/crawler/src/Tieba/Crawl/Saver/ReplySignatureSaver.cs index 4538f374..7d558e15 100644 --- a/c#/crawler/src/Tieba/Crawl/Saver/ReplySignatureSaver.cs +++ b/c#/crawler/src/Tieba/Crawl/Saver/ReplySignatureSaver.cs @@ -12,7 +12,6 @@ public class ReplySignatureSaver( public Action Save(CrawlerDbContext db, IEnumerable replies) { - using var saverLocks = _saverLocks.Value; SharedHelper.GetNowTimestamp(out var now); var repliesWithSignature = replies .Where(r => r is {SignatureId: not null, Signature: not null}).ToList(); @@ -56,11 +55,11 @@ join newInReply in signatures on existing.SignatureId equals newInReply.Signatur var newSignatures = signatures .ExceptBy(existingSignatures.Select(s => s.SignatureId), s => s.SignatureId).ToList(); - var newlyLocked = saverLocks.Acquire( + var newlyLocked = _saverLocks.Value.Acquire( newSignatures.Select(s => new UniqueSignature(s.SignatureId, s.XxHash3)).ToList()); db.ReplySignatures.AddRange( newSignatures.IntersectBy(newlyLocked, s => new(s.SignatureId, s.XxHash3))); - return saverLocks.Dispose; + return _saverLocks.Value.Dispose; } [SuppressMessage("Class Design", "AV1000:Type name contains the word 'and', which suggests it has multiple purposes")]