Skip to content

Commit

Permalink
* partial revert 388ddc9, since releasing locks before `DbContext.SaveChanges()` is called and the transaction is committed is too early @ c#/crawler
Browse files Browse the repository at this point in the history
  • Loading branch information
n0099 committed Jun 12, 2024
1 parent 2537cad commit 18dd2c3
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 59 deletions.
5 changes: 2 additions & 3 deletions c#/crawler/src/Tieba/Crawl/Saver/AuthorRevisionSaver.cs
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,7 @@ public Action SaveAuthorExpGradeRevisions<TPostWithAuthorExpGrade>
(CrawlerDbContext db, IReadOnlyCollection<TPostWithAuthorExpGrade> posts)
where TPostWithAuthorExpGrade : PostWithAuthorExpGrade
{
using var saverLocks = _authorExpGradeLocksSaverLocks.Value;
Save(db, posts, saverLocks,
Save(db, posts, _authorExpGradeLocksSaverLocks.Value,
db.AuthorExpGradeRevisions,
p => p.AuthorExpGrade,
(a, b) => a != b,
Expand All @@ -37,7 +36,7 @@ public Action SaveAuthorExpGradeRevisions<TPostWithAuthorExpGrade>
TriggeredBy = triggeredByPostType,
AuthorExpGrade = t.Value
});
return saverLocks.Dispose;
return _authorExpGradeLocksSaverLocks.Value.Dispose;
}

private void Save<TPost, TRevision, TValue>(
Expand Down
93 changes: 40 additions & 53 deletions c#/crawler/src/Tieba/Crawl/Saver/ReplyContentImageSaver.cs
Original file line number Diff line number Diff line change
Expand Up @@ -53,67 +53,54 @@ where images.Keys.Contains(e.UrlFilename)
.ForEach(image => logger.LogWarning(
"Wait for locking already locked image {} timed out after 10s", image.UrlFilename));

var isGlobalLockReleased = false;
void ReleaseGlobalLocks()
if (alreadyLockedImages.Count != 0)
existingImages = existingImages
.Concat((
from e in db.ImageInReplies.AsTracking()
where alreadyLockedImages.Keys().Contains(e.UrlFilename)
select e).ToDictionary(e => e.UrlFilename))
.ToDictionary();
(from existing in existingImages.Values
where existing.ExpectedByteSize == 0 // randomly respond with 0
join newInContent in images.Values
on existing.UrlFilename equals newInContent.UrlFilename
select (existing, newInContent))
.ForEach(t => t.existing.ExpectedByteSize = t.newInContent.ExpectedByteSize);

(from existing in existingImages.Values
join replyContentImage in replyContentImages
on existing.UrlFilename equals replyContentImage.ImageInReply.UrlFilename
select (existing, replyContentImage))
.ForEach(t => t.replyContentImage.ImageInReply = t.existing);
var existingReplyContentImages = db.ReplyContentImages.AsNoTracking()
.Where(replyContentImages.Aggregate(
LinqKit.PredicateBuilder.New<ReplyContentImage>(),
(predicate, newOrExisting) =>
predicate.Or(LinqKit.PredicateBuilder
.New<ReplyContentImage>(existing =>
existing.Pid == newOrExisting.Pid)
.And(existing =>
existing.ImageInReply.UrlFilename == newOrExisting.ImageInReply.UrlFilename))))
.Include(e => e.ImageInReply)
.Select(e => new {e.Pid, e.ImageInReply.UrlFilename})
.ToList();
db.ReplyContentImages.AddRange(replyContentImages
.ExceptBy(existingReplyContentImages.Select(e => (e.Pid, e.UrlFilename)),
e => (e.Pid, e.ImageInReply.UrlFilename)));

return () =>
{
try
{
if (!isGlobalLockReleased && newlyLockedImages.Any(pair =>
if (newlyLockedImages.Any(pair =>
!GlobalLockedImagesInReplyKeyByUrlFilename.TryRemove(pair)))
throw new InvalidOperationException();
isGlobalLockReleased = true;
}
finally
{
if (!isGlobalLockReleased)
{
newlyLockedImages.Values().ForEach(Monitor.Exit);
alreadyLockedImages.Values().ForEach(Monitor.Exit);
}
newlyLockedImages.Values().ForEach(Monitor.Exit);
alreadyLockedImages.Values().ForEach(Monitor.Exit);
}
}
try
{
if (alreadyLockedImages.Count != 0)
existingImages = existingImages
.Concat((
from e in db.ImageInReplies.AsTracking()
where alreadyLockedImages.Keys().Contains(e.UrlFilename)
select e).ToDictionary(e => e.UrlFilename))
.ToDictionary();

(from existing in existingImages.Values
where existing.ExpectedByteSize == 0 // randomly respond with 0
join newInContent in images.Values
on existing.UrlFilename equals newInContent.UrlFilename
select (existing, newInContent))
.ForEach(t => t.existing.ExpectedByteSize = t.newInContent.ExpectedByteSize);

(from existing in existingImages.Values
join replyContentImage in replyContentImages
on existing.UrlFilename equals replyContentImage.ImageInReply.UrlFilename
select (existing, replyContentImage))
.ForEach(t => t.replyContentImage.ImageInReply = t.existing);
var existingReplyContentImages = db.ReplyContentImages.AsNoTracking()
.Where(replyContentImages.Aggregate(
LinqKit.PredicateBuilder.New<ReplyContentImage>(),
(predicate, newOrExisting) =>
predicate.Or(LinqKit.PredicateBuilder
.New<ReplyContentImage>(existing =>
existing.Pid == newOrExisting.Pid)
.And(existing =>
existing.ImageInReply.UrlFilename == newOrExisting.ImageInReply.UrlFilename))))
.Include(e => e.ImageInReply)
.Select(e => new {e.Pid, e.ImageInReply.UrlFilename})
.ToList();
db.ReplyContentImages.AddRange(replyContentImages
.ExceptBy(existingReplyContentImages.Select(e => (e.Pid, e.UrlFilename)),
e => (e.Pid, e.ImageInReply.UrlFilename)));
return ReleaseGlobalLocks;
}
finally
{
ReleaseGlobalLocks();
}
};
}
}
5 changes: 2 additions & 3 deletions c#/crawler/src/Tieba/Crawl/Saver/ReplySignatureSaver.cs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ public class ReplySignatureSaver(

public Action Save(CrawlerDbContext db, IEnumerable<ReplyPost> replies)
{
using var saverLocks = _saverLocks.Value;
SharedHelper.GetNowTimestamp(out var now);
var repliesWithSignature = replies
.Where(r => r is {SignatureId: not null, Signature: not null}).ToList();
Expand Down Expand Up @@ -56,11 +55,11 @@ join newInReply in signatures on existing.SignatureId equals newInReply.Signatur

var newSignatures = signatures
.ExceptBy(existingSignatures.Select(s => s.SignatureId), s => s.SignatureId).ToList();
var newlyLocked = saverLocks.Acquire(
var newlyLocked = _saverLocks.Value.Acquire(
newSignatures.Select(s => new UniqueSignature(s.SignatureId, s.XxHash3)).ToList());
db.ReplySignatures.AddRange(
newSignatures.IntersectBy(newlyLocked, s => new(s.SignatureId, s.XxHash3)));
return saverLocks.Dispose;
return _saverLocks.Value.Dispose;
}

[SuppressMessage("Class Design", "AV1000:Type name contains the word 'and', which suggests it has multiple purposes")]
Expand Down

0 comments on commit 18dd2c3

Please sign in to comment.