From 493f4525aba7ad7ff690edd4a595e43a7b92e94b Mon Sep 17 00:00:00 2001 From: n0099 Date: Mon, 3 Jun 2024 05:23:44 +0800 Subject: [PATCH] * replace the logging of different values of `postRevisioningFieldSelector()` that share the same key `AuthorUid` of `BasePost` entities with a call to `Helper.LogDifferentValuesSharingTheSameKeyInEntities()` @ `AuthorRevisionSaver.Save()` * replace the logging of different values of `Signature` that share the same key `SignatureId` of `ReplyPost` entities with a call to `Helper.LogDifferentValuesSharingTheSameKeyInEntities()` @ `Save()` * turn record `ReplySignatureProjection` into class `SignatureIdAndValueEqualityComparer` that implements `EqualityComparer<(uint? SignatureId, byte[]? Signature)>` @ ReplySignatureSaver.cs @ c#/crawler --- c#/crawler/src/Helper.cs | 18 ++++++++ .../Tieba/Crawl/Saver/AuthorRevisionSaver.cs | 18 +++----- .../Tieba/Crawl/Saver/ReplySignatureSaver.cs | 41 +++++++++---------- 3 files changed, 42 insertions(+), 35 deletions(-) diff --git a/c#/crawler/src/Helper.cs b/c#/crawler/src/Helper.cs index fc4fd077..e08f8709 100644 --- a/c#/crawler/src/Helper.cs +++ b/c#/crawler/src/Helper.cs @@ -20,4 +20,22 @@ public static byte[]? SerializedProtoBufWrapperOrNullIfEmpty public static PostContentWrapper? WrapPostContent(IEnumerable? contents) => contents == null ? null : new() {Value = {contents}}; + + public static void LogDifferentValuesSharingTheSameKeyInEntities( + ILogger logger, + IEnumerable entities, + string keyName, + Func keySelector, + Func valueSelector, + IEqualityComparer<(TKey?, TValue?)>?
keyAndValueComparer = null) => entities + .GroupBy(keySelector) + .Where(g => g.Count() > 1) + .Flatten2() + .GroupBy(p => (keySelector(p), valueSelector(p)), comparer: keyAndValueComparer) + .GroupBy(g => g.Key.Item1) + .Where(gg => gg.Count() > 1) + .Flatten2() + .ForEach(g => logger.LogWarning( + "Multiple entities with different value of field {} sharing the same key \"{}\": {}", + keyName, g.Key, SharedHelper.UnescapedJsonSerialize(g))); } diff --git a/c#/crawler/src/Tieba/Crawl/Saver/AuthorRevisionSaver.cs b/c#/crawler/src/Tieba/Crawl/Saver/AuthorRevisionSaver.cs index af676769..e659ce9f 100644 --- a/c#/crawler/src/Tieba/Crawl/Saver/AuthorRevisionSaver.cs +++ b/c#/crawler/src/Tieba/Crawl/Saver/AuthorRevisionSaver.cs @@ -48,19 +48,11 @@ private void Save( where TRevision : AuthorRevision { // only takes the first of multiple post from the same author var uniquePosts = posts.DistinctBy(p => p.AuthorUid).ToList(); - if (uniquePosts.Count != posts.Count) ( - from p in posts - group p by p.AuthorUid into g - where g.Count() > 1 - from p in g - group p by (p.AuthorUid, postRevisioningFieldSelector(p)) into g - group g by g.Key.AuthorUid into gg - where gg.Count() > 1 - from g in gg - select g) - .ForEach(g => logger.LogWarning( - "Multiple entities with different value of revisioning field sharing the same TPost.AuthorUid {}: {}", - g.Key, SharedHelper.UnescapedJsonSerialize(g))); + if (uniquePosts.Count != posts.Count) + Helper.LogDifferentValuesSharingTheSameKeyInEntities(logger, posts, + $"{nameof(TPost)}.{nameof(BasePost.AuthorUid)}", + p => p.AuthorUid, + postRevisioningFieldSelector); SharedHelper.GetNowTimestamp(out var now); var existingRevisionOfExistingUsers = dbSet.AsNoTracking() diff --git a/c#/crawler/src/Tieba/Crawl/Saver/ReplySignatureSaver.cs b/c#/crawler/src/Tieba/Crawl/Saver/ReplySignatureSaver.cs index 46137f91..a5d1efc9 100644 --- a/c#/crawler/src/Tieba/Crawl/Saver/ReplySignatureSaver.cs +++ 
b/c#/crawler/src/Tieba/Crawl/Saver/ReplySignatureSaver.cs @@ -26,20 +26,15 @@ public Action Save(CrawlerDbContext db, IEnumerable replies) }).ToList(); if (signatures.Count == 0) return () => { }; if (signatures.Count != repliesWithSignature - .GroupBy(r => new ReplySignatureProjection(r.SignatureId!.Value, r.Signature!)) - .Count()) ( - from r in repliesWithSignature - group r by r.SignatureId into g - where g.Count() > 1 - from r in g - group r by new ReplySignatureProjection(r.SignatureId!.Value, r.Signature!) into g - group g by g.Key.SignatureId into gg - where gg.Count() > 1 - from g in gg - select g) - .ForEach(g => logger.LogWarning( - "Multiple entities with different value of revisioning field sharing the same signature id {}: {}", - g.Key.SignatureId, SharedHelper.UnescapedJsonSerialize(g))); + .GroupBy(r => (r.SignatureId!.Value, r.Signature!), + comparer: SignatureIdAndValueEqualityComparer.Instance) + .Count()) + Helper.LogDifferentValuesSharingTheSameKeyInEntities(logger, + repliesWithSignature, + nameof(ReplyPost.SignatureId), + r => r.SignatureId, + r => r.Signature, + SignatureIdAndValueEqualityComparer.Instance); var existingSignatures = ( from s in db.ReplySignatures.AsTracking() @@ -63,18 +58,20 @@ join newInReply in signatures on existing.SignatureId equals newInReply.Signatur return locks.Dispose; } - private sealed record ReplySignatureProjection(uint SignatureId, byte[] Signature) + private class SignatureIdAndValueEqualityComparer : EqualityComparer<(uint? SignatureId, byte[]? Signature)> { - public bool Equals(ReplySignatureProjection? other) => - other != null - && SignatureId == other.SignatureId - && ByteArrayEqualityComparer.Instance.Equals(Signature, other.Signature); + public static SignatureIdAndValueEqualityComparer Instance { get; } = new(); - public override int GetHashCode() + public override bool Equals((uint? SignatureId, byte[]? Signature) x, (uint? SignatureId, byte[]? 
Signature) y) => + x == y || + (x.SignatureId == y.SignatureId + && ByteArrayEqualityComparer.Instance.Equals(x.Signature, y.Signature)); + + public override int GetHashCode((uint? SignatureId, byte[]? Signature) obj) { var hash = default(HashCode); - hash.Add(SignatureId); - hash.AddBytes(Signature); + hash.Add(obj.SignatureId); + hash.AddBytes(obj.Signature); return hash.ToHashCode(); } }