diff --git a/c#/crawler/src/Helper.cs b/c#/crawler/src/Helper.cs index fc4fd077..e08f8709 100644 --- a/c#/crawler/src/Helper.cs +++ b/c#/crawler/src/Helper.cs @@ -20,4 +20,22 @@ public static byte[]? SerializedProtoBufWrapperOrNullIfEmpty public static PostContentWrapper? WrapPostContent(IEnumerable? contents) => contents == null ? null : new() {Value = {contents}}; + + public static void LogDifferentValuesSharingTheSameKeyInEntities( + ILogger logger, + IEnumerable entities, + string keyName, + Func keySelector, + Func valueSelector, + IEqualityComparer<(TKey?, TValue?)>? keyAndValueComparer = null) => entities + .GroupBy(keySelector) + .Where(g => g.Count() > 1) + .Flatten2() + .GroupBy(p => (keySelector(p), valueSelector(p)), comparer: keyAndValueComparer) + .GroupBy(g => g.Key.Item1) + .Where(gg => gg.Count() > 1) + .Flatten2() + .ForEach(g => logger.LogWarning( + "Multiple entities with different value of field {} sharing the same key \"{}\": {}", + keyName, g.Key, SharedHelper.UnescapedJsonSerialize(g))); } diff --git a/c#/crawler/src/Tieba/Crawl/Facade/CrawlFacade.cs b/c#/crawler/src/Tieba/Crawl/Facade/CrawlFacade.cs index ce2d3610..13644ce7 100644 --- a/c#/crawler/src/Tieba/Crawl/Facade/CrawlFacade.cs +++ b/c#/crawler/src/Tieba/Crawl/Facade/CrawlFacade.cs @@ -38,7 +38,6 @@ public virtual void Dispose() locks.ReleaseRange(lockId, _lockingPages); } - [SuppressMessage("Major Bug", "S1751:Loops with at most one iteration should be refactored")] public SaverChangeSet? SaveCrawled(CancellationToken stoppingToken = default) { var retryTimes = 0; diff --git a/c#/crawler/src/Tieba/Crawl/Saver/AuthorRevisionSaver.cs b/c#/crawler/src/Tieba/Crawl/Saver/AuthorRevisionSaver.cs index af676769..e659ce9f 100644 --- a/c#/crawler/src/Tieba/Crawl/Saver/AuthorRevisionSaver.cs +++ b/c#/crawler/src/Tieba/Crawl/Saver/AuthorRevisionSaver.cs @@ -48,19 +48,11 @@ private void Save( where TRevision : AuthorRevision { // only takes the first of multiple post from the same author var uniquePosts = posts.DistinctBy(p => p.AuthorUid).ToList(); - if (uniquePosts.Count != posts.Count) ( - from p in posts - group p by p.AuthorUid into g - where g.Count() > 1 - from p in g - group p by (p.AuthorUid, postRevisioningFieldSelector(p)) into g - group g by g.Key.AuthorUid into gg - where gg.Count() > 1 - from g in gg - select g) - .ForEach(g => logger.LogWarning( - "Multiple entities with different value of revisioning field sharing the same TPost.AuthorUid {}: {}", - g.Key, SharedHelper.UnescapedJsonSerialize(g))); + if (uniquePosts.Count != posts.Count) + Helper.LogDifferentValuesSharingTheSameKeyInEntities(logger, posts, + $"{nameof(TPost)}.{nameof(BasePost.AuthorUid)}", + p => p.AuthorUid, + postRevisioningFieldSelector); SharedHelper.GetNowTimestamp(out var now); var existingRevisionOfExistingUsers = dbSet.AsNoTracking() diff --git a/c#/crawler/src/Tieba/Crawl/Saver/ReplySignatureSaver.cs b/c#/crawler/src/Tieba/Crawl/Saver/ReplySignatureSaver.cs index 46137f91..83821df7 100644 --- a/c#/crawler/src/Tieba/Crawl/Saver/ReplySignatureSaver.cs +++ b/c#/crawler/src/Tieba/Crawl/Saver/ReplySignatureSaver.cs @@ -26,20 +26,15 @@ public Action Save(CrawlerDbContext db, IEnumerable replies) }).ToList(); if (signatures.Count == 0) return () => { }; if (signatures.Count != repliesWithSignature - .GroupBy(r => new ReplySignatureProjection(r.SignatureId!.Value, r.Signature!)) - .Count()) ( - from r in repliesWithSignature - group r by r.SignatureId into g - where g.Count() > 1 - from r in g - group r by new ReplySignatureProjection(r.SignatureId!.Value, r.Signature!) into g - group g by g.Key.SignatureId into gg - where gg.Count() > 1 - from g in gg - select g) - .ForEach(g => logger.LogWarning( - "Multiple entities with different value of revisioning field sharing the same signature id {}: {}", - g.Key.SignatureId, SharedHelper.UnescapedJsonSerialize(g))); + .GroupBy(r => (r.SignatureId!.Value, r.Signature!), + comparer: SignatureIdAndValueEqualityComparer.Instance) + .Count()) + Helper.LogDifferentValuesSharingTheSameKeyInEntities(logger, + repliesWithSignature, + nameof(ReplyPost.SignatureId), + r => r.SignatureId, + r => r.Signature, + SignatureIdAndValueEqualityComparer.Instance); var existingSignatures = ( from s in db.ReplySignatures.AsTracking() @@ -63,18 +58,21 @@ join newInReply in signatures on existing.SignatureId equals newInReply.Signatur return locks.Dispose; } - private sealed record ReplySignatureProjection(uint SignatureId, byte[] Signature) + [SuppressMessage("Class Design", "AV1000:Type name contains the word 'and', which suggests it has multiple purposes")] + private sealed class SignatureIdAndValueEqualityComparer : EqualityComparer<(uint? SignatureId, byte[]? Signature)> { - public bool Equals(ReplySignatureProjection? other) => - other != null - && SignatureId == other.SignatureId - && ByteArrayEqualityComparer.Instance.Equals(Signature, other.Signature); + public static SignatureIdAndValueEqualityComparer Instance { get; } = new(); - public override int GetHashCode() + public override bool Equals((uint? SignatureId, byte[]? Signature) x, (uint? SignatureId, byte[]? Signature) y) => + x == y || + (x.SignatureId == y.SignatureId + && ByteArrayEqualityComparer.Instance.Equals(x.Signature, y.Signature)); + + public override int GetHashCode((uint? SignatureId, byte[]? Signature) obj) { var hash = default(HashCode); - hash.Add(SignatureId); - hash.AddBytes(Signature); + hash.Add(obj.SignatureId); + hash.AddBytes(obj.Signature); return hash.ToHashCode(); } } diff --git a/c#/imagePipeline/src/Consumer/MetadataConsumer.cs b/c#/imagePipeline/src/Consumer/MetadataConsumer.cs index 65a6f9a3..b37272c8 100644 --- a/c#/imagePipeline/src/Consumer/MetadataConsumer.cs +++ b/c#/imagePipeline/src/Consumer/MetadataConsumer.cs @@ -383,7 +383,6 @@ private static partial class ExifDateTimeTagValuesParser } : null; - [SuppressMessage("Performance", "CA1852:Seal internal types")] - public record DateTimeAndOffset(DateTime DateTime, string? Offset); + public sealed record DateTimeAndOffset(DateTime DateTime, string? Offset); } } diff --git a/c#/shared/tbm.Shared.csproj b/c#/shared/tbm.Shared.csproj index 3c5dfcbf..d5de73e2 100644 --- a/c#/shared/tbm.Shared.csproj +++ b/c#/shared/tbm.Shared.csproj @@ -18,12 +18,12 @@ - + - + - +