diff --git a/c#/GlobalSuppressions.cs b/c#/GlobalSuppressions.cs index 551a02f9..a7ae8735 100644 --- a/c#/GlobalSuppressions.cs +++ b/c#/GlobalSuppressions.cs @@ -1,6 +1,8 @@ // ReSharper disable once RedundantUsingDirective using System.Diagnostics.CodeAnalysis; +[assembly: SuppressMessage("ApiDesign", "SS036:An enum should explicitly specify its values", Justification = "")] +[assembly: SuppressMessage("ApiDesign", "SS039:An enum should specify a default value", Justification = "")] [assembly: SuppressMessage("Class Design", "AV1008:Class should not be static")] [assembly: SuppressMessage("Class Design", "AV1010:Member hides inherited member")] [assembly: SuppressMessage("Correctness", "SS019:Switch should have default label.")] diff --git a/c#/crawler/src/Db/Revision/AuthorRevision.cs b/c#/crawler/src/Db/Revision/AuthorRevision.cs index 0407759b..28184cec 100644 --- a/c#/crawler/src/Db/Revision/AuthorRevision.cs +++ b/c#/crawler/src/Db/Revision/AuthorRevision.cs @@ -7,5 +7,5 @@ public abstract class AuthorRevision public uint DiscoveredAt { get; set; } public uint Fid { get; set; } public long Uid { get; set; } - public required string TriggeredBy { get; set; } + public required PostType TriggeredBy { get; set; } } diff --git a/c#/crawler/src/Db/Revision/UserRevisions.cs b/c#/crawler/src/Db/Revision/UserRevisions.cs index bb987425..278a0a89 100644 --- a/c#/crawler/src/Db/Revision/UserRevisions.cs +++ b/c#/crawler/src/Db/Revision/UserRevisions.cs @@ -6,7 +6,7 @@ namespace tbm.Crawler.Db.Revision; public abstract class BaseUserRevision : RevisionWithSplitting { public long Uid { get; set; } - public required string TriggeredBy { get; set; } + public required PostType TriggeredBy { get; set; } } public class UserRevision : BaseUserRevision diff --git a/c#/crawler/src/EntryPoint.cs b/c#/crawler/src/EntryPoint.cs index e0deeb0e..a618d16e 100644 --- a/c#/crawler/src/EntryPoint.cs +++ b/c#/crawler/src/EntryPoint.cs @@ -42,7 +42,7 @@ protected override void ConfigureContainer(HostBuilderContext context, Container builder.RegisterType(); builder.RegisterType(); builder.RegisterType().SingleInstance(); - CrawlerLocks.RegisteredLocks.ForEach(type => + Enum.GetValues().ForEach(type => builder.RegisterType() .Keyed(type) .WithParameter("lockType", type) diff --git a/c#/crawler/src/PostType.cs b/c#/crawler/src/PostType.cs new file mode 100644 index 00000000..cfb6a65d --- /dev/null +++ b/c#/crawler/src/PostType.cs @@ -0,0 +1,8 @@ +namespace tbm.Crawler; + +public enum PostType +{ + Thread, + Reply, + SubReply +} diff --git a/c#/crawler/src/Tieba/Crawl/CrawlRequestFlag.cs b/c#/crawler/src/Tieba/Crawl/CrawlRequestFlag.cs index 69419035..e58248ec 100644 --- a/c#/crawler/src/Tieba/Crawl/CrawlRequestFlag.cs +++ b/c#/crawler/src/Tieba/Crawl/CrawlRequestFlag.cs @@ -3,5 +3,5 @@ namespace tbm.Crawler.Tieba.Crawl; public enum CrawlRequestFlag { None = 0, - ThreadClientVersion602 = 1 + ThreadClientVersion602 } diff --git a/c#/crawler/src/Tieba/Crawl/Crawler/ThreadLateCrawler.cs b/c#/crawler/src/Tieba/Crawl/Crawler/ThreadLateCrawler.cs index b0b04671..d663bfca 100644 --- a/c#/crawler/src/Tieba/Crawl/Crawler/ThreadLateCrawler.cs +++ b/c#/crawler/src/Tieba/Crawl/Crawler/ThreadLateCrawler.cs @@ -4,10 +4,10 @@ public class ThreadLateCrawler( ILogger logger, ClientRequester requester, ClientRequesterTcs requesterTcs, - IIndex locks, + IIndex locks, Fid fid) { - private readonly CrawlerLocks _locks = locks["threadLate"]; // singleton + private readonly CrawlerLocks _locks = locks[CrawlerLocks.Type.ThreadLate]; // singleton public delegate ThreadLateCrawler New(Fid fid); @@ -58,7 +58,7 @@ public class ThreadLateCrawler( throw new TiebaException("Error from tieba client.") {Data = {{"raw", json}}}; } - var thread = json.GetProperty("thread"); + var thread = json.GetProperty(Enum.GetName(PostType.Thread)!.ToLower(CultureInfo.InvariantCulture)); #pragma warning disable S3358 // Ternary operators should not be nested return thread.TryGetProperty("thread_info", out var threadInfo) ? threadInfo.TryGetProperty("phone_type", out var phoneType) diff --git a/c#/crawler/src/Tieba/Crawl/CrawlerLocks.cs b/c#/crawler/src/Tieba/Crawl/CrawlerLocks.cs index 92cba78e..97e9f053 100644 --- a/c#/crawler/src/Tieba/Crawl/CrawlerLocks.cs +++ b/c#/crawler/src/Tieba/Crawl/CrawlerLocks.cs @@ -1,6 +1,6 @@ namespace tbm.Crawler.Tieba.Crawl; -public class CrawlerLocks(ILogger logger, IConfiguration config, string lockType) +public class CrawlerLocks(ILogger logger, IConfiguration config, CrawlerLocks.Type lockType) : WithLogTrace(config, $"CrawlerLocks:{lockType}") { private readonly IConfigurationSection _config = config.GetSection($"CrawlerLocks:{lockType}"); @@ -9,8 +9,14 @@ public class CrawlerLocks(ILogger logger, IConfiguration config, s // inner value of field _failed with type ushort refers to failed times on this page and lockId before retry private readonly ConcurrentDictionary> _failed = new(); - public static IEnumerable RegisteredLocks { get; } = ["thread", "threadLate", "reply", "subReply"]; - public string LockType { get; } = lockType; + public enum Type + { + Thread, + ThreadLate, + Reply, + SubReply + } + public Type LockType { get; } = lockType; public IReadOnlySet AcquireRange(LockId lockId, IEnumerable pages) { diff --git a/c#/crawler/src/Tieba/Crawl/Facade/BaseCrawlFacade.cs b/c#/crawler/src/Tieba/Crawl/Facade/BaseCrawlFacade.cs index ddd0c938..b72b10aa 100644 --- a/c#/crawler/src/Tieba/Crawl/Facade/BaseCrawlFacade.cs +++ b/c#/crawler/src/Tieba/Crawl/Facade/BaseCrawlFacade.cs @@ -50,7 +50,7 @@ public virtual void Dispose() var savedPosts = Posts.IsEmpty ? null : postSaver.Save(db); var userSaver = userSaverFactory(_users); - userSaver.Save(db, postSaver.PostType, postSaver.UserFieldChangeIgnorance); + userSaver.Save(db, postSaver.CurrentPostType, postSaver.UserFieldChangeIgnorance); BeforeCommitSaveHook(db, userSaver); try diff --git a/c#/crawler/src/Tieba/Crawl/Facade/ReplyCrawlFacade.cs b/c#/crawler/src/Tieba/Crawl/Facade/ReplyCrawlFacade.cs index ae2d274c..c19594ec 100644 --- a/c#/crawler/src/Tieba/Crawl/Facade/ReplyCrawlFacade.cs +++ b/c#/crawler/src/Tieba/Crawl/Facade/ReplyCrawlFacade.cs @@ -4,7 +4,7 @@ public class ReplyCrawlFacade( ReplyCrawler.New crawlerFactory, Fid fid, Tid tid, - IIndex locks, + IIndex locks, ReplyParser postParser, ReplySaver.New postSaverFactory, UserParser.New userParserFactory, @@ -12,7 +12,7 @@ public class ReplyCrawlFacade( CrawlerDbContext.New dbContextFactory, SonicPusher sonicPusher) : BaseCrawlFacade( - crawlerFactory(fid, tid), fid, new(fid, tid), locks["reply"], + crawlerFactory(fid, tid), fid, new(fid, tid), locks[CrawlerLocks.Type.Reply], postParser, postSaverFactory.Invoke, userParserFactory.Invoke, userSaverFactory.Invoke) { diff --git a/c#/crawler/src/Tieba/Crawl/Facade/SubReplyCrawlFacade.cs b/c#/crawler/src/Tieba/Crawl/Facade/SubReplyCrawlFacade.cs index b5c00303..11c4a3be 100644 --- a/c#/crawler/src/Tieba/Crawl/Facade/SubReplyCrawlFacade.cs +++ b/c#/crawler/src/Tieba/Crawl/Facade/SubReplyCrawlFacade.cs @@ -5,14 +5,14 @@ public class SubReplyCrawlFacade( Fid fid, Tid tid, Pid pid, - IIndex locks, + IIndex locks, SubReplyParser postParser, SubReplySaver.New postSaverFactory, UserParser.New userParserFactory, UserSaver.New userSaverFactory, SonicPusher sonicPusher) : BaseCrawlFacade( - crawlerFactory(tid, pid), fid, new(fid, tid, pid), locks["subReply"], + crawlerFactory(tid, pid), fid, new(fid, tid, pid), locks[CrawlerLocks.Type.SubReply], postParser, postSaverFactory.Invoke, userParserFactory.Invoke, userSaverFactory.Invoke) { diff --git a/c#/crawler/src/Tieba/Crawl/Facade/ThreadArchiveCrawlFacade.cs b/c#/crawler/src/Tieba/Crawl/Facade/ThreadArchiveCrawlFacade.cs index 489c0e41..ee61f408 100644 --- a/c#/crawler/src/Tieba/Crawl/Facade/ThreadArchiveCrawlFacade.cs +++ b/c#/crawler/src/Tieba/Crawl/Facade/ThreadArchiveCrawlFacade.cs @@ -4,7 +4,7 @@ public class ThreadArchiveCrawlFacade( ThreadArchiveCrawler.New crawlerFactory, string forumName, Fid fid, - IIndex locks, + IIndex locks, ThreadParser postParser, ThreadSaver.New postSaverFactory, UserParser.New userParserFactory, diff --git a/c#/crawler/src/Tieba/Crawl/Facade/ThreadCrawlFacade.cs b/c#/crawler/src/Tieba/Crawl/Facade/ThreadCrawlFacade.cs index 41b65213..76dad0c3 100644 --- a/c#/crawler/src/Tieba/Crawl/Facade/ThreadCrawlFacade.cs +++ b/c#/crawler/src/Tieba/Crawl/Facade/ThreadCrawlFacade.cs @@ -4,13 +4,13 @@ public class ThreadCrawlFacade( ThreadCrawler.New crawlerFactory, string forumName, Fid fid, - IIndex locks, + IIndex locks, ThreadParser postParser, ThreadSaver.New postSaverFactory, UserParser.New userParserFactory, UserSaver.New userSaverFactory) : BaseCrawlFacade( - crawlerFactory(forumName), fid, new(fid), locks["thread"], + crawlerFactory(forumName), fid, new(fid), locks[CrawlerLocks.Type.Thread], postParser, postSaverFactory.Invoke, userParserFactory.Invoke, userSaverFactory.Invoke) { diff --git a/c#/crawler/src/Tieba/Crawl/Saver/AuthorRevisionSaver.cs b/c#/crawler/src/Tieba/Crawl/Saver/AuthorRevisionSaver.cs index 3844dc8f..ab3b56b4 100644 --- a/c#/crawler/src/Tieba/Crawl/Saver/AuthorRevisionSaver.cs +++ b/c#/crawler/src/Tieba/Crawl/Saver/AuthorRevisionSaver.cs @@ -2,14 +2,14 @@ namespace tbm.Crawler.Tieba.Crawl.Saver; -public class AuthorRevisionSaver(string triggeredByPostType) +public class AuthorRevisionSaver(PostType triggeredByPostType) { // locks only using fid and uid field values from AuthorRevision // this prevents inserting multiple entities with similar time and other fields with the same values private static readonly HashSet<(Fid Fid, Uid Uid)> AuthorExpGradeLocks = []; private readonly List<(Fid Fid, Uid Uid)> _savedRevisions = []; - public delegate AuthorRevisionSaver New(string triggeredByPostType); + public delegate AuthorRevisionSaver New(PostType triggeredByPostType); public Action SaveAuthorExpGradeRevisions (CrawlerDbContext db, IReadOnlyCollection posts) diff --git a/c#/crawler/src/Tieba/Crawl/Saver/Post/BasePostSaver.cs b/c#/crawler/src/Tieba/Crawl/Saver/Post/BasePostSaver.cs index 21d5ee62..42398f70 100644 --- a/c#/crawler/src/Tieba/Crawl/Saver/Post/BasePostSaver.cs +++ b/c#/crawler/src/Tieba/Crawl/Saver/Post/BasePostSaver.cs @@ -6,7 +6,7 @@ public abstract class BasePostSaver( ILogger> logger, ConcurrentDictionary posts, AuthorRevisionSaver.New authorRevisionSaverFactory, - string postType) + PostType currentPostType) : BaseSaver(logger) where TPost : class, IPost where TBaseRevision : class, IRevision @@ -17,9 +17,9 @@ public abstract class BasePostSaver( public virtual IFieldChangeIgnorance.FieldChangeIgnoranceDelegates UserFieldChangeIgnorance => throw new NotSupportedException(); - public string PostType { get; } = postType; + public PostType CurrentPostType { get; } = currentPostType; protected ConcurrentDictionary Posts { get; } = posts; - protected AuthorRevisionSaver AuthorRevisionSaver { get; } = authorRevisionSaverFactory(postType); + protected AuthorRevisionSaver AuthorRevisionSaver { get; } = authorRevisionSaverFactory(currentPostType); [SuppressMessage("Misc", "AV1225:Method that raises an event should be protected virtual and be named 'On' followed by event name")] public void OnPostSaveEvent() => PostSaveEvent(); diff --git a/c#/crawler/src/Tieba/Crawl/Saver/Post/ReplySaver.cs b/c#/crawler/src/Tieba/Crawl/Saver/Post/ReplySaver.cs index a77fd202..f63415e1 100644 --- a/c#/crawler/src/Tieba/Crawl/Saver/Post/ReplySaver.cs +++ b/c#/crawler/src/Tieba/Crawl/Saver/Post/ReplySaver.cs @@ -8,7 +8,7 @@ public partial class ReplySaver( ConcurrentDictionary posts, AuthorRevisionSaver.New authorRevisionSaverFactory) : BasePostSaver( - logger, posts, authorRevisionSaverFactory, "reply") + logger, posts, authorRevisionSaverFactory, PostType.Reply) { public delegate ReplySaver New(ConcurrentDictionary posts); diff --git a/c#/crawler/src/Tieba/Crawl/Saver/Post/SubReplySaver.cs b/c#/crawler/src/Tieba/Crawl/Saver/Post/SubReplySaver.cs index 400212d8..c16e8b47 100644 --- a/c#/crawler/src/Tieba/Crawl/Saver/Post/SubReplySaver.cs +++ b/c#/crawler/src/Tieba/Crawl/Saver/Post/SubReplySaver.cs @@ -7,7 +7,7 @@ public class SubReplySaver( ConcurrentDictionary posts, AuthorRevisionSaver.New authorRevisionSaverFactory) : BasePostSaver( - logger, posts, authorRevisionSaverFactory, "subReply") + logger, posts, authorRevisionSaverFactory, PostType.SubReply) { public delegate SubReplySaver New(ConcurrentDictionary posts); diff --git a/c#/crawler/src/Tieba/Crawl/Saver/Post/ThreadSaver.cs b/c#/crawler/src/Tieba/Crawl/Saver/Post/ThreadSaver.cs index 9d398932..a941918f 100644 --- a/c#/crawler/src/Tieba/Crawl/Saver/Post/ThreadSaver.cs +++ b/c#/crawler/src/Tieba/Crawl/Saver/Post/ThreadSaver.cs @@ -7,7 +7,7 @@ public class ThreadSaver( ConcurrentDictionary posts, AuthorRevisionSaver.New authorRevisionSaverFactory) : BasePostSaver( - logger, posts, authorRevisionSaverFactory, "thread") + logger, posts, authorRevisionSaverFactory, PostType.Thread) { public delegate ThreadSaver New(ConcurrentDictionary posts); diff --git a/c#/crawler/src/Tieba/Crawl/Saver/UserSaver.cs b/c#/crawler/src/Tieba/Crawl/Saver/UserSaver.cs index 85a81246..614baa76 100644 --- a/c#/crawler/src/Tieba/Crawl/Saver/UserSaver.cs +++ b/c#/crawler/src/Tieba/Crawl/Saver/UserSaver.cs @@ -41,7 +41,7 @@ public partial class UserSaver(ILogger logger, ConcurrentDictionary logger, - IIndex registeredLocksLookup, + IIndex registeredLocksLookup, CrawlPost crawlPost, Func> dbContextDefaultFactory, Func> threadLateCrawlFacadeFactory, @@ -13,12 +13,12 @@ public class RetryCrawlWorker( { protected override async Task DoWork(CancellationToken stoppingToken) { - foreach (var lockType in CrawlerLocks.RegisteredLocks) + foreach (var lockType in Enum.GetValues()) { if (stoppingToken.IsCancellationRequested) return; var failed = registeredLocksLookup[lockType].RetryAllFailed(); if (failed.Count == 0) continue; // skip current lock type if there's nothing needs to retry - if (lockType == "threadLate") + if (lockType == CrawlerLocks.Type.ThreadLate) { await RetryThreadLate(failed, stoppingToken); @@ -30,7 +30,7 @@ protected override async Task DoWork(CancellationToken stoppingToken) } private Func>, Task> RetryFailed - (string lockType, CancellationToken stoppingToken = default) => async failedPagesKeyByLockId => + (CrawlerLocks.Type lockType, CancellationToken stoppingToken = default) => async failedPagesKeyByLockId => { if (stoppingToken.IsCancellationRequested) return; var ((fid, tid, pid), failureCountsKeyByPage) = failedPagesKeyByLockId; @@ -39,15 +39,15 @@ protected override async Task DoWork(CancellationToken stoppingToken) switch (lockType) { - case "thread": + case CrawlerLocks.Type.Thread: await RetryThread(fid, pages, failureCountsKeyByPage.Count, FailureCountSelector, stoppingToken); break; - case "reply" when tid != null: + case CrawlerLocks.Type.Reply when tid != null: await RetryReply(fid, tid.Value, pages, failureCountsKeyByPage.Count, FailureCountSelector, stoppingToken); break; - case "subReply" when tid != null && pid != null: + case CrawlerLocks.Type.SubReply when tid != null && pid != null: await RetrySubReply(fid, tid.Value, pid.Value, pages, failureCountsKeyByPage.Count, FailureCountSelector, stoppingToken); break;