Skip to content

Commit

Permalink
* replace all post type strings with new enums PostType or `Crawler…
Browse files Browse the repository at this point in the history
…Locks.Type`

* rename prop `PostType` to `CurrentPostType` @ BasePostSaver.cs
* remove the explicit value for `ThreadClientVersion602` @ CrawlRequestFlag.cs
- prop `RegisteredLocks` @ CrawlerLocks.cs
@ crawler

* suppress Roslyn analyzer rules `SS036` and `SS039` @ GlobalSuppressions.cs
@ c#
  • Loading branch information
n0099 committed May 2, 2024
1 parent c85788a commit 14fed98
Show file tree
Hide file tree
Showing 20 changed files with 50 additions and 34 deletions.
2 changes: 2 additions & 0 deletions c#/GlobalSuppressions.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
// ReSharper disable once RedundantUsingDirective
using System.Diagnostics.CodeAnalysis;

[assembly: SuppressMessage("ApiDesign", "SS036:An enum should explicitly specify its values", Justification = "<Pending>")]
[assembly: SuppressMessage("ApiDesign", "SS039:An enum should specify a default value", Justification = "<Pending>")]
[assembly: SuppressMessage("Class Design", "AV1008:Class should not be static")]
[assembly: SuppressMessage("Class Design", "AV1010:Member hides inherited member")]
[assembly: SuppressMessage("Correctness", "SS019:Switch should have default label.")]
Expand Down
2 changes: 1 addition & 1 deletion c#/crawler/src/Db/Revision/AuthorRevision.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,5 @@ public abstract class AuthorRevision
public uint DiscoveredAt { get; set; }
public uint Fid { get; set; }
public long Uid { get; set; }
public required string TriggeredBy { get; set; }
public required PostType TriggeredBy { get; set; }
}
2 changes: 1 addition & 1 deletion c#/crawler/src/Db/Revision/UserRevisions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ namespace tbm.Crawler.Db.Revision;
public abstract class BaseUserRevision : RevisionWithSplitting<BaseUserRevision>
{
public long Uid { get; set; }
public required string TriggeredBy { get; set; }
public required PostType TriggeredBy { get; set; }
}

public class UserRevision : BaseUserRevision
Expand Down
2 changes: 1 addition & 1 deletion c#/crawler/src/EntryPoint.cs
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ protected override void ConfigureContainer(HostBuilderContext context, Container
builder.RegisterType<CrawlerDbContext>();
builder.RegisterType<ClientRequester>();
builder.RegisterType<ClientRequesterTcs>().SingleInstance();
CrawlerLocks.RegisteredLocks.ForEach(type =>
Enum.GetValues<CrawlerLocks.Type>().ForEach(type =>
builder.RegisterType<CrawlerLocks>()
.Keyed<CrawlerLocks>(type)
.WithParameter("lockType", type)
Expand Down
8 changes: 8 additions & 0 deletions c#/crawler/src/PostType.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
namespace tbm.Crawler;

/// <summary>
/// Identifies a kind of post: <see cref="Thread"/>, <see cref="Reply"/> or <see cref="SubReply"/>.
/// Post savers pass it along as their current post type and
/// revision entities store it in their <c>TriggeredBy</c> property.
/// </summary>
/// <remarks>
/// No member declares an explicit value, so the compiler numbers them 0, 1, 2 in declaration order;
/// reordering or inserting members changes those numbers.
/// <c>ThreadLateCrawler</c> also lower-cases the name of <see cref="Thread"/> to build a JSON property name,
/// so renaming that member changes the property being read there.
/// </remarks>
public enum PostType
{
Thread,
Reply,
SubReply
}
2 changes: 1 addition & 1 deletion c#/crawler/src/Tieba/Crawl/CrawlRequestFlag.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@ namespace tbm.Crawler.Tieba.Crawl;
public enum CrawlRequestFlag
{
None = 0,
ThreadClientVersion602 = 1
ThreadClientVersion602
}
6 changes: 3 additions & 3 deletions c#/crawler/src/Tieba/Crawl/Crawler/ThreadLateCrawler.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@ public class ThreadLateCrawler(
ILogger<ThreadLateCrawler> logger,
ClientRequester requester,
ClientRequesterTcs requesterTcs,
IIndex<string, CrawlerLocks> locks,
IIndex<CrawlerLocks.Type, CrawlerLocks> locks,
Fid fid)
{
private readonly CrawlerLocks _locks = locks["threadLate"]; // singleton
private readonly CrawlerLocks _locks = locks[CrawlerLocks.Type.ThreadLate]; // singleton

public delegate ThreadLateCrawler New(Fid fid);

Expand Down Expand Up @@ -58,7 +58,7 @@ public class ThreadLateCrawler(
throw new TiebaException("Error from tieba client.") {Data = {{"raw", json}}};
}

var thread = json.GetProperty("thread");
var thread = json.GetProperty(Enum.GetName(PostType.Thread)!.ToLower(CultureInfo.InvariantCulture));
#pragma warning disable S3358 // Ternary operators should not be nested
return thread.TryGetProperty("thread_info", out var threadInfo)
? threadInfo.TryGetProperty("phone_type", out var phoneType)
Expand Down
12 changes: 9 additions & 3 deletions c#/crawler/src/Tieba/Crawl/CrawlerLocks.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
namespace tbm.Crawler.Tieba.Crawl;

public class CrawlerLocks(ILogger<CrawlerLocks> logger, IConfiguration config, string lockType)
public class CrawlerLocks(ILogger<CrawlerLocks> logger, IConfiguration config, CrawlerLocks.Type lockType)
: WithLogTrace(config, $"CrawlerLocks:{lockType}")
{
private readonly IConfigurationSection _config = config.GetSection($"CrawlerLocks:{lockType}");
Expand All @@ -9,8 +9,14 @@ public class CrawlerLocks(ILogger<CrawlerLocks> logger, IConfiguration config, s
// the inner value of field _failed (of type ushort) is the number of failures recorded for this page and lockId before retrying
private readonly ConcurrentDictionary<LockId, ConcurrentDictionary<Page, FailureCount>> _failed = new();

public static IEnumerable<string> RegisteredLocks { get; } = ["thread", "threadLate", "reply", "subReply"];
public string LockType { get; } = lockType;
/// <summary>
/// Identifies one set of crawler locks. A separate <see cref="CrawlerLocks"/> is registered
/// in the container for every member, keyed by that member, and consumers resolve theirs
/// through <c>IIndex&lt;CrawlerLocks.Type, CrawlerLocks&gt;</c>.
/// </summary>
/// <remarks>
/// The member is interpolated into the configuration section path <c>CrawlerLocks:{lockType}</c>,
/// so renaming a member changes which section is read.
/// Registration and the retry worker both walk <c>Enum.GetValues&lt;CrawlerLocks.Type&gt;()</c>,
/// so adding a member here automatically registers and retries a new lock set.
/// </remarks>
public enum Type
{
/// <summary>Lock set resolved by <c>ThreadCrawlFacade</c>.</summary>
Thread,
/// <summary>Lock set resolved by <c>ThreadLateCrawler</c>; the retry worker handles it on a separate path.</summary>
ThreadLate,
/// <summary>Lock set resolved by <c>ReplyCrawlFacade</c>.</summary>
Reply,
/// <summary>Lock set resolved by <c>SubReplyCrawlFacade</c>.</summary>
SubReply
}
public Type LockType { get; } = lockType;

public IReadOnlySet<Page> AcquireRange(LockId lockId, IEnumerable<Page> pages)
{
Expand Down
2 changes: 1 addition & 1 deletion c#/crawler/src/Tieba/Crawl/Facade/BaseCrawlFacade.cs
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ public virtual void Dispose()
var savedPosts = Posts.IsEmpty ? null : postSaver.Save(db);

var userSaver = userSaverFactory(_users);
userSaver.Save(db, postSaver.PostType, postSaver.UserFieldChangeIgnorance);
userSaver.Save(db, postSaver.CurrentPostType, postSaver.UserFieldChangeIgnorance);

BeforeCommitSaveHook(db, userSaver);
try
Expand Down
4 changes: 2 additions & 2 deletions c#/crawler/src/Tieba/Crawl/Facade/ReplyCrawlFacade.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,15 @@ public class ReplyCrawlFacade(
ReplyCrawler.New crawlerFactory,
Fid fid,
Tid tid,
IIndex<string, CrawlerLocks> locks,
IIndex<CrawlerLocks.Type, CrawlerLocks> locks,
ReplyParser postParser,
ReplySaver.New postSaverFactory,
UserParser.New userParserFactory,
UserSaver.New userSaverFactory,
CrawlerDbContext.New dbContextFactory,
SonicPusher sonicPusher)
: BaseCrawlFacade<ReplyPost, BaseReplyRevision, ReplyResponse, Reply>(
crawlerFactory(fid, tid), fid, new(fid, tid), locks["reply"],
crawlerFactory(fid, tid), fid, new(fid, tid), locks[CrawlerLocks.Type.Reply],
postParser, postSaverFactory.Invoke,
userParserFactory.Invoke, userSaverFactory.Invoke)
{
Expand Down
4 changes: 2 additions & 2 deletions c#/crawler/src/Tieba/Crawl/Facade/SubReplyCrawlFacade.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,14 @@ public class SubReplyCrawlFacade(
Fid fid,
Tid tid,
Pid pid,
IIndex<string, CrawlerLocks> locks,
IIndex<CrawlerLocks.Type, CrawlerLocks> locks,
SubReplyParser postParser,
SubReplySaver.New postSaverFactory,
UserParser.New userParserFactory,
UserSaver.New userSaverFactory,
SonicPusher sonicPusher)
: BaseCrawlFacade<SubReplyPost, BaseSubReplyRevision, SubReplyResponse, SubReply>(
crawlerFactory(tid, pid), fid, new(fid, tid, pid), locks["subReply"],
crawlerFactory(tid, pid), fid, new(fid, tid, pid), locks[CrawlerLocks.Type.SubReply],
postParser, postSaverFactory.Invoke,
userParserFactory.Invoke, userSaverFactory.Invoke)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ public class ThreadArchiveCrawlFacade(
ThreadArchiveCrawler.New crawlerFactory,
string forumName,
Fid fid,
IIndex<string, CrawlerLocks> locks,
IIndex<CrawlerLocks.Type, CrawlerLocks> locks,
ThreadParser postParser,
ThreadSaver.New postSaverFactory,
UserParser.New userParserFactory,
Expand Down
4 changes: 2 additions & 2 deletions c#/crawler/src/Tieba/Crawl/Facade/ThreadCrawlFacade.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@ public class ThreadCrawlFacade(
ThreadCrawler.New crawlerFactory,
string forumName,
Fid fid,
IIndex<string, CrawlerLocks> locks,
IIndex<CrawlerLocks.Type, CrawlerLocks> locks,
ThreadParser postParser,
ThreadSaver.New postSaverFactory,
UserParser.New userParserFactory,
UserSaver.New userSaverFactory)
: BaseCrawlFacade<ThreadPost, BaseThreadRevision, ThreadResponse, Thread>(
crawlerFactory(forumName), fid, new(fid), locks["thread"],
crawlerFactory(forumName), fid, new(fid), locks[CrawlerLocks.Type.Thread],
postParser, postSaverFactory.Invoke,
userParserFactory.Invoke, userSaverFactory.Invoke)
{
Expand Down
4 changes: 2 additions & 2 deletions c#/crawler/src/Tieba/Crawl/Saver/AuthorRevisionSaver.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,14 @@

namespace tbm.Crawler.Tieba.Crawl.Saver;

public class AuthorRevisionSaver(string triggeredByPostType)
public class AuthorRevisionSaver(PostType triggeredByPostType)
{
// locks are keyed only by the fid and uid field values of AuthorRevision
// this prevents inserting multiple entities that have similar times and otherwise identical field values
private static readonly HashSet<(Fid Fid, Uid Uid)> AuthorExpGradeLocks = [];
private readonly List<(Fid Fid, Uid Uid)> _savedRevisions = [];

public delegate AuthorRevisionSaver New(string triggeredByPostType);
public delegate AuthorRevisionSaver New(PostType triggeredByPostType);

public Action SaveAuthorExpGradeRevisions<TPostWithAuthorExpGrade>
(CrawlerDbContext db, IReadOnlyCollection<TPostWithAuthorExpGrade> posts)
Expand Down
6 changes: 3 additions & 3 deletions c#/crawler/src/Tieba/Crawl/Saver/Post/BasePostSaver.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ public abstract class BasePostSaver<TPost, TBaseRevision>(
ILogger<BasePostSaver<TPost, TBaseRevision>> logger,
ConcurrentDictionary<PostId, TPost> posts,
AuthorRevisionSaver.New authorRevisionSaverFactory,
string postType)
PostType currentPostType)
: BaseSaver<TBaseRevision>(logger)
where TPost : class, IPost
where TBaseRevision : class, IRevision
Expand All @@ -17,9 +17,9 @@ public abstract class BasePostSaver<TPost, TBaseRevision>(

public virtual IFieldChangeIgnorance.FieldChangeIgnoranceDelegates
UserFieldChangeIgnorance => throw new NotSupportedException();
public string PostType { get; } = postType;
public PostType CurrentPostType { get; } = currentPostType;
protected ConcurrentDictionary<PostId, TPost> Posts { get; } = posts;
protected AuthorRevisionSaver AuthorRevisionSaver { get; } = authorRevisionSaverFactory(postType);
protected AuthorRevisionSaver AuthorRevisionSaver { get; } = authorRevisionSaverFactory(currentPostType);

[SuppressMessage("Misc", "AV1225:Method that raises an event should be protected virtual and be named 'On' followed by event name")]
public void OnPostSaveEvent() => PostSaveEvent();
Expand Down
2 changes: 1 addition & 1 deletion c#/crawler/src/Tieba/Crawl/Saver/Post/ReplySaver.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ public partial class ReplySaver(
ConcurrentDictionary<PostId, ReplyPost> posts,
AuthorRevisionSaver.New authorRevisionSaverFactory)
: BasePostSaver<ReplyPost, BaseReplyRevision>(
logger, posts, authorRevisionSaverFactory, "reply")
logger, posts, authorRevisionSaverFactory, PostType.Reply)
{
public delegate ReplySaver New(ConcurrentDictionary<PostId, ReplyPost> posts);

Expand Down
2 changes: 1 addition & 1 deletion c#/crawler/src/Tieba/Crawl/Saver/Post/SubReplySaver.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ public class SubReplySaver(
ConcurrentDictionary<PostId, SubReplyPost> posts,
AuthorRevisionSaver.New authorRevisionSaverFactory)
: BasePostSaver<SubReplyPost, BaseSubReplyRevision>(
logger, posts, authorRevisionSaverFactory, "subReply")
logger, posts, authorRevisionSaverFactory, PostType.SubReply)
{
public delegate SubReplySaver New(ConcurrentDictionary<PostId, SubReplyPost> posts);

Expand Down
2 changes: 1 addition & 1 deletion c#/crawler/src/Tieba/Crawl/Saver/Post/ThreadSaver.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ public class ThreadSaver(
ConcurrentDictionary<Tid, ThreadPost> posts,
AuthorRevisionSaver.New authorRevisionSaverFactory)
: BasePostSaver<ThreadPost, BaseThreadRevision>(
logger, posts, authorRevisionSaverFactory, "thread")
logger, posts, authorRevisionSaverFactory, PostType.Thread)
{
public delegate ThreadSaver New(ConcurrentDictionary<Tid, ThreadPost> posts);

Expand Down
2 changes: 1 addition & 1 deletion c#/crawler/src/Tieba/Crawl/Saver/UserSaver.cs
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ public partial class UserSaver(ILogger<UserSaver> logger, ConcurrentDictionary<U

public void Save(
CrawlerDbContext db,
string postType,
PostType postType,
IFieldChangeIgnorance.FieldChangeIgnoranceDelegates userFieldChangeIgnorance)
{
if (users.IsEmpty) return;
Expand Down
14 changes: 7 additions & 7 deletions c#/crawler/src/Worker/RetryCrawlWorker.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ namespace tbm.Crawler.Worker;

public class RetryCrawlWorker(
ILogger<RetryCrawlWorker> logger,
IIndex<string, CrawlerLocks> registeredLocksLookup,
IIndex<CrawlerLocks.Type, CrawlerLocks> registeredLocksLookup,
CrawlPost crawlPost,
Func<Owned<CrawlerDbContext.NewDefault>> dbContextDefaultFactory,
Func<Owned<ThreadLateCrawlFacade.New>> threadLateCrawlFacadeFactory,
Expand All @@ -13,12 +13,12 @@ public class RetryCrawlWorker(
{
protected override async Task DoWork(CancellationToken stoppingToken)
{
foreach (var lockType in CrawlerLocks.RegisteredLocks)
foreach (var lockType in Enum.GetValues<CrawlerLocks.Type>())
{
if (stoppingToken.IsCancellationRequested) return;
var failed = registeredLocksLookup[lockType].RetryAllFailed();
if (failed.Count == 0) continue; // skip current lock type if there's nothing needs to retry
if (lockType == "threadLate")
if (lockType == CrawlerLocks.Type.ThreadLate)
{
await RetryThreadLate(failed, stoppingToken);

Expand All @@ -30,7 +30,7 @@ protected override async Task DoWork(CancellationToken stoppingToken)
}

private Func<KeyValuePair<CrawlerLocks.LockId, IReadOnlyDictionary<Page, FailureCount>>, Task> RetryFailed
(string lockType, CancellationToken stoppingToken = default) => async failedPagesKeyByLockId =>
(CrawlerLocks.Type lockType, CancellationToken stoppingToken = default) => async failedPagesKeyByLockId =>
{
if (stoppingToken.IsCancellationRequested) return;
var ((fid, tid, pid), failureCountsKeyByPage) = failedPagesKeyByLockId;
Expand All @@ -39,15 +39,15 @@ protected override async Task DoWork(CancellationToken stoppingToken)

switch (lockType)
{
case "thread":
case CrawlerLocks.Type.Thread:
await RetryThread(fid, pages,
failureCountsKeyByPage.Count, FailureCountSelector, stoppingToken);
break;
case "reply" when tid != null:
case CrawlerLocks.Type.Reply when tid != null:
await RetryReply(fid, tid.Value, pages,
failureCountsKeyByPage.Count, FailureCountSelector, stoppingToken);
break;
case "subReply" when tid != null && pid != null:
case CrawlerLocks.Type.SubReply when tid != null && pid != null:
await RetrySubReply(fid, tid.Value, pid.Value, pages,
failureCountsKeyByPage.Count, FailureCountSelector, stoppingToken);
break;
Expand Down

0 comments on commit 14fed98

Please sign in to comment.