Skip to content

Commit

Permalink
* fix the relationship between entities ThreadPost and `LatestReplie…
Browse files Browse the repository at this point in the history
…r` should be many-to-one instead of one-to-one @ `CrawlerDbContext.OnModelCreating()`

* replace the expression-typed param `existingPostPredicate` with the callback param `postQueryTransformer` @ `PostSaver.Save()`
* rename field `UserId` to `Uid` @ ReplySignature.cs
* move `ValueTuple<,>` to record `UniqueAuthorRevision` @ AuthorRevisionSaver.cs
@ c#/crawler
  • Loading branch information
n0099 committed Jul 22, 2024
1 parent b81b64a commit 644491c
Show file tree
Hide file tree
Showing 8 changed files with 18 additions and 18 deletions.
5 changes: 2 additions & 3 deletions c#/crawler/src/Db/CrawlerDbContext.cs
Original file line number Diff line number Diff line change
Expand Up @@ -71,11 +71,10 @@ protected override void OnModelCreating(ModelBuilder b)
b.Entity<User>().ToTable("tbmc_user");
b.Entity<LatestReplier>().ToTable("tbmc_latestReplier");
b.Entity<LatestReplier>().Property(e => e.DisplayName).HasConversion<byte[]>();
b.Entity<LatestReplier>().HasOne<ThreadPost>().WithOne(e => e.LatestReplier)
.HasForeignKey<ThreadPost>(e => e.LatestReplierId);
b.Entity<LatestReplierRevision>().ToTable("tbmcr_latestReplier").HasKey(e => new {e.TakenAt, e.Uid});
b.Entity<LatestReplierRevision>().Property(e => e.DisplayName).HasConversion<byte[]>();
b.Entity<ThreadPost>().ToTable($"tbmc_f{Fid}_thread");
b.Entity<ThreadPost>().ToTable($"tbmc_f{Fid}_thread")
.HasOne<LatestReplier>(e => e.LatestReplier).WithMany().HasForeignKey(e => e.LatestReplierId);
b.Entity<ThreadMissingFirstReply>().ToTable("tbmc_thread_missingFirstReply");
b.Entity<ReplyPost>().ToTable($"tbmc_f{Fid}_reply");
b.Entity<ReplyContent>().ToTable($"tbmc_f{Fid}_reply_content");
Expand Down
2 changes: 1 addition & 1 deletion c#/crawler/src/Db/Post/ReplySignature.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ namespace tbm.Crawler.Db.Post;

public class ReplySignature : RowVersionedEntity
{
public long UserId { get; set; }
public long Uid { get; set; }

Check notice on line 6 in c#/crawler/src/Db/Post/ReplySignature.cs

View workflow job for this annotation

GitHub Actions / runs-on (macos-latest) / ReSharper

"[UseSymbolAlias] Use type alias 'Uid'" on /Users/runner/work/open-tbm/open-tbm/c#/crawler/src/Db/Post/ReplySignature.cs(6,148)

Check notice on line 6 in c#/crawler/src/Db/Post/ReplySignature.cs

View workflow job for this annotation

GitHub Actions / runs-on (ubuntu-latest) / ReSharper

"[UseSymbolAlias] Use type alias 'Uid'" on /home/runner/work/open-tbm/open-tbm/c#/crawler/src/Db/Post/ReplySignature.cs(6,148)

Check notice on line 6 in c#/crawler/src/Db/Post/ReplySignature.cs

View workflow job for this annotation

GitHub Actions / runs-on (windows-latest) / ReSharper

"[UseSymbolAlias] Use type alias 'Uid'" on D:\a\open-tbm\open-tbm\c#\crawler\src\Db\Post\ReplySignature.cs(6,148)
public uint SignatureId { get; set; }
public required byte[] XxHash3 { get; set; }
public required byte[] ProtoBufBytes { get; set; }
Expand Down
5 changes: 2 additions & 3 deletions c#/crawler/src/Tieba/Crawl/Saver/Post/PostSaver.cs
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,10 @@ protected SaverChangeSet<TPost> Save<TRevision>(
CrawlerDbContext db,
Func<TPost, PostId> postIdSelector,
Func<TPost, TRevision> revisionFactory,
ExpressionStarter<TPost> existingPostPredicate)
Func<IQueryable<TPost>, IQueryable<TPost>> postQueryTransformer)
where TRevision : TBaseRevision
{
var existingPostsKeyById = db.Set<TPost>().AsTracking()
.Where(existingPostPredicate).ToDictionary(postIdSelector);
var existingPostsKeyById = postQueryTransformer(db.Set<TPost>().AsTracking()).ToDictionary(postIdSelector);

// clone before entities get mutated by SaverWithRevision.SaveEntitiesWithRevision()
var existingPostsBeforeMerge = existingPostsKeyById.Select(pair => (TPost)pair.Value.Clone()).ToList();
Expand Down
2 changes: 1 addition & 1 deletion c#/crawler/src/Tieba/Crawl/Saver/Post/ReplySaver.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ public override SaverChangeSet<ReplyPost> Save(CrawlerDbContext db)
{
var changeSet = Save(db, r => r.Pid,
r => new ReplyRevision {TakenAt = r.UpdatedAt ?? r.CreatedAt, Pid = r.Pid},
LinqKit.PredicateBuilder.New<ReplyPost>(r => Posts.Keys.Contains(r.Pid)));
posts => posts.Where(r => Posts.Keys.Contains(r.Pid)));

db.ReplyContents.AddRange(changeSet.NewlyAdded // https://github.com/dotnet/efcore/issues/33945
.Select(r => new ReplyContent {Pid = r.Pid, ProtoBufBytes = r.Content}));
Expand Down
2 changes: 1 addition & 1 deletion c#/crawler/src/Tieba/Crawl/Saver/Post/SubReplySaver.cs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ public override SaverChangeSet<SubReplyPost> Save(CrawlerDbContext db)
{
var changeSet = Save(db, sr => sr.Spid,
sr => new SubReplyRevision {TakenAt = sr.UpdatedAt ?? sr.CreatedAt, Spid = sr.Spid},
LinqKit.PredicateBuilder.New<SubReplyPost>(sr => Posts.Keys.Contains(sr.Spid)));
posts => posts.Where(sr => Posts.Keys.Contains(sr.Spid)));

db.SubReplyContents.AddRange(changeSet.NewlyAdded.Select(sr => // https://github.com/dotnet/efcore/issues/33945
new SubReplyContent {Spid = sr.Spid, ProtoBufBytes = sr.Content}));
Expand Down
2 changes: 1 addition & 1 deletion c#/crawler/src/Tieba/Crawl/Saver/Post/ThreadSaver.cs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ public override SaverChangeSet<ThreadPost> Save(CrawlerDbContext db)
var changeSet = Save(db,
th => th.Tid,
th => new ThreadRevision {TakenAt = th.UpdatedAt ?? th.CreatedAt, Tid = th.Tid},
PredicateBuilder.New<ThreadPost>(th => Posts.Keys.Contains(th.Tid)));
posts => posts.Where(th => Posts.Keys.Contains(th.Tid)));

PostSaveHandlers += threadLatestReplierSaver.SaveFromThread(db, changeSet.AllAfter);

Expand Down
16 changes: 9 additions & 7 deletions c#/crawler/src/Tieba/Crawl/Saver/Related/AuthorRevisionSaver.cs
Original file line number Diff line number Diff line change
@@ -1,14 +1,12 @@
namespace tbm.Crawler.Tieba.Crawl.Saver.Related;

// locks only using AuthorRevision.Fid and Uid, ignoring TriggeredBy
// this prevents inserting multiple entities with similar time and other fields with the same values
public class AuthorRevisionSaver(
ILogger<AuthorRevisionSaver> logger,
SaverLocks<(Fid Fid, Uid Uid)>.New saverLocksFactory,
SaverLocks<AuthorRevisionSaver.UniqueAuthorRevision>.New saverLocksFactory,
PostType triggeredByPostType)
{
private static readonly HashSet<(Fid Fid, Uid Uid)> GlobalLockedAuthorExpGradeKeys = [];
private readonly Lazy<SaverLocks<(Fid Fid, Uid Uid)>> _authorExpGradeLocksSaverLocks =
private static readonly HashSet<UniqueAuthorRevision> GlobalLockedAuthorExpGradeKeys = [];
private readonly Lazy<SaverLocks<UniqueAuthorRevision>> _authorExpGradeLocksSaverLocks =
new(() => saverLocksFactory(GlobalLockedAuthorExpGradeKeys));

public delegate AuthorRevisionSaver New(PostType triggeredByPostType);
Expand Down Expand Up @@ -42,7 +40,7 @@ public Action SaveAuthorExpGradeRevisions<TPostWithAuthorExpGrade>
private void Save<TPost, TRevision, TValue>(
CrawlerDbContext db,
IReadOnlyCollection<TPost> posts,
SaverLocks<(Fid Fid, Uid Uid)> locks,
SaverLocks<UniqueAuthorRevision> locks,
IQueryable<TRevision> dbSet,
Func<TPost, TValue?> postRevisioningFieldSelector,
Func<TValue?, TValue?, bool> isValueChangedPredicate,
Expand Down Expand Up @@ -85,12 +83,16 @@ private void Save<TPost, TRevision, TValue>(
var newRevisions = newRevisionOfNewUsers
.Concat(newRevisionOfExistingUsers)
.Select(revisionFactory)
.ToDictionary(revision => (revision.Fid, revision.Uid), revision => revision);
.ToDictionary(revision => new UniqueAuthorRevision(revision.Fid, revision.Uid), revision => revision);
db.Set<TRevision>().AddRange(newRevisions
.IntersectByKey(locks.Acquire(newRevisions.Keys))
.Values());
}

// Locking key for author revisions, built only from AuthorRevision.Fid and Uid and ignoring TriggeredBy.
// Keying the lock on this pair prevents inserting multiple entities
// with a similar time and the same values in their other fields.
public record UniqueAuthorRevision(Fid Fid, Uid Uid);

Check warning on line 94 in c#/crawler/src/Tieba/Crawl/Saver/Related/AuthorRevisionSaver.cs

View workflow job for this annotation

GitHub Actions / runs-on (macos-latest) / ReSharper

"[NotAccessedPositionalProperty.Global] Positional property 'tbm.Crawler.Tieba.Crawl.Saver.Related.AuthorRevisionSaver.UniqueAuthorRevision.Fid' is never accessed (except in implicit Equals/ToString implementations)" on /Users/runner/work/open-tbm/open-tbm/c#/crawler/src/Tieba/Crawl/Saver/Related/AuthorRevisionSaver.cs(94,4689)

Check warning on line 94 in c#/crawler/src/Tieba/Crawl/Saver/Related/AuthorRevisionSaver.cs

View workflow job for this annotation

GitHub Actions / runs-on (macos-latest) / ReSharper

"[NotAccessedPositionalProperty.Global] Positional property 'tbm.Crawler.Tieba.Crawl.Saver.Related.AuthorRevisionSaver.UniqueAuthorRevision.Uid' is never accessed (except in implicit Equals/ToString implementations)" on /Users/runner/work/open-tbm/open-tbm/c#/crawler/src/Tieba/Crawl/Saver/Related/AuthorRevisionSaver.cs(94,4698)

Check warning on line 94 in c#/crawler/src/Tieba/Crawl/Saver/Related/AuthorRevisionSaver.cs

View workflow job for this annotation

GitHub Actions / runs-on (ubuntu-latest) / ReSharper

"[NotAccessedPositionalProperty.Global] Positional property 'tbm.Crawler.Tieba.Crawl.Saver.Related.AuthorRevisionSaver.UniqueAuthorRevision.Fid' is never accessed (except in implicit Equals/ToString implementations)" on /home/runner/work/open-tbm/open-tbm/c#/crawler/src/Tieba/Crawl/Saver/Related/AuthorRevisionSaver.cs(94,4689)

Check warning on line 94 in c#/crawler/src/Tieba/Crawl/Saver/Related/AuthorRevisionSaver.cs

View workflow job for this annotation

GitHub Actions / runs-on (ubuntu-latest) / ReSharper

"[NotAccessedPositionalProperty.Global] Positional property 'tbm.Crawler.Tieba.Crawl.Saver.Related.AuthorRevisionSaver.UniqueAuthorRevision.Uid' is never accessed (except in implicit Equals/ToString implementations)" on /home/runner/work/open-tbm/open-tbm/c#/crawler/src/Tieba/Crawl/Saver/Related/AuthorRevisionSaver.cs(94,4698)

Check warning on line 94 in c#/crawler/src/Tieba/Crawl/Saver/Related/AuthorRevisionSaver.cs

View workflow job for this annotation

GitHub Actions / runs-on (windows-latest) / ReSharper

"[NotAccessedPositionalProperty.Global] Positional property 'tbm.Crawler.Tieba.Crawl.Saver.Related.AuthorRevisionSaver.UniqueAuthorRevision.Fid' is never accessed (except in implicit Equals/ToString implementations)" on D:\a\open-tbm\open-tbm\c#\crawler\src\Tieba\Crawl\Saver\Related\AuthorRevisionSaver.cs(94,4689)

Check warning on line 94 in c#/crawler/src/Tieba/Crawl/Saver/Related/AuthorRevisionSaver.cs

View workflow job for this annotation

GitHub Actions / runs-on (windows-latest) / ReSharper

"[NotAccessedPositionalProperty.Global] Positional property 'tbm.Crawler.Tieba.Crawl.Saver.Related.AuthorRevisionSaver.UniqueAuthorRevision.Uid' is never accessed (except in implicit Equals/ToString implementations)" on D:\a\open-tbm\open-tbm\c#\crawler\src\Tieba\Crawl\Saver\Related\AuthorRevisionSaver.cs(94,4698)

private sealed class LatestAuthorRevisionProjection<TValue>
{
public Time DiscoveredAt { get; init; }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ public Action Save(CrawlerDbContext db, IEnumerable<ReplyPost> replies)
.DistinctBy(r => r.SignatureId)
.Select(r => new ReplySignature
{
UserId = r.AuthorUid,
Uid = r.AuthorUid,
SignatureId = (uint)r.SignatureId!,
XxHash3 = XxHash3.Hash(r.Signature!),
ProtoBufBytes = r.Signature!,
Expand Down

0 comments on commit 644491c

Please sign in to comment.