Skip to content

Commit

Permalink
* fix dictionary `newRevisions` that may contain duplicate keys with the …
Browse files Browse the repository at this point in the history
…same author uid by ignoring all of their posts other than the first one

* renamed method from `SaveAuthorRevisions()`
- removed the invocation of `.ForNoKeyUpdateHint()`, following previous commit 97d426a @ `AuthorRevisionSaver.SaveAuthorRevisions()`
@ `AuthorRevisionSaver.Save()`
@ c#/crawler
  • Loading branch information
n0099 committed May 18, 2024
1 parent 359969f commit 724585d
Showing 1 changed file with 7 additions and 9 deletions.
16 changes: 7 additions & 9 deletions c#/crawler/src/Tieba/Crawl/Saver/AuthorRevisionSaver.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
using LinqToDB.DataProvider.PostgreSQL;

namespace tbm.Crawler.Tieba.Crawl.Saver;

// locks only using AuthorRevision.Fid and Uid, ignoring TriggeredBy
Expand All @@ -14,7 +12,7 @@ public Action SaveAuthorExpGradeRevisions<TPostWithAuthorExpGrade>
(CrawlerDbContext db, IReadOnlyCollection<TPostWithAuthorExpGrade> posts)
where TPostWithAuthorExpGrade : PostWithAuthorExpGrade
{
SaveAuthorRevisions(db, posts, authorExpGradeLocks,
Save(db, posts, authorExpGradeLocks,
db.AuthorExpGradeRevisions,
p => p.AuthorExpGrade,
(a, b) => a != b,
Expand All @@ -36,7 +34,7 @@ public Action SaveAuthorExpGradeRevisions<TPostWithAuthorExpGrade>
return authorExpGradeLocks.Dispose;
}

private static void SaveAuthorRevisions<TPost, TRevision, TValue>(
private static void Save<TPost, TRevision, TValue>(
CrawlerDbContext db,
IReadOnlyCollection<TPost> posts,
SaverLocks<(Fid Fid, Uid Uid)> locks,
Expand All @@ -47,17 +45,17 @@ private static void SaveAuthorRevisions<TPost, TRevision, TValue>(
Func<(Uid Uid, TValue? Value, Time DiscoveredAt), TRevision> revisionFactory)
where TPost : BasePost
where TRevision : AuthorRevision
{
{ // only takes the first of multiple post from the same author
var uniquePosts = posts.DistinctBy(p => p.AuthorUid).ToList();
SharedHelper.GetNowTimestamp(out var now);
var existingRevisionOfExistingUsers = dbSet.AsNoTracking()
.Where(e => e.Fid == db.Fid
&& posts.Select(p => p.AuthorUid).Distinct().Contains(e.Uid))
&& uniquePosts.Select(p => p.AuthorUid).Contains(e.Uid))
.Select(latestRevisionProjectionFactory)
.AsCte() // https://stackoverflow.com/questions/49854322/usage-of-for-update-in-window-function-postgres#comment86726589_49854322
.Where(e => e.Rank == 1)
.AsPostgreSQL().ForNoKeyUpdateHint()
.ToLinqToDB().AsEnumerable()
.Join(posts, e => e.Uid, p => p.AuthorUid, (e, p) =>
.Join(uniquePosts, e => e.Uid, p => p.AuthorUid, (e, p) =>
(
e.Uid,
Existing: (e.DiscoveredAt, e.Value),
Expand All @@ -70,7 +68,7 @@ private static void SaveAuthorRevisions<TPost, TRevision, TValue>(
.Where(t => t.Existing.DiscoveredAt != t.NewInPost.DiscoveredAt
&& isValueChangedPredicate(t.Existing.Value, t.NewInPost.Value))
.Select(t => (t.Uid, t.NewInPost.Value, t.NewInPost.DiscoveredAt));
var newRevisionOfNewUsers = posts
var newRevisionOfNewUsers = uniquePosts
.ExceptBy(existingRevisionOfExistingUsers.Select(t => t.Uid), p => p.AuthorUid)
.Select(p => (Uid: p.AuthorUid, Value: postAuthorFieldValueSelector(p), DiscoveredAt: now));
var newRevisions = newRevisionOfNewUsers
Expand Down

0 comments on commit 724585d

Please sign in to comment.