Skip to content

Commit

Permalink
* prevent selecting all fields of entity TRevision by casting `IQue…
Browse files Browse the repository at this point in the history
…ryable<TRevision>` to `IQueryable<TBaseRevision>` then project to `RevisionIdWithDuplicateIndexProjectionFactory()`

* quick exit when param `newRevisions` is empty to prevent executing SQL with a `WHERE FALSE` clause
@ `AddRevisionsWithDuplicateIndex()`

+ method `RevisionIdWithDuplicateIndexProjectionFactory()` & nested class `RevisionIdWithDuplicateIndexProjection`
* rename methods `IsRevisionEntityIdEqualsExpression()` & `RevisionEntityIdSelector()` to `IsRevisionIdEqualsExpression()` & `RevisionIdSelector()`
* rename type param `TEntityId` to `TRevisionId` and constrain it to `struct`
@ SaverWithRevision.cs
@ c#/crawler
  • Loading branch information
n0099 committed Jun 1, 2024
1 parent 522f9b3 commit 4c2e953
Show file tree
Hide file tree
Showing 6 changed files with 41 additions and 15 deletions.
1 change: 1 addition & 0 deletions c#/crawler/src/Tieba/Crawl/Saver/Post/PostSaver.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ public abstract class PostSaver<TPost, TBaseRevision, TPostId>(
: SaverWithRevision<TBaseRevision, TPostId>(logger), IPostSaver<TPost>
where TPost : BasePost
where TBaseRevision : BaseRevisionWithSplitting
where TPostId : struct
{
protected delegate void PostSaveHandler();

Expand Down
7 changes: 5 additions & 2 deletions c#/crawler/src/Tieba/Crawl/Saver/Post/ReplySaver.cs
Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,13 @@ public override SaverChangeSet<ReplyPost> Save(CrawlerDbContext db)
return changeSet;
}

protected override Pid RevisionEntityIdSelector(BaseReplyRevision entity) => entity.Pid;
protected override Pid RevisionIdSelector(BaseReplyRevision entity) => entity.Pid;
protected override Expression<Func<BaseReplyRevision, bool>>
IsRevisionEntityIdEqualsExpression(BaseReplyRevision newRevision) =>
IsRevisionIdEqualsExpression(BaseReplyRevision newRevision) =>
existingRevision => existingRevision.Pid == newRevision.Pid;
/// <summary>
/// Builds the projection that keeps only <c>Pid</c> (as the revision id) and
/// <c>DuplicateIndex</c> of a reply revision.
/// </summary>
/// <returns>An expression mapping a reply revision to its id/duplicate-index pair.</returns>
protected override Expression<Func<BaseReplyRevision, RevisionIdWithDuplicateIndexProjection>>
    RevisionIdWithDuplicateIndexProjectionFactory() =>
    revision => new RevisionIdWithDuplicateIndexProjection
    {
        RevisionId = revision.Pid,
        DuplicateIndex = revision.DuplicateIndex,
    };

protected override bool FieldUpdateIgnorance
(string propName, object? oldValue, object? newValue) => propName switch
Expand Down
7 changes: 5 additions & 2 deletions c#/crawler/src/Tieba/Crawl/Saver/Post/SubReplySaver.cs
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,13 @@ public override SaverChangeSet<SubReplyPost> Save(CrawlerDbContext db)
return changeSet;
}

protected override Spid RevisionEntityIdSelector(BaseSubReplyRevision entity) => entity.Spid;
protected override Spid RevisionIdSelector(BaseSubReplyRevision entity) => entity.Spid;
protected override Expression<Func<BaseSubReplyRevision, bool>>
IsRevisionEntityIdEqualsExpression(BaseSubReplyRevision newRevision) =>
IsRevisionIdEqualsExpression(BaseSubReplyRevision newRevision) =>
existingRevision => existingRevision.Spid == newRevision.Spid;
/// <summary>
/// Builds the projection that keeps only <c>Spid</c> (as the revision id) and
/// <c>DuplicateIndex</c> of a sub reply revision.
/// </summary>
/// <returns>An expression mapping a sub reply revision to its id/duplicate-index pair.</returns>
protected override Expression<Func<BaseSubReplyRevision, RevisionIdWithDuplicateIndexProjection>>
    RevisionIdWithDuplicateIndexProjectionFactory() =>
    revision => new RevisionIdWithDuplicateIndexProjection
    {
        RevisionId = revision.Spid,
        DuplicateIndex = revision.DuplicateIndex,
    };

protected override NullFieldsBitMask GetRevisionNullFieldBitMask(string fieldName) => 0;
}
7 changes: 5 additions & 2 deletions c#/crawler/src/Tieba/Crawl/Saver/Post/ThreadSaver.cs
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,13 @@ public override SaverChangeSet<ThreadPost> Save(CrawlerDbContext db) =>
th => new ThreadRevision {TakenAt = th.UpdatedAt ?? th.CreatedAt, Tid = th.Tid},
PredicateBuilder.New<ThreadPost>(th => Posts.Keys.Contains(th.Tid)));

protected override Tid RevisionEntityIdSelector(BaseThreadRevision entity) => entity.Tid;
protected override Tid RevisionIdSelector(BaseThreadRevision entity) => entity.Tid;
protected override Expression<Func<BaseThreadRevision, bool>>
IsRevisionEntityIdEqualsExpression(BaseThreadRevision newRevision) =>
IsRevisionIdEqualsExpression(BaseThreadRevision newRevision) =>
existingRevision => existingRevision.Tid == newRevision.Tid;
/// <summary>
/// Builds the projection that keeps only <c>Tid</c> (as the revision id) and
/// <c>DuplicateIndex</c> of a thread revision.
/// </summary>
/// <returns>An expression mapping a thread revision to its id/duplicate-index pair.</returns>
protected override Expression<Func<BaseThreadRevision, RevisionIdWithDuplicateIndexProjection>>
    RevisionIdWithDuplicateIndexProjectionFactory() =>
    revision => new RevisionIdWithDuplicateIndexProjection
    {
        RevisionId = revision.Tid,
        DuplicateIndex = revision.DuplicateIndex,
    };

protected override bool FieldUpdateIgnorance
(string propName, object? oldValue, object? newValue) => propName switch
Expand Down
27 changes: 20 additions & 7 deletions c#/crawler/src/Tieba/Crawl/Saver/SaverWithRevision.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,11 @@

namespace tbm.Crawler.Tieba.Crawl.Saver;

public abstract partial class SaverWithRevision<TBaseRevision, TEntityId>(
ILogger<SaverWithRevision<TBaseRevision, TEntityId>> logger)
public abstract partial class SaverWithRevision<TBaseRevision, TRevisionId>(
ILogger<SaverWithRevision<TBaseRevision, TRevisionId>> logger)
: IRevisionProperties
where TBaseRevision : BaseRevisionWithSplitting
where TRevisionId : struct
{
protected delegate void AddSplitRevisionsDelegate(CrawlerDbContext db, IEnumerable<TBaseRevision> revisions);
protected abstract Lazy<Dictionary<Type, AddSplitRevisionsDelegate>>
Expand All @@ -15,6 +16,7 @@ protected void AddRevisionsWithDuplicateIndex<TRevision>(CrawlerDbContext db, IE
where TRevision : TBaseRevision
{
var newRevisions = revisions.OfType<TRevision>().ToList();
if (newRevisions.Count == 0) return; // quick exit to prevent execute sql with WHERE FALSE clause
var dbSet = db.Set<TRevision>();
var visitor = new ReplaceParameterTypeVisitor<TBaseRevision, TRevision>();

Expand All @@ -26,11 +28,13 @@ protected void AddRevisionsWithDuplicateIndex<TRevision>(CrawlerDbContext db, IE
(predicate, newRevision) => predicate.Or(LinqKit.PredicateBuilder
.New<TRevision>(existingRevision => existingRevision.TakenAt == newRevision.TakenAt)
.And((Expression<Func<TRevision, bool>>)visitor
.Visit(IsRevisionEntityIdEqualsExpression(newRevision))))))
.Visit(IsRevisionIdEqualsExpression(newRevision))))))
.Cast<TBaseRevision>()
.Select(RevisionIdWithDuplicateIndexProjectionFactory())
.ToList();
(from existingRevision in existingRevisions
join newRevision in newRevisions
on RevisionEntityIdSelector(existingRevision) equals RevisionEntityIdSelector(newRevision)
on existingRevision.RevisionId equals RevisionIdSelector(newRevision)
select (existingRevision, newRevision))
.ForEach(t =>
t.newRevision.DuplicateIndex = (ushort)(t.existingRevision.DuplicateIndex + 1));
Expand All @@ -39,9 +43,18 @@ on RevisionEntityIdSelector(existingRevision) equals RevisionEntityIdSelector(ne

protected abstract NullFieldsBitMask GetRevisionNullFieldBitMask(string fieldName);

protected abstract TEntityId RevisionEntityIdSelector(TBaseRevision entity);
protected abstract TRevisionId RevisionIdSelector(TBaseRevision entity);
protected abstract Expression<Func<TBaseRevision, bool>>
IsRevisionEntityIdEqualsExpression(TBaseRevision newRevision);
IsRevisionIdEqualsExpression(TBaseRevision newRevision);

/// <summary>
/// Supplies the projection expression that maps a <typeparamref name="TBaseRevision"/>
/// to a <see cref="RevisionIdWithDuplicateIndexProjection"/>.
/// </summary>
/// <remarks>
/// <c>AddRevisionsWithDuplicateIndex()</c> passes the returned expression to <c>Select()</c>
/// after casting the query to <typeparamref name="TBaseRevision"/>, so that the query for
/// existing revisions does not select every field of the revision entity.
/// </remarks>
/// <returns>The projection expression used when querying existing revisions.</returns>
protected abstract Expression<Func<TBaseRevision, RevisionIdWithDuplicateIndexProjection>>
RevisionIdWithDuplicateIndexProjectionFactory();
/// <summary>
/// Result shape of <c>RevisionIdWithDuplicateIndexProjectionFactory()</c>: the id and
/// duplicate index of an existing revision, without the rest of the entity.
/// </summary>
[SuppressMessage("ReSharper", "PropertyCanBeMadeInitOnly.Global")]
protected class RevisionIdWithDuplicateIndexProjection
{
/// <summary>
/// Id of the existing revision; <c>AddRevisionsWithDuplicateIndex()</c> joins it against
/// <c>RevisionIdSelector()</c> of each new revision.
/// </summary>
public TRevisionId RevisionId { get; set; }
/// <summary>
/// Duplicate index of the existing revision; a matching new revision is assigned this value plus one.
/// </summary>
public ushort DuplicateIndex { get; set; }
}

protected virtual bool ShouldIgnoreEntityRevision(string propName, PropertyEntry propEntry, EntityEntry entityEntry) => false;

Check failure on line 59 in c#/crawler/src/Tieba/Crawl/Saver/SaverWithRevision.cs

View workflow job for this annotation

GitHub Actions / build (crawler)

Check failure on line 59 in c#/crawler/src/Tieba/Crawl/Saver/SaverWithRevision.cs

View workflow job for this annotation

GitHub Actions / build (crawler)

protected virtual bool FieldUpdateIgnorance(string propName, object? oldValue, object? newValue) => false;
Expand All @@ -52,7 +65,7 @@ protected abstract Expression<Func<TBaseRevision, bool>>
_ => false
};
}
public abstract partial class SaverWithRevision<TBaseRevision, TEntityId>
public abstract partial class SaverWithRevision<TBaseRevision, TRevisionId>
{
protected void SaveEntitiesWithRevision<TEntity, TRevision>(
CrawlerDbContext db,
Expand Down
7 changes: 5 additions & 2 deletions c#/crawler/src/Tieba/Crawl/Saver/UserSaver.cs
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,13 @@ protected override Lazy<Dictionary<Type, AddSplitRevisionsDelegate>>
{typeof(UserRevision.SplitIpGeolocation), AddRevisionsWithDuplicateIndex<UserRevision.SplitIpGeolocation>}
});

protected override Uid RevisionEntityIdSelector(BaseUserRevision entity) => entity.Uid;
protected override Uid RevisionIdSelector(BaseUserRevision entity) => entity.Uid;
protected override Expression<Func<BaseUserRevision, bool>>
IsRevisionEntityIdEqualsExpression(BaseUserRevision newRevision) =>
IsRevisionIdEqualsExpression(BaseUserRevision newRevision) =>
existingRevision => existingRevision.Uid == newRevision.Uid;
/// <summary>
/// Builds the projection that keeps only <c>Uid</c> (as the revision id) and
/// <c>DuplicateIndex</c> of a user revision.
/// </summary>
/// <returns>An expression mapping a user revision to its id/duplicate-index pair.</returns>
protected override Expression<Func<BaseUserRevision, RevisionIdWithDuplicateIndexProjection>>
    RevisionIdWithDuplicateIndexProjectionFactory() =>
    revision => new RevisionIdWithDuplicateIndexProjection
    {
        RevisionId = revision.Uid,
        DuplicateIndex = revision.DuplicateIndex,
    };

protected override bool ShouldIgnoreEntityRevision(string propName, PropertyEntry propEntry, EntityEntry entityEntry)
{
Expand Down

0 comments on commit 4c2e953

Please sign in to comment.