-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* split class `UserParserAndSaver` into two classes `User(Saver|Parser)` to fix `AV1000: Type '' contains the word 'and', which suggests it has multiple purposes` + primary ctor param `user(Saver|Parser)Factory` + fields `_users`, and `_usersParser` with its get-only prop for initialization - required prop `Users` + param `userSaver` for its only usage in class `ThreadCrawlFacade` @ `BeforeCommitSaveHook()` @ BaseCrawlFacade.cs @ c#/crawler
- Loading branch information
Showing 9 changed files with 172 additions and 155 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
namespace tbm.Crawler.Tieba.Crawl.Parser; | ||
|
||
/// <summary> | ||
/// Converts <c>TbClient.User</c> messages into <c>User</c> entities and stores them in the | ||
/// <c>users</c> dictionary received through the primary constructor, keyed by <c>Uid</c>. | ||
/// </summary> | ||
public partial class UserParser(ConcurrentDictionary<Uid, User> users) | ||
{ | ||
/// <summary>Factory signature that builds a <c>UserParser</c> around the given dictionary.</summary> | ||
// NOTE(review): presumably resolved as a factory by the DI container - confirm at the registration site | ||
public delegate UserParser New(ConcurrentDictionary<Uid, User> users); | ||
|
||
/// <summary> | ||
/// Maps every element of <paramref name="inUsers"/> to a <c>User</c> and writes it into <c>users</c>. | ||
/// Elements with uid 0 are skipped; elements with a negative uid only get | ||
/// Uid, Name, Portrait and PortraitUpdatedAt populated. | ||
/// </summary> | ||
/// <param name="inUsers">Raw user messages to convert.</param> | ||
/// <exception cref="InvalidDataException"> | ||
/// Wraps any exception thrown while the fields of a regular user are being assigned; | ||
/// the serialized source element is attached to the inner exception under <c>Data["raw"]</c>. | ||
/// </exception> | ||
public void ParseUsers(IEnumerable<TbClient.User> inUsers) => | ||
inUsers.Select(el => | ||
{ | ||
// Splits a portrait value of the form "<portrait>?t=<digits>" into its two parts; | ||
// a value that doesn't match the pattern is returned unchanged with a null update time. | ||
static (string Portrait, uint? UpdateTime) ExtractPortrait(string portrait) => | ||
ExtractPortraitRegex().Match(portrait) is {Success: true} m | ||
? (m.Groups["portrait"].Value, Time.Parse(m.Groups["timestamp"].ValueSpan, CultureInfo.InvariantCulture)) | ||
: (portrait, null); | ||
|
||
var uid = el.Uid; | ||
if (uid == 0) return null; // in client version 12.x the last user in list will be an empty user with uid 0 | ||
// NOTE(review): this call sits outside the try block below, so an exception thrown here | ||
// (e.g. by Time.Parse or a regex match timeout) is not wrapped with the raw element - confirm that is intended | ||
var (portrait, portraitUpdatedAt) = ExtractPortrait(el.Portrait); | ||
if (uid < 0) // historical anonymous user | ||
{ | ||
// only NameShow is read for these users and it is stored as Name; all other fields keep their defaults | ||
return new() {Uid = uid, Name = el.NameShow, Portrait = portrait, PortraitUpdatedAt = portraitUpdatedAt}; | ||
} | ||
|
||
// will be an empty string when the user hasn't set a username for their baidu account yet | ||
var name = el.Name.NullIfEmpty(); | ||
var nameShow = el.NameShow.NullIfEmpty(); | ||
var u = new User(); | ||
try | ||
{ | ||
u.Uid = uid; | ||
u.Name = name; | ||
// DisplayName is only kept when it differs from Name | ||
u.DisplayName = name == nameShow ? null : nameShow; | ||
u.Portrait = portrait; | ||
u.PortraitUpdatedAt = portraitUpdatedAt; | ||
u.Gender = (byte)el.Gender; // 0 when the user hasn't explicitly set their gender | ||
u.FansNickname = el.FansNickname.NullIfEmpty(); | ||
u.Icon = Helper.SerializedProtoBufWrapperOrNullIfEmpty(el.Iconinfo, | ||
() => new UserIconWrapper {Value = {el.Iconinfo}}); | ||
u.IpGeolocation = el.IpAddress.NullIfEmpty(); | ||
return u; | ||
} | ||
catch (Exception e) | ||
{ | ||
// attach the serialized source element so the failing input can be inspected from the exception | ||
e.Data["raw"] = Helper.UnescapedJsonSerialize(el); | ||
throw new InvalidDataException("User parse error.", e); | ||
} | ||
// OfType<User>() drops the nulls returned for uid 0; the indexer assignment overwrites | ||
// any entry already stored under the same Uid | ||
}).OfType<User>().ForEach(u => users[u.Uid] = u); | ||
|
||
/// <summary>Sets <c>Icon</c> to null on every user currently held in <c>users</c>.</summary> | ||
public void ResetUsersIcon() => users.Values.ForEach(u => u.Icon = null); | ||
|
||
// Matches "<portrait>?t=<digits>" over the whole input, with a 100 ms match timeout. | ||
// NOTE(review): per the .NET regex source generator docs RegexOptions.Compiled is ignored | ||
// for [GeneratedRegex], so the flag looks redundant here - confirm before removing | ||
[GeneratedRegex("^(?<portrait>.+)\\?t=(?<timestamp>[0-9]+)$", RegexOptions.Compiled, matchTimeoutMilliseconds: 100)] | ||
private static partial Regex ExtractPortraitRegex(); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
namespace tbm.Crawler.Tieba.Crawl.Saver; | ||
|
||
/// <summary> | ||
/// Revision-related configuration of <c>UserSaver</c>: how each split revision type is upserted | ||
/// and which bit represents each nullable field of <c>User</c>. | ||
/// </summary> | ||
public partial class UserSaver | ||
{ | ||
/// <summary> | ||
/// Maps each split revision entity type to a delegate that picks the revisions of that type | ||
/// out of the given sequence and upserts them into the matching set of the db context. | ||
/// </summary> | ||
// NOTE(review): NoUpdate() presumably leaves already existing rows untouched - confirm against the upsert library docs | ||
protected override Dictionary<Type, RevisionUpsertDelegate> | ||
RevisionUpsertDelegatesKeyBySplitEntityType { get; } = new() | ||
{ | ||
{ | ||
typeof(UserRevision.SplitDisplayName), (db, revisions) => | ||
db.Set<UserRevision.SplitDisplayName>() | ||
.UpsertRange(revisions.OfType<UserRevision.SplitDisplayName>()).NoUpdate().Run() | ||
}, | ||
{ | ||
typeof(UserRevision.SplitPortraitUpdatedAt), (db, revisions) => | ||
db.Set<UserRevision.SplitPortraitUpdatedAt>() | ||
.UpsertRange(revisions.OfType<UserRevision.SplitPortraitUpdatedAt>()).NoUpdate().Run() | ||
}, | ||
{ | ||
typeof(UserRevision.SplitIpGeolocation), (db, revisions) => | ||
db.Set<UserRevision.SplitIpGeolocation>() | ||
.UpsertRange(revisions.OfType<UserRevision.SplitIpGeolocation>()).NoUpdate().Run() | ||
} | ||
}; | ||
|
||
/// <summary> | ||
/// Returns the bit assigned to <paramref name="fieldName"/>: | ||
/// Name is 1, Gender is 1 &lt;&lt; 3, Icon is 1 &lt;&lt; 5, and any other name yields 0. | ||
/// </summary> | ||
/// <param name="fieldName">Name of a property of <c>User</c>.</param> | ||
// NOTE(review): the SA1025 suppression suggests the switch arms are column-aligned with extra | ||
// spaces in the real file; that alignment is not visible in this rendering | ||
[SuppressMessage("StyleCop.CSharp.SpacingRules", "SA1025:Code should not contain multiple whitespace in a row")] | ||
protected override NullFieldsBitMask GetRevisionNullFieldBitMask(string fieldName) => fieldName switch | ||
{ | ||
nameof(User.Name) => 1, | ||
nameof(User.Gender) => 1 << 3, | ||
nameof(User.Icon) => 1 << 5, | ||
_ => 0 | ||
}; | ||
} | ||
/// <summary> | ||
/// Saves the users held in the <c>users</c> dictionary received through the primary constructor. | ||
/// A static set of uids, shared by every instance, records which users are currently claimed by a | ||
/// saver so that two instances don't save the same user at the same time. | ||
/// </summary> | ||
public partial class UserSaver(ILogger<UserSaver> logger, ConcurrentDictionary<Uid, User> users) | ||
: CommonInSavers<BaseUserRevision>(logger) | ||
{ | ||
/// <summary>Factory signature that builds a <c>UserSaver</c> around the given dictionary.</summary> | ||
// NOTE(review): the logger is not part of this signature, so it is presumably supplied by the DI container - confirm | ||
public delegate UserSaver New(ConcurrentDictionary<Uid, User> users); | ||
|
||
// Uids currently claimed by any UserSaver instance. The set is static, so it is shared across | ||
// instances, and it doubles as the monitor object for every lock statement in this class. | ||
private static readonly HashSet<Uid> UserIdLocks = []; | ||
Check failure on line 39 in c#/crawler/src/Tieba/Crawl/Saver/UserSaver.cs GitHub Actions / build (crawler)
Check failure on line 39 in c#/crawler/src/Tieba/Crawl/Saver/UserSaver.cs GitHub Actions / build (crawler)
|
||
// Uids this instance has added to UserIdLocks; PostSaveHook() removes exactly these again. | ||
private readonly List<Uid> _savedUsersId = []; | ||
|
||
/// <summary> | ||
/// Saves every user in <c>users</c> whose uid is not already present in the shared lock set. | ||
/// Returns without doing anything when <c>users</c> is empty or all of its uids are already claimed. | ||
/// </summary> | ||
/// <param name="db">Context used to load the existing user rows; also passed on to <c>SavePostsOrUsers</c>.</param> | ||
/// <param name="postType">Stored as <c>TriggeredBy</c> on every <c>UserRevision</c> the revision factory creates.</param> | ||
/// <param name="userFieldChangeIgnorance">Forwarded unchanged to <c>SavePostsOrUsers</c>.</param> | ||
public void SaveUsers( | ||
CrawlerDbContext db, | ||
string postType, | ||
FieldChangeIgnoranceDelegates userFieldChangeIgnorance) | ||
{ | ||
if (users.IsEmpty) return; | ||
lock (UserIdLocks) | ||
{ | ||
// snapshot of the users whose uid nobody has claimed yet | ||
var usersExceptLocked = new Dictionary<Uid, User>(users.ExceptBy(UserIdLocks, pair => pair.Key)); | ||
if (usersExceptLocked.Count == 0) return; | ||
// claim them: remember the uids on this instance, then publish them to the shared set | ||
// (the union uses the whole _savedUsersId list, not only the uids added on this call) | ||
_savedUsersId.AddRange(usersExceptLocked.Keys); | ||
UserIdLocks.UnionWith(_savedUsersId); | ||
|
||
// load the already stored rows for the claimed uids as tracked entities | ||
// NOTE(review): ForUpdate() presumably takes a row-level lock in the database - confirm in its definition | ||
var existingUsersKeyByUid = (from user in db.Users.AsTracking().ForUpdate() | ||
where usersExceptLocked.Keys.Contains(user.Uid) | ||
select user).ToDictionary(u => u.Uid); | ||
// arguments after the ignorance delegates: a revision factory, the claimed users grouped by | ||
// whether a stored row exists (true) or not (false), and a lookup of the stored row for a user | ||
SavePostsOrUsers(db, userFieldChangeIgnorance, | ||
u => new UserRevision | ||
{ | ||
TakenAt = u.UpdatedAt ?? u.CreatedAt, | ||
Uid = u.Uid, | ||
TriggeredBy = postType | ||
}, | ||
usersExceptLocked.Values.ToLookup(u => existingUsersKeyByUid.ContainsKey(u.Uid)), | ||
u => existingUsersKeyByUid[u.Uid]); | ||
} | ||
} | ||
|
||
/// <summary> | ||
/// Claims the uids in <paramref name="usersId"/> that are not yet in the shared lock set. | ||
/// </summary> | ||
/// <param name="usersId">Uids the caller wants to claim.</param> | ||
/// <returns> | ||
/// The uids that were claimed by this call; empty when every given uid was already claimed. | ||
/// <c>Except</c> yields a set difference, so a uid repeated in the input appears once. | ||
/// </returns> | ||
public IEnumerable<Uid> AcquireUidLocksForSave(IEnumerable<Uid> usersId) | ||
{ | ||
lock (UserIdLocks) | ||
{ | ||
var exceptLocked = usersId.Except(UserIdLocks).ToList(); | ||
if (exceptLocked.Count == 0) return exceptLocked; | ||
_savedUsersId.AddRange(exceptLocked); // assume all given users are saved | ||
UserIdLocks.UnionWith(exceptLocked); | ||
return exceptLocked; | ||
} | ||
} | ||
|
||
/// <summary>Removes every uid recorded by this instance from the shared lock set.</summary> | ||
// NOTE(review): _savedUsersId itself is not cleared here, so a reused instance would carry the | ||
// old uids into its next SaveUsers() call - confirm instances are used for a single save only | ||
public void PostSaveHook() | ||
{ | ||
lock (UserIdLocks) if (_savedUsersId.Count != 0) UserIdLocks.ExceptWith(_savedUsersId); | ||
} | ||
} |
Oops, something went wrong.