forked from sigp/blockprint
-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathprepare_training_data.py
executable file
·79 lines (61 loc) · 1.79 KB
/
prepare_training_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
#!/usr/bin/env python3
import json
import os
import re
import sys
from typing import Optional

from load_blocks import store_block_rewards
# Client labels, in lexicographic order, as that's what SciKit uses internally.
CLIENTS = ["Lighthouse", "Lodestar", "Nimbus", "Other", "Prysm", "Teku"]

# Graffiti patterns identifying each client. They are compiled below and
# applied with `match`, which anchors at the start of the graffiti, so a
# pattern needs a leading ".*" to match anywhere in the string.
REGEX_PATTERNS = {
    "Lighthouse": [
        r"Lighthouse/v",
        r".*[Ll]oopring",
    ],
    "Teku": [
        r"teku/v",
        # Escaped dot: an unescaped "." would match any character.
        r"bitcoinsuisse\.com",
        r".*Allnodes",
    ],
    "Nimbus": [
        r"Nimbus/v",
    ],
    "Prysm": [
        r"prylabs",
        r".*[Dd][Aa]pp[Nn]ode",
        r"SharedStake\.org Prysm",
        # Prater only
        # "graffitiwall:",
    ],
    "Lodestar": [],
}

# Pre-compiled form of REGEX_PATTERNS, keyed by client name.
REGEX = {
    client: [re.compile(pattern) for pattern in patterns]
    for client, patterns in REGEX_PATTERNS.items()
}
def classify_reward_by_graffiti(block_reward) -> Optional[str]:
    """Return the client whose graffiti pattern matches this block reward.

    The graffiti is read from ``block_reward["meta"]["graffiti"]`` and tested
    against each client's compiled patterns in ``REGEX``, in that dictionary's
    iteration order; the first client with a matching pattern wins.

    Returns:
        The matching client name, or ``None`` when no pattern matches.
    """
    graffiti = block_reward["meta"]["graffiti"]
    for client, regexes in REGEX.items():
        # `match` anchors each pattern at the start of the graffiti.
        if any(regex.match(graffiti) for regex in regexes):
            return client
    return None
def classify_rewards_by_graffiti(rewards):
    """Group block rewards by the client inferred from their graffiti.

    Returns a dict with one list per entry of ``CLIENTS``. Rewards for which
    ``classify_reward_by_graffiti`` returns ``None`` are left out.
    """
    buckets = {name: [] for name in CLIENTS}
    for block_reward in rewards:
        label = classify_reward_by_graffiti(block_reward)
        if label is None:
            # No pattern matched this graffiti; skip the reward.
            continue
        buckets[label].append(block_reward)
    return buckets
def main():
    """Classify every raw block-reward file and store the labelled examples.

    Reads two positional command-line arguments: the directory of raw JSON
    input files (``sys.argv[1]``) and the output directory handed to
    ``store_block_rewards`` (``sys.argv[2]``).
    """
    raw_data_dir = sys.argv[1]
    proc_data_dir = sys.argv[2]

    for input_file in os.listdir(raw_data_dir):
        print(f"processing {input_file}")
        input_path = os.path.join(raw_data_dir, input_file)
        # Decode explicitly as UTF-8 instead of the platform default so that
        # non-ASCII graffiti loads the same way on every locale.
        with open(input_path, "r", encoding="utf-8") as f:
            rewards = json.load(f)

        res = classify_rewards_by_graffiti(rewards)

        for client, examples in res.items():
            for block_rewards in examples:
                store_block_rewards(block_rewards, client, proc_data_dir)
# Run the pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    main()