This repository has been archived by the owner on Sep 1, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 28
/
Copy path2021-123rf_com.py
50 lines (37 loc) · 1.57 KB
/
2021-123rf_com.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import collections
from parsers import base
class Parse(base.Parser):
    """
    A 123rf.com breach data parser

    Source File SHA-1: 28f439448b7a8237e62847d4df48b95d42c1fec4  123rf.com member.sql
    Good Lines: 8,292,274

    Reads the INSERT statements for the `member` table from the SQL dump at
    ``self.source`` and yields one record tuple per inserted row.
    """

    name = "None"
    web = "www.123rf.com"
    year = "2021"

    # Zero-based positions of the columns this parser extracts from one
    # `member` VALUES tuple (see the sample row in ``row_format``).
    _HASH_FIELD = 2
    _EMAIL_FIELD = 13

    @staticmethod
    def _split_outside_quotes(text: str, sep: str) -> list:
        """
        Split *text* on *sep*, ignoring separators inside single-quoted strings.

        A plain ``str.split`` is done first (fast), then neighbouring pieces
        are glued back together whenever the split happened inside an open
        quote. Backslash-escaped quotes and doubled quotes are both treated
        as part of the string, not as its end.

        :param text: SQL fragment to split
        :param sep: separator; must not end with a backslash or a quote
        :return: list of pieces, each with balanced quoting (an unterminated
            trailing string is returned as the last piece rather than dropped)
        """
        parts = []
        buffered = []
        inside_quote = False
        for piece in text.split(sep):
            buffered.append(piece)
            bare = piece
            if '\\' in bare:
                # Drop escaped backslashes first, then escaped quotes, so that
                # only quotes that really open/close a string are counted.
                bare = bare.replace('\\\\', '').replace("\\'", '')
            if bare.count("'") % 2:
                inside_quote = not inside_quote
            if not inside_quote:
                parts.append(sep.join(buffered))
                buffered = []
        if buffered:
            parts.append(sep.join(buffered))
        return parts

    def row_format(self, r: str) -> tuple:
        """
        Convert one VALUES tuple of the `member` table into a record tuple.

        Illustrative row (synthetic values) showing the column layout:

        '001','','0123456789abcdef0123456789abcdef','','jane','doe',
        '1 Example Street, Apt. 2','','Springfield','NY','US','00000',
        '555 0100','jane@example.com','Y','',0.00,1,'','paypal','',
        '50.00','N','2008-09-17 00:00:00','','','','','','','Y','','Y',
        '192.0.2.1','us15',1,1,'',0,'US','NY','','','','','',0,0

        :param r: text of a single VALUES tuple (comma-separated SQL literals)
        :return: ``(name, web, year, domain, email, '', pw_hash, '')`` where
            ``domain`` is the text after the first ``@`` of the e-mail (empty
            when there is no ``@``) and the two ``''`` slots are left empty
            by this parser
        :raises IndexError: if the tuple has fewer than 14 fields
        """
        # Quote-aware split: a comma inside e.g. the street address must not
        # shift the e-mail column.
        row = self._split_outside_quotes(r, ',')
        if len(row) <= self._EMAIL_FIELD:
            raise IndexError(
                f"expected at least {self._EMAIL_FIELD + 1} fields in member row, "
                f"got {len(row)}"
            )
        email = row[self._EMAIL_FIELD].replace('\'', '').strip()
        pw_hash = row[self._HASH_FIELD].replace('\'', '').strip()
        domain = email.split('@')[1] if '@' in email else ''
        return self.name, self.web, int(self.year), domain, email, '', pw_hash, ''

    def process_rows(self) -> collections.abc.Iterable[tuple]:
        """
        Yield a formatted record for every row inserted into `member`.

        Lines of ``self.source`` that are not INSERT statements for the
        `member` table, or that carry no VALUES list, are skipped. Undecodable
        bytes are ignored while reading.

        :return: iterator of tuples as produced by ``row_format``
        """
        with open(self.source, 'r', encoding='utf-8', errors='ignore') as source:
            for line in source:
                if not line.startswith("INSERT INTO `member`"):
                    continue
                # partition() cuts at the first VALUES keyword only, so the
                # text "VALUES" inside a data value cannot break the parse.
                _, keyword, values = line.partition('VALUES')
                if not keyword:
                    continue
                values = values.strip()
                if values.endswith(';'):
                    values = values[:-1].rstrip()
                # Remove exactly one outer parenthesis on each side so the
                # first and last fields are clean.
                if values.startswith('('):
                    values = values[1:]
                if values.endswith(')'):
                    values = values[:-1]
                if not values:
                    continue
                for value_tuple in self._split_outside_quotes(values, '),('):
                    yield self.row_format(value_tuple)