-
Notifications
You must be signed in to change notification settings - Fork 2
/
history-parser.php
74 lines (60 loc) · 1.96 KB
/
history-parser.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
<?php
// Parses downloaded historical data and outputs a JSON file of useful fields.
//
// Input:  every file matching HISTORY_FILES; the snapshot date (YYYY-MM-DD)
//         is taken from the file name.
// Output: JSON_FILE, an object keyed by date whose values are lists of
//         [symbol, marketCap, price] string triples. Rows whose market cap
//         is the placeholder '?' are left out.
define('HISTORY_FILES', 'history/raw/*.html');
define('JSON_FILE', 'history/history.json');

$data = [];

// glob() returns false on error; treat that the same as "no files found".
$files = glob(HISTORY_FILES);
if ($files === false) {
    $files = [];
}

// Column positions (0-based) and the header text each one must carry.
$expectedHeaders = [2 => 'Symbol', 3 => 'Market Cap', 4 => 'Price'];

// Characters stripped from the numeric columns.
$formatting = [' ', '$', ','];

foreach ($files as $file) {
    // extract the date; a file without one cannot be keyed, so skip it
    if (preg_match('/\d\d\d\d-\d\d-\d\d/', $file, $matches) !== 1) {
        print "Skipping $file: no YYYY-MM-DD date in file name\n";
        continue;
    }
    $date = $matches[0];
    print "Parsing $file for date $date\n";

    // load the HTML file; markup warnings are collected by libxml and
    // discarded rather than hidden with the @ operator, so a genuine load
    // failure is still visible through the return value
    $previousSetting = libxml_use_internal_errors(true);
    $doc = new DOMDocument();
    $loaded = $doc->loadHTMLFile($file);
    libxml_clear_errors();
    libxml_use_internal_errors($previousSetting);
    if (!$loaded) {
        print "Skipping $file: could not be loaded as HTML\n";
        continue;
    }

    // verify table headers; done with explicit checks because assert() is
    // compiled out when zend.assertions is disabled
    $header = getHeader($doc);
    foreach ($expectedHeaders as $index => $expected) {
        $cell = $header->item($index);
        $actual = $cell === null ? null : $cell->textContent;
        if ($actual !== $expected) {
            throw new RuntimeException(sprintf(
                '%s: expected header column %d to be %s, found %s',
                $file,
                $index,
                var_export($expected, true),
                var_export($actual, true)
            ));
        }
    }

    $data[$date] = [];

    // parse data rows
    foreach (getRows($doc) as $row) {
        $cells = $row->getElementsByTagName('td');
        if ($cells->length < 5) {
            // row does not have the symbol / market cap / price columns
            continue;
        }

        $symbol = trim($cells->item(2)->textContent);
        $marketCap = str_replace($formatting, '', trim($cells->item(3)->textContent));
        $price = str_replace($formatting, '', trim($cells->item(4)->textContent));

        if ($marketCap !== '?') {
            $data[$date][] = [$symbol, $marketCap, $price];
        }
    }
}

// Encode first and only write on success, so a failed encode never
// overwrites an existing output file with an empty one.
$json = json_encode($data, JSON_PRETTY_PRINT);
if ($json === false) {
    throw new RuntimeException('Could not encode history as JSON: ' . json_last_error_msg());
}
if (file_put_contents(JSON_FILE, $json) === false) {
    throw new RuntimeException('Could not write ' . JSON_FILE);
}
/*************************************************************************/
/**
 * Locates the currency table inside a parsed snapshot.
 *
 * The element with id "currencies-all" is preferred; when the document has
 * no such element, the third <table> in document order is used instead.
 *
 * @param DOMDocument $doc parsed HTML document
 * @return DOMElement the table element
 * @throws RuntimeException when neither lookup finds a table
 */
function findCurrencyTable(DOMDocument $doc) {
    $table = $doc->getElementById('currencies-all');
    if ($table === null) {
        $table = $doc->getElementsByTagName('table')->item(2);
    }
    if ($table === null) {
        throw new RuntimeException(
            'Currency table not found: no #currencies-all element and fewer than three tables'
        );
    }
    return $table;
}

/**
 * Returns the $childTag elements found under the first $sectionTag element
 * of the currency table.
 *
 * @param DOMDocument $doc        parsed HTML document
 * @param string      $sectionTag tag name of the table section, e.g. 'thead'
 * @param string      $childTag   tag name of the elements to collect
 * @return DOMNodeList matching elements (possibly empty)
 * @throws RuntimeException when the table or the section is missing
 */
function getCurrencyTableNodes(DOMDocument $doc, $sectionTag, $childTag) {
    $section = findCurrencyTable($doc)->getElementsByTagName($sectionTag)->item(0);
    if ($section === null) {
        throw new RuntimeException("Currency table has no <$sectionTag> section");
    }
    return $section->getElementsByTagName($childTag);
}

/**
 * Returns the <th> header cells of the currency table.
 *
 * @param DOMDocument $doc parsed HTML document
 * @return DOMNodeList the <th> elements inside the table's first <thead>
 * @throws RuntimeException when the table or its <thead> is missing
 */
function getHeader(DOMDocument $doc) {
    return getCurrencyTableNodes($doc, 'thead', 'th');
}

/**
 * Returns the <tr> data rows of the currency table.
 *
 * @param DOMDocument $doc parsed HTML document
 * @return DOMNodeList the <tr> elements inside the table's first <tbody>
 * @throws RuntimeException when the table or its <tbody> is missing
 */
function getRows(DOMDocument $doc) {
    return getCurrencyTableNodes($doc, 'tbody', 'tr');
}