Skip to content

Commit

Permalink
Add Data Preparation tutorial
Browse files Browse the repository at this point in the history
  • Loading branch information
nkaz001 committed Apr 5, 2023
1 parent fb8caa3 commit b73eae7
Show file tree
Hide file tree
Showing 11 changed files with 813 additions and 305 deletions.
724 changes: 724 additions & 0 deletions examples/Data Preparation.ipynb

Large diffs are not rendered by default.

Binary file added examples/usdm/btcusdt_20230404.dat.gz
Binary file not shown.
Binary file added examples/usdm/btcusdt_20230405.dat.gz
Binary file not shown.
37 changes: 19 additions & 18 deletions hftbacktest/data/utils/binancefutures.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,23 +5,23 @@

from hftbacktest import correct, validate_data

"""
Collected Binance Futures stream file sample.

local_timestamp raw_stream
1660228023037049 {"stream":"btcusdt@depth@0ms","data":{"e":"depthUpdate","E":1660228023941,"T":1660228023931,"s":"BTCUSDT","U":1801732831593,"u":1801732832589,"pu":1801732831561,"b":[["2467.10","0.000"],["12006.00","0.001"],["24427.70","4.350"],["24620.30","0.172"],["24644.00","44.832"],["24645.40","0.203"],["24652.80","4.900"],["24664.10","4.279"],["24666.50","0.554"],["24666.80","6.764"],["24668.70","7.428"],["24670.90","2.000"],["24671.00","0.000"],["24672.70","0.000"],["24688.30","0.000"]],"a":[["24653.60","0.000"],["24669.80","0.000"],["24670.20","0.000"],["24670.70","0.000"],["24670.90","0.000"],["24671.00","20.812"],["24672.10","0.000"],["24672.30","0.001"],["24674.60","1.520"],["24674.80","0.000"],["24684.20","4.519"],["24684.30","0.202"],["24685.00","0.937"],["24690.90","4.827"],["24693.60","1.500"],["24729.10","0.171"]]}}
1660228023038319 {"stream":"btcusdt@depth@0ms","data":{"e":"depthUpdate","E":1660228023977,"T":1660228023966,"s":"BTCUSDT","U":1801732832805,"u":1801732834115,"pu":1801732832589,"b":[["2467.10","0.008"],["24643.00","4.457"],["24656.30","0.010"],["24657.70","0.005"],["24658.80","1.000"],["24658.90","1.500"],["24659.50","3.781"],["24659.70","1.806"],["24659.90","0.105"],["24660.60","0.787"],["24666.30","5.033"],["24666.40","0.012"],["24666.50","0.556"],["24668.70","7.426"],["24668.90","0.000"],["24670.90","2.535"],["24680.00","0.000"],["24688.30","0.000"]],"a":[["24653.60","0.000"],["24670.10","0.000"],["24670.60","0.000"],["24670.70","0.000"],["24670.90","0.000"],["24671.00","20.642"],["24672.00","0.000"],["24672.10","0.000"],["24673.50","0.145"],["24673.60","1.567"],["24674.50","3.746"],["24674.60","1.520"],["24678.30","1.304"],["24678.40","0.001"],["24678.80","0.546"],["24678.90","0.002"],["24681.60","0.020"],["24681.70","0.613"],["24681.90","0.077"],["24682.10","3.000"],["24682.20","0.000"],["24683.70","0.163"],["24683.80","4.162"],["24684.00","1.227"],["24684.20","4.519"],["24684.30","0.202"],["24684.90","1.331"],["24685.70","0.156"],["24685.80","0.325"],["24686.70","0.648"],["24692.60","0.040"],["24700.00","47.420"],["24729.10","0.006"]]}}
1660228023043260 {"stream":"btcusdt@trade","data":{"e":"trade","E":1660228023980,"T":1660228023973,"s":"BTCUSDT","t":2691833663,"p":"24670.90","q":"0.022","X":"MARKET","m":true}}
1660228023052991 {"stream":"btcusdt@trade","data":{"e":"trade","E":1660228023991,"T":1660228023983,"s":"BTCUSDT","t":2691833664,"p":"24671.00","q":"0.001","X":"MARKET","m":false}}
1660228023071108 {"stream":"btcusdt@depth@0ms","data":{"e":"depthUpdate","E":1660228024010,"T":1660228024002,"s":"BTCUSDT","U":1801732834136,"u":1801732835323,"pu":1801732834115,"b":[["2467.10","0.000"],["12006.00","0.000"],["24599.40","0.641"],["24603.20","0.104"],["24625.50","0.152"],["24645.20","0.476"],["24646.80","0.081"],["24652.60","0.254"],["24664.10","4.279"],["24666.50","0.878"],["24668.80","0.004"],["24670.90","2.513"],["24688.30","0.000"],["24787.00","0.000"]],"a":[["24653.60","0.000"],["24668.10","0.000"],["24668.70","0.000"],["24669.50","0.000"],["24669.80","0.000"],["24670.00","0.000"],["24670.60","0.000"],["24670.70","0.000"],["24670.90","0.000"],["24671.00","20.641"],["24672.20","0.000"],["24672.30","0.001"],["24673.50","0.040"],["24673.90","0.105"],["24674.70","2.139"],["24674.80","0.000"],["24683.70","0.963"],["24683.90","0.009"],["24685.70","0.556"],["24709.30","0.254"],["24723.80","0.000"],["24728.30","0.193"],["24729.50","4.477"],["24739.40","0.807"],["24743.20","0.235"],["24795.00","0.130"]]}}
1660228023117894 {"stream":"btcusdt@depth@0ms","data":{"e":"depthUpdate","E":1660228024044,"T":1660228024034,"s":"BTCUSDT","U":1801732835406,"u":1801732836571,"pu":1801732835323,"b":[["2467.10","0.000"],["24337.10","2.462"],["24616.50","1.050"],["24619.00","0.235"],["24640.00","5.148"],["24649.80","2.805"],["24650.00","14.374"],["24651.90","3.000"],["24653.30","1.400"],["24658.70","1.142"],["24658.80","0.000"],["24659.60","3.263"],["24659.70","0.006"],["24660.50","0.840"],["24660.60","0.387"],["24662.20","0.202"],["24663.10","7.147"],["24664.00","0.922"],["24664.20","0.131"],["24664.50","0.027"],["24666.20","7.066"],["24666.40","0.012"],["24668.80","0.002"],["24669.30","0.002"],["24670.20","0.811"],["24670.90","5.817"],["24688.30","0.000"]],"a":[["24653.60","0.000"],["24669.80","0.000"],["24669.90","0.000"],["24670.90","0.000"],["24671.00","20.121"],["24672.10","0.000"],["24672.80","0.000"],["24674.60","1.520"],["24675.30","0.421"],["24681.20","0.239"],["24681.50","1.343"],["24681.60","0.020"],["24681.70","0.213"],["24683.60","2.929"],["24683.70","0.163"],["24683.80","2.162"],["24684.70","0.646"],["24684.90","0.731"],["24692.90","0.321"],["24693.10","0.040"],["24700.70","0.537"],["24703.60","0.210"],["24721.50","7.245"]]}}
1660228023125009 {"stream":"btcusdt@trade","data":{"e":"trade","E":1660228024062,"T":1660228024055,"s":"BTCUSDT","t":2691833665,"p":"24670.90","q":"0.002","X":"MARKET","m":true}}
1660228023128966 {"stream":"btcusdt@trade","data":{"e":"trade","E":1660228024067,"T":1660228024061,"s":"BTCUSDT","t":2691833666,"p":"24670.90","q":"0.020","X":"MARKET","m":true}}
1660228023138740 {"stream":"btcusdt@depth@0ms","data":{"e":"depthUpdate","E":1660228024077,"T":1660228024066,"s":"BTCUSDT","U":1801732836639,"u":1801732837803,"pu":1801732836571,"b":[["2467.10","0.000"],["24659.00","0.000"],["24659.30","2.500"],["24663.00","1.038"],["24664.20","0.118"],["24666.20","7.065"],["24666.50","0.554"],["24666.70","3.987"],["24666.80","7.088"],["24666.90","0.014"],["24667.40","1.506"],["24668.90","0.006"],["24670.10","0.272"],["24670.90","6.726"],["24688.30","0.000"]],"a":[["24653.60","0.000"],["24668.70","0.000"],["24670.30","0.000"],["24670.50","0.000"],["24670.90","0.000"],["24679.00","0.001"],["24703.10","1.500"],["24710.50","0.057"],["24728.30","0.028"],["24768.50","0.318"],["24980.10","5.446"],["25050.00","119.300"]]}}
1660228023149748 {"stream":"btcusdt@trade","data":{"e":"trade","E":1660228024088,"T":1660228024081,"s":"BTCUSDT","t":2691833667,"p":"24671.00","q":"0.063","X":"MARKET","m":false}}
"""
def convert(input_filename, output_filename=None, opt='', base_latency=0):
"""
Collected Binance Futures stream file sample.
def convert(input_filename, output_filename, opt='', base_latency=0):
local_timestamp raw_stream
1660228023037049 {"stream":"btcusdt@depth@0ms","data":{"e":"depthUpdate","E":1660228023941,"T":1660228023931,"s":"BTCUSDT","U":1801732831593,"u":1801732832589,"pu":1801732831561,"b":[["2467.10","0.000"],["12006.00","0.001"],["24427.70","4.350"],["24620.30","0.172"],["24644.00","44.832"],["24645.40","0.203"],["24652.80","4.900"],["24664.10","4.279"],["24666.50","0.554"],["24666.80","6.764"],["24668.70","7.428"],["24670.90","2.000"],["24671.00","0.000"],["24672.70","0.000"],["24688.30","0.000"]],"a":[["24653.60","0.000"],["24669.80","0.000"],["24670.20","0.000"],["24670.70","0.000"],["24670.90","0.000"],["24671.00","20.812"],["24672.10","0.000"],["24672.30","0.001"],["24674.60","1.520"],["24674.80","0.000"],["24684.20","4.519"],["24684.30","0.202"],["24685.00","0.937"],["24690.90","4.827"],["24693.60","1.500"],["24729.10","0.171"]]}}
1660228023038319 {"stream":"btcusdt@depth@0ms","data":{"e":"depthUpdate","E":1660228023977,"T":1660228023966,"s":"BTCUSDT","U":1801732832805,"u":1801732834115,"pu":1801732832589,"b":[["2467.10","0.008"],["24643.00","4.457"],["24656.30","0.010"],["24657.70","0.005"],["24658.80","1.000"],["24658.90","1.500"],["24659.50","3.781"],["24659.70","1.806"],["24659.90","0.105"],["24660.60","0.787"],["24666.30","5.033"],["24666.40","0.012"],["24666.50","0.556"],["24668.70","7.426"],["24668.90","0.000"],["24670.90","2.535"],["24680.00","0.000"],["24688.30","0.000"]],"a":[["24653.60","0.000"],["24670.10","0.000"],["24670.60","0.000"],["24670.70","0.000"],["24670.90","0.000"],["24671.00","20.642"],["24672.00","0.000"],["24672.10","0.000"],["24673.50","0.145"],["24673.60","1.567"],["24674.50","3.746"],["24674.60","1.520"],["24678.30","1.304"],["24678.40","0.001"],["24678.80","0.546"],["24678.90","0.002"],["24681.60","0.020"],["24681.70","0.613"],["24681.90","0.077"],["24682.10","3.000"],["24682.20","0.000"],["24683.70","0.163"],["24683.80","4.162"],["24684.00","1.227"],["24684.20","4.519"],["24684.30","0.202"],["24684.90","1.331"],["24685.70","0.156"],["24685.80","0.325"],["24686.70","0.648"],["24692.60","0.040"],["24700.00","47.420"],["24729.10","0.006"]]}}
1660228023043260 {"stream":"btcusdt@trade","data":{"e":"trade","E":1660228023980,"T":1660228023973,"s":"BTCUSDT","t":2691833663,"p":"24670.90","q":"0.022","X":"MARKET","m":true}}
1660228023052991 {"stream":"btcusdt@trade","data":{"e":"trade","E":1660228023991,"T":1660228023983,"s":"BTCUSDT","t":2691833664,"p":"24671.00","q":"0.001","X":"MARKET","m":false}}
1660228023071108 {"stream":"btcusdt@depth@0ms","data":{"e":"depthUpdate","E":1660228024010,"T":1660228024002,"s":"BTCUSDT","U":1801732834136,"u":1801732835323,"pu":1801732834115,"b":[["2467.10","0.000"],["12006.00","0.000"],["24599.40","0.641"],["24603.20","0.104"],["24625.50","0.152"],["24645.20","0.476"],["24646.80","0.081"],["24652.60","0.254"],["24664.10","4.279"],["24666.50","0.878"],["24668.80","0.004"],["24670.90","2.513"],["24688.30","0.000"],["24787.00","0.000"]],"a":[["24653.60","0.000"],["24668.10","0.000"],["24668.70","0.000"],["24669.50","0.000"],["24669.80","0.000"],["24670.00","0.000"],["24670.60","0.000"],["24670.70","0.000"],["24670.90","0.000"],["24671.00","20.641"],["24672.20","0.000"],["24672.30","0.001"],["24673.50","0.040"],["24673.90","0.105"],["24674.70","2.139"],["24674.80","0.000"],["24683.70","0.963"],["24683.90","0.009"],["24685.70","0.556"],["24709.30","0.254"],["24723.80","0.000"],["24728.30","0.193"],["24729.50","4.477"],["24739.40","0.807"],["24743.20","0.235"],["24795.00","0.130"]]}}
1660228023117894 {"stream":"btcusdt@depth@0ms","data":{"e":"depthUpdate","E":1660228024044,"T":1660228024034,"s":"BTCUSDT","U":1801732835406,"u":1801732836571,"pu":1801732835323,"b":[["2467.10","0.000"],["24337.10","2.462"],["24616.50","1.050"],["24619.00","0.235"],["24640.00","5.148"],["24649.80","2.805"],["24650.00","14.374"],["24651.90","3.000"],["24653.30","1.400"],["24658.70","1.142"],["24658.80","0.000"],["24659.60","3.263"],["24659.70","0.006"],["24660.50","0.840"],["24660.60","0.387"],["24662.20","0.202"],["24663.10","7.147"],["24664.00","0.922"],["24664.20","0.131"],["24664.50","0.027"],["24666.20","7.066"],["24666.40","0.012"],["24668.80","0.002"],["24669.30","0.002"],["24670.20","0.811"],["24670.90","5.817"],["24688.30","0.000"]],"a":[["24653.60","0.000"],["24669.80","0.000"],["24669.90","0.000"],["24670.90","0.000"],["24671.00","20.121"],["24672.10","0.000"],["24672.80","0.000"],["24674.60","1.520"],["24675.30","0.421"],["24681.20","0.239"],["24681.50","1.343"],["24681.60","0.020"],["24681.70","0.213"],["24683.60","2.929"],["24683.70","0.163"],["24683.80","2.162"],["24684.70","0.646"],["24684.90","0.731"],["24692.90","0.321"],["24693.10","0.040"],["24700.70","0.537"],["24703.60","0.210"],["24721.50","7.245"]]}}
1660228023125009 {"stream":"btcusdt@trade","data":{"e":"trade","E":1660228024062,"T":1660228024055,"s":"BTCUSDT","t":2691833665,"p":"24670.90","q":"0.002","X":"MARKET","m":true}}
1660228023128966 {"stream":"btcusdt@trade","data":{"e":"trade","E":1660228024067,"T":1660228024061,"s":"BTCUSDT","t":2691833666,"p":"24670.90","q":"0.020","X":"MARKET","m":true}}
1660228023138740 {"stream":"btcusdt@depth@0ms","data":{"e":"depthUpdate","E":1660228024077,"T":1660228024066,"s":"BTCUSDT","U":1801732836639,"u":1801732837803,"pu":1801732836571,"b":[["2467.10","0.000"],["24659.00","0.000"],["24659.30","2.500"],["24663.00","1.038"],["24664.20","0.118"],["24666.20","7.065"],["24666.50","0.554"],["24666.70","3.987"],["24666.80","7.088"],["24666.90","0.014"],["24667.40","1.506"],["24668.90","0.006"],["24670.10","0.272"],["24670.90","6.726"],["24688.30","0.000"]],"a":[["24653.60","0.000"],["24668.70","0.000"],["24670.30","0.000"],["24670.50","0.000"],["24670.90","0.000"],["24679.00","0.001"],["24703.10","1.500"],["24710.50","0.057"],["24728.30","0.028"],["24768.50","0.318"],["24980.10","5.446"],["25050.00","119.300"]]}}
1660228023149748 {"stream":"btcusdt@trade","data":{"e":"trade","E":1660228024088,"T":1660228024081,"s":"BTCUSDT","t":2691833667,"p":"24671.00","q":"0.063","X":"MARKET","m":false}}
"""
rows = []
with gzip.open(input_filename, 'r') as f:
while True:
Expand Down Expand Up @@ -93,7 +93,8 @@ def convert(input_filename, output_filename, opt='', base_latency=0):
if num_corr < 0:
raise ValueError

print('Saving to %s' % output_filename)
np.savez(output_filename, data=data)
if output_filename is not None:
print('Saving to %s' % output_filename)
np.savez(output_filename, data=data)

print('Done')
return data
47 changes: 39 additions & 8 deletions hftbacktest/data/utils/snapshot.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,49 @@
from ...reader import UNTIL_END_OF_DATA


def create_last_snapshot(data_filename, output_snapshot_filename, tick_size, lot_size, initial_snapshot_filename=None):
def create_last_snapshot(
data,
tick_size,
lot_size,
initial_snapshot=None,
output_snapshot_filename=None
):
# Just to reconstruct order book from the given snapshot to the end of the given data.
hbt = HftBacktest(data_filename, tick_size, lot_size, 0, 0, ConstantLatency(0, 0), Linear, snapshot=initial_snapshot_filename)
hbt = HftBacktest(
data,
tick_size,
lot_size,
0,
0,
ConstantLatency(0, 0),
Linear,
snapshot=initial_snapshot
)

# Go to the end of the data.
hbt.goto(UNTIL_END_OF_DATA)

snapshot = []
snapshot += [[4, hbt.last_timestamp, -1, 1, float(bid * tick_size), float(qty)]
for bid, qty in sorted(hbt.bid_depth.items(), key=lambda v: -float(v[0]))]
snapshot += [[4, hbt.last_timestamp, -1, -1, float(ask * tick_size), float(qty)]
for ask, qty in sorted(hbt.ask_depth.items(), key=lambda v: float(v[0]))]
snapshot += [[
4,
hbt.last_timestamp,
-1,
1,
float(bid * tick_size),
float(qty)
] for bid, qty in sorted(hbt.bid_depth.items(), key=lambda v: -float(v[0]))]
snapshot += [[
4,
hbt.last_timestamp,
-1,
-1,
float(ask * tick_size),
float(qty)
] for ask, qty in sorted(hbt.ask_depth.items(), key=lambda v: float(v[0]))]

np.savez(output_snapshot_filename, data=np.asarray(snapshot, np.float64))
print('Done')
snapshot = np.asarray(snapshot, np.float64)

if output_snapshot_filename is not None:
np.savez(output_snapshot_filename, data=snapshot)

return snapshot
39 changes: 31 additions & 8 deletions hftbacktest/data/utils/tardis.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,12 @@
from .. import merge_on_local_timestamp, correct, validate_data


def convert(input_files, output_filename, buffer_size=100_000_000, base_latency=0):
def convert(
input_files,
output_filename=None,
buffer_size=100_000_000,
base_latency=0
):
TRADE = 0
DEPTH = 1

Expand All @@ -27,12 +32,30 @@ def convert(input_files, output_filename, buffer_size=100_000_000, base_latency=
break
cols = line.decode().strip().split(',')
if len(cols) < 8:
print('error', cols, line)
print('Warning: Invalid Data Row', cols, line)
continue
if file_type is None:
if cols == ['exchange', 'symbol', 'timestamp', 'local_timestamp', 'id', 'side', 'price', 'amount']:
if cols == [
'exchange',
'symbol',
'timestamp',
'local_timestamp',
'id',
'side',
'price',
'amount'
]:
file_type = TRADE
elif cols == ['exchange', 'symbol', 'timestamp', 'local_timestamp', 'is_snapshot', 'side', 'price', 'amount']:
elif cols == [
'exchange',
'symbol',
'timestamp',
'local_timestamp',
'is_snapshot',
'side',
'price',
'amount'
]:
file_type = DEPTH
elif file_type == TRADE:
# Insert TRADE_EVENT
Expand Down Expand Up @@ -116,7 +139,6 @@ def convert(input_files, output_filename, buffer_size=100_000_000, base_latency=
sets.append(tmp[:row_num])

print('Merging')

data = sets[0]
del sets[0]
while len(sets) > 0:
Expand All @@ -130,7 +152,8 @@ def convert(input_files, output_filename, buffer_size=100_000_000, base_latency=
if num_corr < 0:
raise ValueError

print('Saving to %s' % output_filename)
np.savez(output_filename, data=data)
if output_filename is not None:
print('Saving to %s' % output_filename)
np.savez(output_filename, data=data)

print('Done')
return data
Empty file removed hftbacktest/utils/__init__.py
Empty file.
Empty file removed hftbacktest/utils/data/__init__.py
Empty file.
Loading

0 comments on commit b73eae7

Please sign in to comment.