-
Notifications
You must be signed in to change notification settings - Fork 27
/
Copy pathhelpers.py
59 lines (47 loc) · 1.84 KB
/
helpers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import json
import os
import shutil
from collections import namedtuple
from datetime import datetime
# Column names of an item record, in the order the '|'-separated values appear.
item_fields = [
    'tx_id',
    'tx_time',
    'buyer',
    'currency_code',
    'payment_type',
    'credit_card_number',
    'country',
    'department',
    'product',
    'item_price',
    'coupon_code',
    'was_returned',
]
Item = namedtuple('Item', item_fields)


def parse_item(raw_string):
    """Turn one '|'-separated line into an ``Item``.

    Values are kept as the raw strings produced by the split; no type
    conversion is done. When the line has fewer values than
    ``item_fields``, the missing trailing fields are set to ``None``.

    Args:
        raw_string: The text line to split on '|'.

    Returns:
        An ``Item`` with one attribute per entry of ``item_fields``.

    Raises:
        TypeError: If the line holds more values than ``item_fields``
            (raised by the namedtuple constructor).
    """
    values = raw_string.split('|')
    missing = len(item_fields) - len(values)
    if missing > 0:
        # Short record: pad so every field of the tuple gets a value.
        values.extend([None] * missing)
    return Item(*values)
# namedtuple usage examples:
# Thing = namedtuple('Thing', ['foo', 'bar'])
# some = Thing(foo=42, bar='hello')
# some.foo  # -> 42
# item = parse_item("one|two")
# new_item = item._replace(tx_id=1, buyer=5)  # returns a modified copy
# API http://fixer.io/
def get_usd_exchange_rates(path='./data/exchange_rates_usd.json'):
    """Load the exchange-rate table from a local JSON file.

    The file must contain a JSON object with a top-level ``"rates"`` key;
    only the value stored under that key is returned.

    Args:
        path: Location of the JSON file. Defaults to the file bundled
            under ``./data`` so existing zero-argument callers are
            unaffected.

    Returns:
        Whatever is stored under ``"rates"`` in the file (presumably a
        mapping of currency code to rate -- the schema is not enforced here).

    Raises:
        IOError/OSError: If the file cannot be opened.
        ValueError: If the file is not valid JSON.
        KeyError: If the document has no ``"rates"`` entry.
    """
    with open(path) as rates_file:
        payload = json.load(rates_file)
    return payload['rates']
# Column names of a container record, in the order the ';'-separated values appear.
container_fields = [
    'ship_imo',
    'ship_name',
    'country',
    'departure',
    'container_id',
    'container_type',
    'container_group',
    'net_weight',
    'gross_weight',
    'owner',
    'declared',
    'contact',
    'customs_ok',
]
Container = namedtuple('Container', container_fields)


def parse_container(raw_string):
    """Turn one ';'-separated line into a ``Container``.

    Values are kept as the raw strings produced by the split; no type
    conversion is done. When the line has fewer values than
    ``container_fields``, the missing trailing fields are set to ``None``.

    Args:
        raw_string: The text line to split on ';'.

    Returns:
        A ``Container`` with one attribute per entry of ``container_fields``.

    Raises:
        TypeError: If the line holds more values than ``container_fields``
            (raised by the namedtuple constructor).
    """
    values = raw_string.split(';')
    missing = len(container_fields) - len(values)
    if missing > 0:
        # Short record: pad so every field of the tuple gets a value.
        values.extend([None] * missing)
    return Container(*values)
# Field names are Spanish: simbolo = symbol, numero = number,
# precio_compra = purchase price, ultimo_precio = last price.
stock_fields = [
    'simbolo',
    'numero',
    'precio_compra',
    'ultimo_precio',
    'returns',
]
Stock = namedtuple('Stock', stock_fields)


def parse_stock(raw_string):
    """Build a ``Stock`` from a ','-separated line.

    Only the first two values of the line are used: the first becomes
    ``simbolo`` unchanged and the second is converted to ``float`` for
    ``ultimo_precio``. ``numero`` and ``precio_compra`` are left as
    ``None`` and ``returns`` starts at ``0.0``.

    Args:
        raw_string: The text line to split on ','.

    Returns:
        A ``Stock`` tuple.

    Raises:
        IndexError: If the line has fewer than two values.
        ValueError: If the second value is not a valid float.
    """
    parts = raw_string.split(',')
    symbol = parts[0]
    last_price = float(parts[1])
    return Stock(
        simbolo=symbol,
        numero=None,
        precio_compra=None,
        ultimo_precio=last_price,
        returns=0.0,
    )
def setup_checkpoint(streamingContext, checkpoint='./checkpoint'):
    """Recreate an empty checkpoint directory and register it on the context.

    Any existing directory at ``checkpoint`` is deleted with all of its
    contents, a fresh empty directory is created in its place, and the
    path is then passed to ``streamingContext.checkpoint``.

    Args:
        streamingContext: Object exposing a ``checkpoint(path)`` method
            (presumably a Spark ``StreamingContext`` -- not verifiable
            from this file).
        checkpoint: Directory to reset and use. Defaults to
            ``./checkpoint`` so existing one-argument callers are
            unaffected.

    Raises:
        OSError: If the existing path cannot be removed (for example it
            is a regular file) or the directory cannot be created.
    """
    # Start from a clean slate: state left by a previous run is discarded.
    if os.path.exists(checkpoint):
        shutil.rmtree(checkpoint)
    # makedirs rather than mkdir so a nested custom path also works; for the
    # default single-level path the two behave the same.
    os.makedirs(checkpoint)
    streamingContext.checkpoint(checkpoint)
def isoDate(raw_string):
    """Parse a ``YYYY-MM-DDTHH:MM:SSZ`` timestamp, returning None on failure.

    The trailing ``Z`` is matched as a literal character, so the result is
    a naive ``datetime`` with no tzinfo attached.

    Args:
        raw_string: Text to parse. Non-string values (e.g. ``None``) are
            tolerated and yield ``None``.

    Returns:
        The parsed ``datetime``, or ``None`` when ``raw_string`` is not a
        string or does not match the expected format.
    """
    try:
        return datetime.strptime(raw_string, "%Y-%m-%dT%H:%M:%SZ")
    except (TypeError, ValueError):
        # ValueError: text does not match the format.
        # TypeError: input is not a string (e.g. a None placeholder).
        # Anything else is a genuine bug and is allowed to propagate.
        return None
def dataUrl(fileName, base="./data"):
    """Return the location of a data file under ``base``.

    Args:
        fileName: Name (or relative path) of the data file.
        base: Root location of the data set. Defaults to the local
            ``./data`` directory; pass another root, e.g.
            ``"gs://bigdataupv_data"``, to point at a different store
            without editing this function.

    Returns:
        ``base`` and ``fileName`` joined with ``os.path.join``.
    """
    # NOTE: os.path.join uses the host OS separator, so URL-style bases
    # such as "gs://..." only come out right where that separator is "/".
    return os.path.join(base, fileName)