-
Notifications
You must be signed in to change notification settings - Fork 0
/
convert_price_window_json_csv.py
142 lines (123 loc) · 5.12 KB
/
convert_price_window_json_csv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
'''
Author : Timothy Holt - [email protected]
Sep 2023
Convert Windows JSON Data to CSV and Vice Versa.
This script provides functionality to convert data between Windows JSON and CSV
formats. It reads the input file, performs the conversion, and saves the output
file. It can handle both JSON-to-CSV and CSV-to-JSON conversions.
Usage:
python script.py input_filename
The script accepts the following command-line arguments:
- input_filename (str): The name of the input file (including path).
It must be either a JSON or CSV file.
Output:
- For JSON input, it converts the data to a CSV file with the same name.
- For CSV input, it converts the data to a JSON file with the same name.
The script follows these steps:
1. Load the command-line argument as the input file.
2. Determine the file type (JSON or CSV) based on the file extension.
3. Perform the appropriate conversion.
4. Save the converted data to a new file, ensuring it does not overwrite an
existing file unless the user confirms.
Note:
- For JSON input, it assumes that the JSON file contains data in the format
expected for conversion (with specific keys for columns).
- For CSV input, it expects columns with specific labels (e.g., 'uuid',
'start_date', 'post_code', 'latitude', 'longitude', 'brand', and columns
labeled as 'p0' to 'p89' for price data).
To convert all postal regions at once use (bash):
for i in {0..9}; do python convert_price_window_json_csv.py "databases/${i}_price_windows.json"; done
'''
import pandas as pd
import json
import sys
import os
def load_argv():
try:
filename = sys.argv[1]
assert os.path.exists(filename)
print(f'Converting: {filename}')
except:
print(
'Please provide file as command line argument\n Usage: argv = [filename (including path)]\n')
exit()
return filename
def check_for_overwrite(export_name):
while os.path.exists(export_name):
textin = input(f'file: {export_name} already exists. Overwrite? (y/n): ')
if textin.lower() == 'y':
break
else:
export_name = input('Please provide new export filename (or exit): ')
if export_name.lower() == 'exit':
exit()
return export_name
def load_windows_json(filename):
with open(filename, 'r') as f:
d = json.load(f)
return d
def windows_dict_to_df(d):
# columns = ['uuid', 'start_date', 'post_code', 'latitude', 'longitude', 'brand', p0, ..., p89]
price_labels = [f'p{i}' for i in range(90)]
det_price_labels = [f'det_p{i}' for i in range(90)]
new_d = {}
for i, (k, v) in enumerate(d.items()):
subdict = {
'uuid' : k.split('_')[0],
'post_code' : v['post_code'],
'brand' : v['brand'],
'latitude' : v['geotag'][0],
'longitude' : v['geotag'][1],
'start_date' : v['start_date'],
}
prices = dict(zip(price_labels, v['price']))
subdict.update(prices)
if 'detrended_price' in v:
detrended_prices = dict(zip(det_price_labels, v['detrended_price']))
subdict.update(detrended_prices)
new_d[i] = subdict
df = pd.DataFrame.from_dict(new_d, orient='index')
df.sort_values(['post_code', 'uuid', 'start_date'], inplace=True)
df.reset_index(drop=True, inplace=True)
return df
filename = load_argv()
path_name = filename.split('.')[0]
extension = filename.split('.')[-1]
if extension not in {'csv', 'json'}:
print('Not a valid file type. File must be .csv or .json')
exit()
if extension == 'json':
d = load_windows_json(filename)
df = windows_dict_to_df(d)
print(df)
export_name = path_name + '.csv'
export_name = check_for_overwrite(export_name)
df.to_csv(export_name, index=False)
elif extension == 'csv':
df = pd.read_csv(filename, dtype={'start_date': 'str', 'post_code': 'str'})
data_column_labels = ['uuid', 'start_date', 'post_code', 'latitude', 'longitude', 'brand']
price_column_labels = [f'p{i}' for i in range(90)]
det_price_column_labels = [f'det_p{i}' for i in range(90)]
price_matrix = df[price_column_labels].to_numpy()
if det_price_column_labels[0] in df.columns:
det_price_matrix = df[det_price_column_labels].to_numpy()
sub_df = df[data_column_labels]
outer_dict = {}
for i, obs in sub_df.iterrows():
key = f"{obs['uuid']}_{obs['start_date']}_{len(price_column_labels)}"
inner_dict = {
'uuid': obs['uuid'],
'start_date': obs['start_date'],
'post_code': obs['post_code'],
'geotag': (obs['latitude'], obs['longitude']),
'brand': obs['brand'],
}
inner_dict['price'] = price_matrix[i,:].tolist()
if det_price_column_labels[0] in df.columns:
inner_dict['detrended_price'] = det_price_matrix[i,:].tolist()
outer_dict[key] = inner_dict
export_name = path_name + '.json'
export_name = check_for_overwrite(export_name)
with open(export_name, 'w') as f:
export_json = json.dumps(outer_dict, indent=4)
f.write(export_json)