-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathread_files.py
83 lines (70 loc) · 2.91 KB
/
read_files.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import json
from datetime import date
def read_specs(json_file):
    """Load a delimited-record layout specification from a JSON file.

    The document is read as a sequence: element 0 supplies the file-level
    settings (``format``, ``delimiter``, ``data_file_type``,
    ``column_identifier``, ``repeated_records``) and element 1 supplies a
    ``parameters`` list with one entry per column.

    Args:
        json_file: Path of the JSON spec file (read as UTF-8).

    Returns:
        A ``(meta_data, parameters)`` tuple.

        ``meta_data`` is ``[delimiter, data_file_type, primary_key,
        repeated]``, where a ``data_file_type`` of ``"text"`` is reported
        as ``"txt"``.

        ``parameters`` maps each column's ``index`` to a list starting with
        ``[name, type, max_length, required]``. ``type`` is ``str``, ``int``
        or ``datetime.date`` for the recognised type names and the raw spec
        value otherwise. Date columns get their ``date_format`` appended;
        e-mail columns that carry more than five keys get ``xx_email``,
        ``brown_email``, ``strict_email`` and ``allowed_symbols`` appended.

    Raises:
        SystemExit: With status 1 when the spec's ``format`` is not
            ``"delimited records"``.
        KeyError: When a key read from the spec is missing.
    """
    # JSON text is UTF-8; do not depend on the platform's locale encoding.
    with open(json_file, encoding="utf-8") as spec_file:
        specs = json.load(spec_file)

    header = specs[0]

    # Only delimited records are supported.
    if header['format'] != "delimited records":
        print("I don't know how deal to with these records yet!")
        # Raise SystemExit directly: the builtin exit() is an interactive
        # helper from the site module that closes sys.stdin, and a status
        # of 0 would report this failure as success.
        raise SystemExit(1)

    # File-level settings.
    delimiter = header['delimiter']
    data_file_type = header['data_file_type']
    if data_file_type == "text":
        data_file_type = "txt"
    primary_key = header['column_identifier']
    repeated = header['repeated_records']
    meta_data = [delimiter, data_file_type, primary_key, repeated]

    # Per-column settings.
    columns = specs[1]['parameters']
    parameters_count = len(columns)
    print(f"Number of parameters: {parameters_count}\n")

    parameters = {}
    for param in columns:
        parameter_index = param['index']
        parameter_name = param['parameter']
        parameter_type = param['type']

        # Translate recognised type names to Python types; anything else
        # (for example "email") is kept as the raw spec value.
        if parameter_type in ("string", "str"):
            parameter_type = str
        elif parameter_type in ("num", "number", "int"):
            parameter_type = int
        elif parameter_type == "date":
            parameter_type = date

        parameter_length = int(param['maxLength'])
        parameter_required = param['required']

        entry = [
            parameter_name,
            parameter_type,
            parameter_length,
            parameter_required,
        ]

        # Type-specific extras follow the four common fields.
        if parameter_type == "email" and len(param) > 5:
            entry += [
                param['xx_email'],
                param['brown_email'],
                param['strict_email'],
                param['allowed_symbols'],
            ]
        elif parameter_type is date:
            entry.append(param['date_format'])

        parameters[parameter_index] = entry

    return meta_data, parameters
def read_data(data_file, delimiter):
    """Parse a delimited text file into one dict per line.

    Every line of the file becomes a dict keyed by 1-based field position.
    Field values are stripped of surrounding whitespace; fields that are
    empty after stripping are left out of the dict, and the literal
    ``'n/a'`` is stored as ``'N/A'``. A summary line with the number of
    records is printed once the file has been processed.

    Args:
        data_file: Path of the text file to read.
        delimiter: Separator string passed to ``str.split`` for each line.

    Returns:
        A list of dicts, one per line, in file order.
    """
    with open(data_file, 'r') as handle:
        lines = handle.read().splitlines()

    rows = []
    for line in lines:
        row = {}
        # Positions count every field, including the empty ones skipped below.
        for position, raw_value in enumerate(line.split(delimiter), start=1):
            value = raw_value.strip()
            if not value:
                continue
            row[position] = 'N/A' if value == 'n/a' else value
        rows.append(row)

    print(f"Processed {len(rows)} records")
    return rows