-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathio_tools.py
116 lines (105 loc) · 2.76 KB
/
io_tools.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
#!/usr/bin/python
# -*- coding: utf-8 -*-
import csv, codecs, sys
def readLines(outfile, delimiter):
    '''
    Split every line of an iterable of text lines into a list of fields.

    outfile: iterable yielding strings (e.g. an open text file); despite
        the name it is only read from.
    delimiter: separator handed to str.split; None splits on runs of
        whitespace.

    Returns a list holding one list of field strings per input line.

    Surrounding whitespace is trimmed from each line before splitting, but
    characters that belong to the delimiter are never trimmed. Otherwise a
    tab-delimited line with empty first or last fields would silently lose
    those fields and shift the remaining values into the wrong columns.
    '''
    ascii_whitespace = ' \t\n\r\x0b\x0c'
    if delimiter is None or not any(ch in ascii_whitespace for ch in delimiter):
        # Delimiter contains no whitespace: a plain strip() cannot eat it.
        return [line.strip().split(delimiter) for line in outfile]

    # Trim only the whitespace characters that are not part of the delimiter.
    trim_chars = ''.join(ch for ch in ascii_whitespace if ch not in delimiter)
    return [line.strip(trim_chars).split(delimiter) for line in outfile]
def getCsv(data):
    '''
    Turn a list of rows (first row = header) into a list of dictionaries.

    data: sequence of rows, each row a sequence of field values. The first
        row supplies the keys, e.g.
        Title, Wolf Title, Artist, Genre, ISRC, wolf_id, wolf_artist_id

    Returns (keys, out_data): the header row and one dict per data row,
    mapping each key to the value found at the same position. Rows shorter
    than the header get '' for the missing fields (a warning is printed for
    each missing field); values beyond the header length are ignored.
    Empty input yields ([], []).
    '''
    if not data:
        # No header row available; nothing to map.
        return [], []

    keys = data[0]
    out_data = []
    for row in data[1:]:
        record = {}
        for position, key in enumerate(keys):
            if position < len(row):
                record[key] = row[position]
            else:
                # (row,) keeps the formatting safe even when row is a tuple.
                print('WARNING line %s not normalized, substituting empty string. ' % (row,))
                record[key] = ''
        out_data.append(record)
    return keys, out_data
def getCsvReader(filename, delimiter, line_start=0):
    '''
    Read a delimited text file into a list of dictionaries via csv.reader.

    filename: path of the file to read.
    delimiter: one-character field separator passed to csv.reader.
    line_start: zero-based index of the header row; rows before it are
        skipped.

    Returns (keys, out_data): the header row and one dict per following
    row, mapping each key to the stripped value at the same position. Rows
    shorter than the header get '' for the missing fields (a warning is
    printed for each missing field). A file with no rows yields ([], []).
    '''
    keys = []
    out_data = []
    # newline='' lets the csv module do its own newline handling; the old
    # 'rU' mode is rejected by Python 3.11+. The with-block closes the file.
    with open(filename, 'r', newline='') as fin:
        reader = csv.reader(fin, delimiter=delimiter, quotechar='"',
                            quoting=csv.QUOTE_MINIMAL)
        for index, row in enumerate(reader):
            if index <= line_start:
                # Every row up to line_start overwrites keys, so the row at
                # line_start ends up being the header.
                keys = row
                continue
            record = {}
            for position, key in enumerate(keys):
                if position < len(row):
                    record[key] = row[position].strip()
                else:
                    print('WARNING line %s not normalized, substituting empty string. ' % (row,))
                    record[key] = ''
            out_data.append(record)
    return keys, out_data
def csvFormattedOut(row,keys):
    '''
    Flatten a dictionary row into a list ordered like keys.

    row: dictionary holding the values of one record
    keys: list giving the desired column order

    Returns the values of row, one per entry of keys, in that order.
    A key that is absent from row raises KeyError.
    '''
    return [row[column] for column in keys]
def outCsv(keys, data, csv_out_name, delimiter=','):
    '''
    Write a list of dictionaries to a delimited text file.

    keys: list of column names; written as the header row and used to
        order the values of every record.
    data: iterable of dictionaries, one per output row. Keys missing from
        a record are written as empty strings; the record itself is left
        unmodified.
    csv_out_name: path of the file to create (an existing file is
        overwritten).
    delimiter: one-character field separator, ',' by default.

    If a row cannot be written, the error and the offending record are
    printed and the program terminates via sys.exit(1); the output file is
    closed first.
    '''
    # newline='' is what the csv module expects for files it writes to;
    # the with-block closes the file even when sys.exit() is raised below.
    with open(csv_out_name, 'w', newline='') as csv_out:
        out_writer = csv.writer(csv_out, delimiter=delimiter, quotechar='"')
        out_writer.writerow(keys)
        for record in data:
            # Build the ordered row without touching the caller's dict.
            ordered = []
            for key in keys:
                try:
                    ordered.append(record[key])
                except KeyError:
                    ordered.append('')
            try:
                out_writer.writerow(ordered)
            except Exception:
                print("line could not be written with error: %s"%sys.exc_info()[0])
                print("> with keys: %s"%'\t'.join(str(k) for k in record.keys()))
                for k in record.keys():
                    print("%s: %s"%(k, record[k]))
                # Non-zero status so callers and shells can see the failure.
                sys.exit(1)
def outCsvInfo(keys, data, csv_out_name, delimiter=','):
    '''
    Write the dictionaries stored at index 1 of each data item to a file.

    keys: list of column names; written as the header row and used to
        order the values of every record.
    data: iterable of indexable items (e.g. tuples) whose element [1] is
        the dictionary to write. Keys missing from a record are written as
        empty strings; the record itself is left unmodified.
    csv_out_name: path of the file to create (an existing file is
        overwritten).
    delimiter: one-character field separator, ',' by default.

    Writing is best-effort: a row that cannot be written is reported on
    stdout and skipped, and processing continues with the next item.
    '''
    # newline='' is what the csv module expects for files it writes to;
    # the with-block guarantees the file is closed.
    with open(csv_out_name, 'w', newline='') as csv_out:
        out_writer = csv.writer(csv_out, delimiter=delimiter, quotechar='"')
        out_writer.writerow(keys)
        for info in data:
            record = info[1]
            # Build the ordered row without touching the caller's dict.
            ordered = []
            for key in keys:
                try:
                    ordered.append(record[key])
                except KeyError:
                    ordered.append('')
            try:
                out_writer.writerow(ordered)
            except Exception:
                # Report and carry on; str() keeps the report itself from
                # failing on non-string keys.
                print("line could not be written with error: %s"%sys.exc_info()[0])
                print("> with keys: %s"%'\t'.join(str(k) for k in record.keys()))
                for k in record.keys():
                    print("%s: %s"%(k, record[k]))