-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathparse.py
99 lines (78 loc) · 3.24 KB
/
parse.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import copy
finallist = [] # Module-level accumulator: get_average_per_year() appends one dict per (country, year) here; the print_result_* functions read it
def rmv_b(country):
    """Strip every ``b'`` marker from a country name.

    The marker appears because the CSV lines are read as ``bytes`` and then
    converted with ``str()``, which yields the repr (``"b'Albania,..."``)
    instead of decoded text.

    Args:
        country: Country name, possibly carrying the ``b'`` marker.

    Returns:
        The name with all occurrences of ``b'`` removed.
    """
    pieces = country.split("b'")
    return "".join(pieces)
def print_result_per_year():
    """Write the per-country, per-year totals to ``result_per_years.txt``.

    Reads the module-level ``finallist`` and writes a header line followed by
    one comma-separated line per entry, in the order country, population,
    suicide count, year. The file is created (or truncated) in the current
    working directory.
    """
    columns = ('country', 'population', 'suicide_nbr', 'year')
    # The context manager guarantees the file is flushed and closed, even if
    # one of the writes raises.
    with open("result_per_years.txt", "w") as newf:
        newf.write("country, population, suicide_nbr, year\n")
        for line in finallist:
            newf.write(",".join(str(line[key]) for key in columns) + "\n")
def print_result_per_country(country, fd):
    """Write one summary line for ``country`` to ``fd``.

    Only ``finallist`` entries for this country with a population greater
    than zero are taken into account. Nothing is written when no entry
    qualifies.

    The line holds three comma-separated values: the country name, the
    average yearly suicide count expressed as a percentage of the population
    of the last qualifying entry (four decimals), and the average number of
    suicides per qualifying year.

    Args:
        country: Country name as stored in ``finallist``.
        fd: Writable text file object receiving the line.
    """
    matching = [
        entry for entry in finallist
        if country == entry['country'] and entry['population'] > 0
    ]
    if not matching:
        return

    year_count = len(matching)
    total_suicides = sum(entry['suicide_nbr'] for entry in matching)
    # The population of the last qualifying entry is the reference value.
    reference_population = matching[-1]['population']

    ratio = float(((100 * total_suicides) / reference_population) / year_count)
    formatted_ratio = format(ratio, '.4f')
    yearly_average = str(total_suicides / year_count)
    fd.write(",".join([country, formatted_ratio, yearly_average]) + "\n")
def get_average_per_year(dataset, country, year, last_year=2016):
    """Sum the rows of ``country`` per year and record the totals.

    Despite its name, this function sums rather than averages. Each row of
    ``dataset`` is converted with ``str()`` and split on commas; field 0 is
    the country, field 1 the year, field 4 the suicide count and field 5 the
    population. For every year from ``year`` to ``last_year`` (inclusive) one
    dict with the keys ``country``, ``year``, ``suicide_nbr`` and
    ``population`` is appended to the module-level ``finallist``, even when
    no row matched (the totals are then 0).

    Rows that were counted are removed from ``dataset`` in place, so later
    calls for other countries scan fewer rows.

    Args:
        dataset: Mutable list of raw CSV rows; modified in place.
        country: Country name, already stripped with ``rmv_b``.
        year: First year to aggregate.
        last_year: Last year to aggregate (inclusive). Defaults to 2016.
            If it is lower than ``year`` nothing is recorded.

    Raises:
        ValueError: If a row of ``country`` has a non-integer year, or a
            counted row has a non-integer suicide count or population.
        IndexError: If such a row has too few fields.
    """
    # One [suicides, population] accumulator per requested year.
    totals = {y: [0, 0] for y in range(year, last_year + 1)}

    # Rebuild the list of untouched rows rather than deleting by index while
    # scanning: index-based deletion drifts after the first removal and ends
    # up discarding rows that were never counted.
    remaining = []
    for line in dataset:
        fields = str(line).split(",")
        bucket = None
        # The year is parsed only for rows of the requested country, so
        # unrelated rows (e.g. a header) never reach int().
        if country == rmv_b(fields[0]):
            bucket = totals.get(int(fields[1]))
        if bucket is None:
            remaining.append(line)
            continue
        bucket[0] += int(fields[4])
        bucket[1] += int(fields[5])

    # Slice assignment keeps the caller's list object and updates it in place.
    dataset[:] = remaining

    for current_year in range(year, last_year + 1):
        suicide, population = totals[current_year]
        finallist.append({
            'country': country,
            'year': current_year,
            'suicide_nbr': suicide,
            'population': population,
        })
def get_country(dataset):
    """Return the distinct country fields of ``dataset`` in first-seen order.

    Each row is converted with ``str()`` and split on commas; the first field
    is taken as the country name. Names are returned raw: when rows are
    ``bytes`` they still carry the ``b'`` repr prefix, which callers strip
    with ``rmv_b``. No header detection is performed, so the first field of
    every row is treated as a country name.

    Unlike a change-detection scan, this also reports the last country of the
    dataset and lists a country only once even if its rows are not contiguous.

    Args:
        dataset: Iterable of raw CSV rows (``str`` or ``bytes``).

    Returns:
        List of unique, non-empty first fields in order of first appearance.
    """
    countries = []
    seen = set()
    for row in dataset:
        name = str(row).split(",")[0]
        # Skip empty first fields and names that were already collected.
        if name and name not in seen:
            seen.add(name)
            countries.append(name)
    return countries
# Load the raw CSV rows. The file is read in binary mode, so each row is a
# bytes object; the helpers convert rows with str() and rmv_b() strips the
# resulting "b'" marker from country names.
with open('master.csv', 'rb') as f:
    dataset = list(f)

# Collect the list of countries.
countries = get_country(dataset)
lencountries = len(countries)

# Merge the rows sharing the same country and the same year. Progress and the
# number of rows still left in the dataset are printed for each country.
for i, country in enumerate(countries):
    country = rmv_b(country)
    print(country, str(i) + "/" + str(lencountries))
    get_average_per_year(dataset, country, 1985)
    print(len(dataset))

# Write the result of the per-year merge.
print_result_per_year()

# Collapse the years of each country into a single line. The context manager
# ensures the report is flushed and the handle closed.
with open("result_per_country.txt", "w") as fd:
    fd.write("country, percent_average, suicide_average\n")
    for country in countries:
        print_result_per_country(rmv_b(country), fd)