Add sonej code

eebersol · Mar 15, 2019 · 29d9384 · 29d9384
1 parent de18594
commit 29d9384
Show file tree

Hide file tree

Showing 7 changed files with 3,512 additions and 3,449 deletions.
diff --git a/merge_result.py b/merge_result.py
@@ -1,11 +1,12 @@
-
+def rmv_b(country): # Strip the "b'" marker that str() leaves in front of the country name when the line was read as bytes (file opened in 'rb' mode)
+    return country.replace("b'", "")
 
 def print_result():
     f = open("result_per_years.txt", "w")
+
     f.write("country, population, percent, suicide_nbr, year\n")
     for line in finallist:
-        finalline = str(line['country']) + ',' + str(line['population']) + ',' + str(line['percent']) + ',' + str(line['suicide_nbr']) + ',' + str(line['year']) + '\n'
-        f.write(finalline)
+        f.write(str(line['country']) + ',' +  str(line['population']) + ',' + str(line['percent']) + ',' + str(line['suicide_nbr']) + ',' + str(line['year']) + '\n')
 
 def get_data(country, res_pib_country):
 
@@ -17,21 +18,28 @@ def get_data(country, res_pib_country):
             return line[33]
     return ""
 
-def merge_data():
-
+def merge_data(): # Merge result_per_country.txt and result_pib_country.txt, keeping only the significant information
     finalist = []
+
+    # Open both input files
     with open('result_per_country.txt','rb') as f:
         res_per_country = list(f)
+        del res_per_country[0]
     with open('result_pib_country.txt','rb') as f:
         res_pib_country = list(f)
+        del res_pib_country[0]
+
+    # Create the output file and write its header row
     f = open("result_final.txt", "w")
     f.write("country,average_suicide,average_pib\n")
-    for elem in res_per_country:
-
-        line = str(elem).split(",")
-        line[0] = line[0].replace("b'", "")
 
-        data = get_data(line[0], res_pib_country)
+    # Iterate over the rows read from result_per_country.txt
+    for elem in res_per_country:
+        line    = str(elem).split(",")
+        line[0] = rmv_b(line[0])
+        # Look up the country of this row in the result_pib_country.txt data
+        data    = get_data(line[0], res_pib_country)
+        # If a matching country was found, use its data
         if len(data) > 1:
             f.write(line[0] + ',' + line[1] + '%,' + data[:(len(data)-3)] + '\n')
 

diff --git a/parse.py b/parse.py
@@ -1,79 +1,90 @@
-finallist = []
+finallist = [] # global variable used to store the final list
 
-def print_result_per_year():
-    f = open("result_per_years.txt", "w")
-    f.write("country, population, percent, suicide_nbr, year\n")
+def rmv_b(country): # Strip the "b'" marker that str() leaves in front of the country name when the line was read as bytes (file opened in 'rb' mode)
+    return country.replace("b'", "")
+
+def print_result_per_year(): # Write the result of the first merge to result_per_years.txt
+    newf = open("result_per_years.txt", "w")
+
+    newf.write("country, population, suicide_nbr, year\n")
     for line in finallist:
-        finalline = str(line['country']) + ',' + str(line['population']) + ',' + str(line['percent']) + ',' + str(line['suicide_nbr']) + ',' + str(line['year']) + '\n'
-        f.write(finalline)
+        newf.write(str(line['country']) + ',' + \
+                        str(line['population']) + ',' + \
+                            str(line['suicide_nbr']) + ',' + \
+                                str(line['year']) + '\n')
+
+def print_result_per_country(country, fd): # Merge the different years of a single country into one line
+    population  = 0
+    suicides    = 0
+    years       = 0
 
-def print_result_per_country(country, f):
-    i = 0
-    percent_average = 0
-    suicide_nbr = 0
-    population = 0
     for elem in finallist:
         if country == elem['country'] and elem['population'] > 0:
-            i += 1
-            percent_average += (elem['population'] * elem["percent"])/100
-            population = elem['population']
-            suicide_nbr += elem['suicide_nbr']
-    if i > 0:
-        percent_average_suicide = float(((100 * suicide_nbr)/(population))/i)
+            population  = elem['population']
+            suicides    += elem['suicide_nbr']
+            years       += 1
+    if years > 0:
+        percent_average_suicide = float(((100 * suicides)/(population))/years)
         percent_average_suicide = '%.4f'%(percent_average_suicide)
-        finalline = country + ',' + str(percent_average_suicide) + ',' + str(suicide_nbr/i) + '\n'
-        f.write(finalline)
+        fd.write(country + ',' + str(percent_average_suicide) + ',' + str(suicides/years) + '\n')
 
-def get_average_per_year(dataset, country, year):
-    percent = 0
-    suicide = 0
-    population = 0
+def get_average_per_year(dataset, country, year): # Merge the data rows that share the same country and the same year
+    suicide     = 0
+    population  = 0
+
     for line in dataset:
         line = str(line).split(",")
-        line[0] = line[0].replace("b'", "")
-        if country == line[0] and year == int(line[1]):
-            suicide += int(line[4])
-            population += int(line[5])
-    if population > 0:
-        percent = (100 * int(suicide)) / float(population)
-    obj = {
-        'country':country,
-        'year':year,
-        'suicide_nbr':suicide,
-        'percent':percent,
-        'population':population
-    }
-    finallist.append(obj)
-    year += 1
-    if year == 2016:
-        return
+        # If both the country and the year match, accumulate the values of this row
+        if country == rmv_b(line[0]) and year == int(line[1]):
+            suicide     += int(line[4])
+            population  += int(line[5])        
+
+    finallist.append({
+        'country'       : country,
+        'year'          : year,
+        'suicide_nbr'   : suicide,
+        'population'    : population
+    })
+
+    if year + 1 == 2017:
+        return 
     else:
-        get_average_per_year(dataset, country, year)
+        get_average_per_year(dataset, country, year + 1)
 
-def get_country(dataset):
-    country = "Albania"
+def get_country(dataset): # Build the list of countries
     countryList = []
+    country     = 0
+
     for line in dataset:
         line = str(line).split(",")
-        if line[0] != country:
+        if country and line[0] != country:
             countryList.append(country)
         country = line[0]
     return countryList
 
 with open('master.csv','rb') as f:
-    mylist = list(f)
-    countryList =  get_country(mylist)
-    for country in countryList:
-        country = country.replace("b'", "")
-        print(country)
-        get_average_per_year(mylist, country, 1985)
-    f = open("result_per_country.txt", "w")
-    f.write("country, percent_average, suicide_average\n")
-    for country in countryList:
-        country = country.replace("b'", "")
-        print_result_per_country(country, f)
+    dataset         = list(f)
+    # Get the list of countries
+    countries       = get_country(dataset)
+    lencountries    = len(countries)
+    i               = 0
+
+    # Merge the rows that share the same country and the same year
+    for country in countries:
+        country = rmv_b(country)
+        print(country, str(i) + "/" + str(lencountries))
+        get_average_per_year(dataset, country, 1985)
+        i += 1
+
+    # Write out the result of the merge
     print_result_per_year()
 
+    # Merge the different years of a single country into one line
+    fd = open("result_per_country.txt", "w")    
+    fd.write("country, percent_average, suicide_average\n")
+    for country in countries:
+        print_result_per_country(rmv_b(country), fd)
+
 
 
 

diff --git a/parse_pib.py b/parse_pib.py
@@ -1,56 +1,23 @@
-import csv
-
-
-
-
-"Country Name",
-
-"1985"
-"1986"
-"1987"
-"1988"
-"1989"
-"1990"
-"1991"
-"1992"
-"1993"
-"1994"
-"1995"
-"1996"
-"1997"
-"1998"
-"1999"
-"2000"
-"2001"
-"2002"
-"2003"
-"2004"
-"2005"
-"2006"
-"2007"
-"2008"
-"2009"
-"2010"
-"2011"
-"2012"
-"2013"
-"2014"
-"2015"
-"2016"
-"2017"
-"average"
-
+def rmv_b(country): # Strip the "b'" marker that str() leaves in front of the country name when the line was read as bytes (file opened in 'rb' mode)
+    return country.replace("b'", "")
 
 def print_csv(newlist):
     f = open("result_pib_country.txt", "w")
+
     f.write("country, 1985, 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, average\n")
     for line in newlist:
-        finalline = line['name'] + "," + line['1985'] + "," + line['1986'] + "," + line['1987'] + "," + line['1988'] + "," + line['1989'] + "," + line['1990'] + "," + line['1991'] + "," + line['1992'] + "," + line['1993'] + "," + line['1994'] + "," + line['1995'] + "," + line['1996'] + "," + line['1997'] + "," + line['1998'] + "," + line['1999'] + "," + line['2000'] + "," + line['2001'] + "," + line['2002'] + "," + line['2003'] + "," + line['2004'] + "," + line['2005'] + "," + line['2006'] + "," + line['2007'] + "," + line['2008'] + "," + line['2009'] + "," + line['2010'] + "," + line['2011'] + "," + line['2012'] + "," + line['2013'] + "," + line['2014'] + "," + line['2015'] + "," + line['2016'] + "," + str(line['average'])
-        f.write(finalline + "\n")
+        f.write(rmv_b(line['name']) + "," + \
+                    line['1985'] + "," + line['1986'] + "," + line['1987'] + "," + line['1988'] + "," + line['1989'] + "," + line['1990'] + "," + \
+                        line['1991'] + "," + line['1992'] + "," + line['1993'] + "," + line['1994'] + "," + line['1995'] + "," + line['1996'] + "," + \
+                            line['1997'] + "," + line['1998'] + "," + line['1999'] + "," + line['2000'] + "," + line['2001'] + "," + line['2002'] + "," + \
+                                line['2003'] + "," + line['2004'] + "," + line['2005'] + "," + line['2006'] + "," + line['2007'] + "," + line['2008'] + "," + \
+                                    line['2009'] + "," + line['2010'] + "," + line['2011'] + "," + line['2012'] + "," + line['2013'] + "," + line['2014'] + "," + \
+                                        line['2015'] + "," + line['2016'] + "," + str(line['average']) + '\n')
 
 def get_country():
     countryList = []
-    country = "Albania"
+    country     = 0
+
     with open('master.csv','rb') as f:
         dataset = list(f)
         for line in dataset:
@@ -63,29 +30,32 @@ def get_country():
 def make_average(line):
     average = 0
     counter = 0
-    i = 0
+    i       = 0
+
     for data in line:
         data = data.replace('"', "")
-        if i >= 19 and i <= 60 and data:
+        if i >= 29 and i <= 60 and data:
             if len(data) > 2:
                 counter += 1
                 average += float(data)
         i += 1
     return average/counter
 
-
-def test(data, countryList):
-    print(data in countryList)
-
 def exploit_pib():
+    # Get the list of countries for which we have suicide information
     countryList = get_country()
-    newlist = []
+    newlist     = []
+    # Read pib.csv
     with open('pib.csv','rb') as f:
+         # Load the data into a list
          piblist = list(f)
+
+         # Walk the list to extract the information we need
          for line in piblist:
-            line = line.split(',')
+            line    = str(line).split(',')
             line[0] = line[0].replace('"', '')
-            if line[0]in countryList:
+
+            if line[0] in countryList:
                 country_data = {
                     'name': line[0],
                     "1985": line[29],
@@ -123,6 +93,7 @@ def exploit_pib():
                     "average": make_average(line)
                 }
                 newlist.append(country_data)
+    # Write out the result
     print_csv(newlist)