-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: ENH Semi-Automatic Parser (#134)
* docs: semi-automatic parser * feat(jsonParser): semi-automatic from csv * feat(data): Informatica * feat(data): Dipartimento di Mat e Inf * Update jsonParser/semi-automatic/parse_election_results.py Co-authored-by: Stefano Borzì <[email protected]> * Update jsonParser/semi-automatic/parse_election_results.py Co-authored-by: Stefano Borzì <[email protected]> --------- Co-authored-by: Stefano Borzì <[email protected]>
- Loading branch information
Showing
4 changed files
with
896 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
## Semi-Automatic Parser |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,189 @@ | ||
import csv | ||
import json | ||
import click | ||
import os | ||
|
||
# Command-line entry point: reads the semicolon-delimited CSV given with
# --input/-i, parses it section by section and writes the collected data
# as JSON to the path given with --output/-o.
# (Kept as comments on purpose: click shows a function docstring as the
# command's --help text.)
@click.command()
@click.option('--input', '-i', help='Input CSV file', required=True)
@click.option('--output', '-o', help='Output JSON file', required=True)
def main(input:str, output:str):
    check_file_exists(input)

    # 'utf-8-sig' transparently drops a leading BOM if the file has one
    with open(input, 'r', encoding='utf-8-sig') as source:
        remaining_rows = create_list(csv.reader(source, delimiter=';'))

    # Skeleton of the result; the parsing steps below fill it in place
    ballots = {
        "bianche": {
            "totali": 0,
            "seggio_n_telematico": 0,
        },
        "nulle": {"totali": 0},
        "contestate": {"totali": 0},
    }
    data = {
        "schede": ballots,
        "liste": [],
        "eletti": [],
        "non_eletti": [],
    }

    # Every step consumes its own section and returns the rows that follow it
    remaining_rows = get_name_and_seats(remaining_rows, data)
    remaining_rows = get_list_information(remaining_rows, data)
    remaining_rows = get_votation_information(remaining_rows, data)
    get_candidates_information(remaining_rows, data)

    # Serialise first, so the output file is only opened once the JSON text exists
    serialized = json.dumps(data, indent=4, ensure_ascii=False)
    with open(output, 'w', encoding='utf-8') as destination:
        destination.write(serialized)
|
||
def check_file_exists(file_path:str) -> None:
    """Make sure that the given path points to an existing file
    Args:
        file_path (str): The path to check
    Raises:
        FileNotFoundError: If file_path is not an existing file
    """
    if os.path.isfile(file_path):
        return
    raise FileNotFoundError(f"File '{file_path}' not found")
|
||
def create_list(csv_reader) -> list:
    """Create a list of rows from the CSV file
    Args:
        csv_reader: Any iterable of rows, each row being a list of strings
            (for example the object returned by csv.reader)
    Returns:
        list: The list of rows of the CSV file, with every non-breaking
            space ('\\xa0') in every cell replaced by a regular space
    """
    # The signature uses only builtin names: the previous `List[str]`
    # annotations referenced a name that is not imported by this file,
    # which raises NameError as soon as the function is defined.
    return [[cell.replace('\xa0', ' ') for cell in row] for row in csv_reader]
|
||
def get_name_and_seats(rows_list:list, data:dict) -> list:
    """Read the department name and the number of seats to be assigned
    Args:
        rows_list (list): The list of rows of the CSV file
        data (dict): The dictionary that will contain the data; the keys
            "dipartimento" and "seggi_da_assegnare" are set on it
    Returns:
        list: The rows of the CSV file from the fifth row onwards
    """
    # First cell of the first row holds the department name
    first_row = rows_list[0]
    data["dipartimento"] = str(first_row[0])
    # Second cell of the second row holds the seats, stored exactly as read
    data["seggi_da_assegnare"] = rows_list[1][1]
    # The first four rows are not passed on to the next parsing step
    return rows_list[4:]
|
||
def get_list_information(rows_list: list, data: dict) -> list:
    """Collect one entry per electoral list, up to and including the "TOTALE" row
    Args:
        rows_list (list): The list of rows of the CSV file
        data (dict): The dictionary that will contain the data; entries are
            appended to data["liste"]
    Returns:
        list: The rows that follow the "TOTALE" row (empty if it is never found)
    """
    seat_keys = ("seggi_pieni", "resti", "seggi_ai_resti", "seggi_totali")
    consumed = 0
    for consumed, row in enumerate(rows_list, start=1):
        label = row[0].strip()
        if label == "TOTALE":
            # The closing row only contributes the overall total
            data["liste"].append({"totale": int(row[1].strip())})
            break
        # Columns 1-4 hold the seat figures, in the same order as seat_keys
        seats = {
            key: row[column].strip()
            for column, key in enumerate(seat_keys, start=1)
        }
        votes = {
            "totali": row[5].strip(),
            "seggio_telematico": row[-1].strip(),
        }
        data["liste"].append({"nome": label, "seggi": seats, "voti": votes})
    return rows_list[consumed:]
|
||
def get_votation_information(rows_list:list, data:dict) -> list:
    """Collect ballot counts, quotient, voters and electors
    Args:
        rows_list (list): The list of rows of the CSV file
        data (dict): The dictionary that will contain the data; it must
            already hold the "schede" structure for the ballot rows
    Returns:
        list: The rows that follow the "PREFERENZE CANDIDATI" row (empty if
            it is never found)
    """
    def to_float(text):
        # Decimal values are written with a comma in the CSV
        return float(text.strip().replace(",", "."))

    consumed = 0
    for consumed, row in enumerate(rows_list, start=1):
        raw_label = row[0]
        label = raw_label.strip()
        # Ballot rows are matched by substring, the others by exact label
        if "Schede Bianche" in raw_label:
            blank = data["schede"]["bianche"]
            blank["totali"] = int(row[1])
            blank["seggio_n_telematico"] = int(row[-1])
        elif "Schede Nulle" in raw_label:
            data["schede"]["nulle"]["totali"] = int(row[-1])
        elif "Schede Contestate" in raw_label:
            data["schede"]["contestate"]["totali"] = int(row[-1])
        elif label == "QUOZIENTE":
            data["quoziente"] = to_float(row[1])
        elif label == "VOTANTI":
            data["votanti"] = {
                "totali": int(row[1].strip()),
                "percentuale": to_float(row[4]),
                "seggio_n_telematico": int(row[-1].strip()),
            }
        elif label == "TOTALE ELETTORI AVENTI DIRITTO":
            # NOTE(review): both fields are read from column 1, whereas
            # "votanti" reads its polling-station value from the last
            # column - confirm against the CSV layout whether that is intended.
            electors = int(row[1].strip())
            data["elettori"] = {
                "totali": electors,
                "seggio_n_telematico": electors,
            }
        elif label == "PREFERENZE CANDIDATI":
            # Marks the beginning of the candidates section
            break
    return rows_list[consumed:]
|
||
def get_candidates_information(rows_list:list, data:dict) -> None:
    """Collect the candidates of every list, split into elected and not elected
    Args:
        rows_list (list): The list of rows of the CSV file
        data (dict): The dictionary that will contain the data; candidates
            are appended to data["eletti"] or data["non_eletti"]
    """
    cursor = 0
    total_rows = len(rows_list)
    while cursor < total_rows:
        # Each section opens with the list name; the row right after it is skipped
        list_name = rows_list[cursor][0].strip()
        cursor += 2
        while cursor < total_rows:
            row = rows_list[cursor]
            cursor += 1
            # A row containing "SEGGI" closes the current list section
            if "SEGGI" in row[0].strip():
                break
            candidate = {
                "nominativo": row[0].strip(),
                "lista": list_name,
                "voti": {
                    "totali": int(row[1].strip()),
                    "seggio_telematico": int(row[-4].strip()),
                },
            }
            bucket = "eletti" if "ELETTO" in row[3].strip() else "non_eletti"
            data[bucket].append(candidate)
|
||
# Run the command-line entry point only when this file is executed as a script
if __name__ == "__main__":
    main()
Oops, something went wrong.