diff --git a/covidnpi/config.toml b/covidnpi/config.toml index ae3c00f..c03494b 100644 --- a/covidnpi/config.toml +++ b/covidnpi/config.toml @@ -14,15 +14,15 @@ movavg = 7 link = "https://cnecovid.isciii.es/covid19/resources/casos_tecnica_provincia.csv" [postal_to_code] -1 = "VI" -2 = "AB" -3 = "A" -4 = "AL" -5 = "AV" -6 = "BA" -7 = "PM" -8 = "B" -9 = "BU" +01 = "VI" +02 = "AB" +03 = "A" +04 = "AL" +05 = "AV" +06 = "BA" +07 = "PM" +08 = "B" +09 = "BU" 10 = "CC" 11 = "CA" 12 = "CS" @@ -67,7 +67,6 @@ link = "https://cnecovid.isciii.es/covid19/resources/casos_tecnica_provincia.csv 51 = "CE" 52 = "ML" - [code_to_provincia] A = "Alacant" AB = "Albacete" @@ -159,6 +158,7 @@ navarra = "NA" orense = "OR" asturias = "O" palencia = "P" +grancanaria = "GC" gran_canaria = "GC" pontevedra = "PO" salamanca = "SA" @@ -168,6 +168,7 @@ segovia = "SG" sevilla = "SE" soria = "SO" tarragona = "T" +tenerife = "TF" teruel = "TE" toledo = "TO" valencia = "V" @@ -176,6 +177,14 @@ vizcaya = "BI" zamora = "ZA" zaragoza = "Z" +[isla_to_provincia] +elhierro = "grancanaria" +formentera = "tenerife" +fuerteventura = "grancanaria" +ibiza = "tenerife" +lagomera = "grancanaria" +lanzarote = "grancanaria" +menorca = "tenerife" [code_to_poblacion] AB = 388270 diff --git a/covidnpi/preprocess_and_score.py b/covidnpi/preprocess_and_score.py index 696f963..ea7de13 100644 --- a/covidnpi/preprocess_and_score.py +++ b/covidnpi/preprocess_and_score.py @@ -10,7 +10,7 @@ store_dict_scores, store_dict_provincia_to_medidas, ) -from covidnpi.utils.logging import logger +from covidnpi.utils.log import logger from covidnpi.utils.mobility import mobility_report_to_csv from covidnpi.utils.preprocess import read_npi_and_build_dict from covidnpi.utils.taxonomia import PATH_TAXONOMIA diff --git a/covidnpi/score/score_ambitos.py b/covidnpi/score/score_ambitos.py index 375c67c..3512ff1 100644 --- a/covidnpi/score/score_ambitos.py +++ b/covidnpi/score/score_ambitos.py @@ -2,7 +2,7 @@ import typer from covidnpi.utils.dictionaries import store_dict_scores, load_dict_scores -from covidnpi.utils.logging import logger +from covidnpi.utils.log import logger from covidnpi.utils.taxonomia import return_item_ponderacion, PATH_TAXONOMIA diff --git a/covidnpi/score/score_items.py b/covidnpi/score/score_items.py index a46e940..acdd965 100644 --- a/covidnpi/score/score_items.py +++ b/covidnpi/score/score_items.py @@ -5,7 +5,7 @@ import typer from covidnpi.utils.dictionaries import store_dict_scores, load_dict_scores -from covidnpi.utils.logging import logger +from covidnpi.utils.log import logger def score_items(df: pd.DataFrame): diff --git a/covidnpi/score/score_medidas.py b/covidnpi/score/score_medidas.py index 7759d79..e740dd7 100644 --- a/covidnpi/score/score_medidas.py +++ b/covidnpi/score/score_medidas.py @@ -6,7 +6,7 @@ import typer from covidnpi.utils.dictionaries import store_dict_scores, load_dict_medidas -from covidnpi.utils.logging import logger +from covidnpi.utils.log import logger from covidnpi.utils.taxonomia import return_taxonomia, return_all_medidas warnings.simplefilter(action="ignore", category=FutureWarning) diff --git a/covidnpi/utils/combine.py b/covidnpi/utils/combine.py index 53ea0c9..d9bd7ac 100644 --- a/covidnpi/utils/combine.py +++ b/covidnpi/utils/combine.py @@ -1,8 +1,14 @@ -import pandas as pd -import typer from pathlib import Path from typing import Union +import numpy as np +import pandas as pd +import typer + +from covidnpi.utils.config import load_config +from covidnpi.utils.dictionaries import reverse_dictionary +from covidnpi.utils.log import logger + COLS_AMBITO = [ "fecha", "provincia", @@ -38,12 +44,53 @@ def combine_csv(path: Union[Path, str], colname: str) -> pd.DataFrame: return pd.concat(df_dict, names=[colname]).reset_index().drop(columns="level_1") +def add_unidad_territorial( + df: pd.DataFrame, path_config: str = "covidnpi/config.toml" +) -> pd.DataFrame: + # Load all conversion dictionaries + isle_to_province = load_config(path_config, "isla_to_provincia") + # Check for islands + unidad = df["provincia"].copy() + province = df["provincia"].replace(isle_to_province) + # Create unidad_territorial column, that contains the islands + df.insert(loc=2, column="unidad_territorial", value=unidad) + df.loc[unidad == province, "unidad_territorial"] = np.nan + df["provincia"] = province + return df + + +def add_province_code( + df: pd.DataFrame, path_config: str = "covidnpi/config.toml" +) -> pd.DataFrame: + # Load all conversion dictionaries + province_to_code = load_config(path_config, "provincia_to_code") + code_to_province = load_config(path_config, "code_to_provincia") + postal_to_code = load_config(path_config, "postal_to_code") + code_to_postal = reverse_dictionary(postal_to_code) + # Get codes + code = df["provincia"].map(province_to_code) + # Replace province name and add code + df["provincia"] = code.map(code_to_province) + df.insert(loc=1, column="cod_prov", value=code.map(code_to_postal)) + return df + + +def add_ccaa(df: pd.DataFrame, path_ccaa: str = "data/CCAA.csv") -> pd.DataFrame: + ccaa = pd.read_csv(path_ccaa, dtype={"Codigo": str, "Cod_CCAA": str}) + code_to_ccaa = dict(zip(ccaa["Codigo"], ccaa["CCAA"])) + df.insert(loc=1, column="ccaa", value=df["cod_prov"].map(code_to_ccaa)) + code_to_codccaa = dict(zip(ccaa["Codigo"], ccaa["Cod_CCAA"])) + df.insert(loc=2, column="cod_ccaa", value=df["cod_prov"].map(code_to_codccaa)) + return df + + def combine_csv_ambito( path_data: str = "output/score_ambito", path_output: str = "npi_stringency.csv" ) -> pd.DataFrame: df = combine_csv(path_data, "provincia") # Tomar las columnas relevantes y ordenar por fecha df = df[COLS_AMBITO].sort_values(["fecha", "provincia"]) + df = df.pipe(add_unidad_territorial).pipe(add_province_code).pipe(add_ccaa) df.to_csv(path_output, index=False) diff --git a/covidnpi/utils/dictionaries.py b/covidnpi/utils/dictionaries.py index 52f19c8..42fee7f 100644 --- a/covidnpi/utils/dictionaries.py +++ b/covidnpi/utils/dictionaries.py @@ -49,3 +49,8 @@ def load_dict_scores(path_scores: str = "output/score_medidas"): df = pd.read_csv(path_file, index_col="fecha") dict_scores.update({provincia: df}) return dict_scores + + +def reverse_dictionary(d: dict) -> dict: + reversed_dictionary = {value: key for (key, value) in d.items()} + return reversed_dictionary diff --git a/covidnpi/utils/logging.py b/covidnpi/utils/log.py similarity index 100% rename from covidnpi/utils/logging.py rename to covidnpi/utils/log.py diff --git a/covidnpi/utils/mobility.py b/covidnpi/utils/mobility.py index 229078d..5396ff0 100644 --- a/covidnpi/utils/mobility.py +++ b/covidnpi/utils/mobility.py @@ -4,7 +4,7 @@ from covidnpi.utils.casos import load_casos_df, return_casos_of_provincia_normed from covidnpi.utils.config import load_config -from covidnpi.utils.logging import logger +from covidnpi.utils.log import logger from covidnpi.utils.series import ( cumulative_incidence, compute_growth_rate, diff --git a/covidnpi/utils/preprocess.py b/covidnpi/utils/preprocess.py index d4ab4e0..ca3c59d 100644 --- a/covidnpi/utils/preprocess.py +++ b/covidnpi/utils/preprocess.py @@ -7,7 +7,7 @@ import xlrd from covidnpi.utils.dictionaries import store_dict_provincia_to_medidas -from covidnpi.utils.logging import logger, raise_type_warning, raise_value_warning +from covidnpi.utils.log import logger, raise_type_warning, raise_value_warning from covidnpi.utils.taxonomia import return_all_medidas, PATH_TAXONOMIA LIST_BASE_SHEET = ["base", "base-regional-provincias", "BASE"] @@ -252,15 +252,6 @@ def rename_unidad(df, rename: dict = None) -> pd.DataFrame: df = df.copy() - # Listamos los valores de unidad que no se corresponden a los esperados - list_unidad = df["unidad"].dropna().astype(str).unique() - list_unidad = sorted(set(list_unidad) - set(DICT_UNIDAD_RENAME.values())) - if len(list_unidad) > 0: - logger.warning( - f"Valores no esperados encontrados en la columna 'unidad': " - f"{', '.join(list_unidad)}" - ) - # If any value contains the exact word, change value to word list_rename = set(rename.values()) for word in list_rename: @@ -271,6 +262,15 @@ def rename_unidad(df, rename: dict = None) -> pd.DataFrame: # Rename the rest df["unidad"] = df["unidad"].replace(rename) + + # Listamos los valores de unidad que no se corresponden a los esperados + list_unidad = df["unidad"].dropna().astype(str).unique() + list_unidad = sorted(set(list_unidad) - list_rename) + if len(list_unidad) > 0: + logger.warning( + f"Valores no esperados encontrados en la columna 'unidad': " + f"{', '.join(list_unidad)}" + ) return df diff --git a/covidnpi/web/datastore.py b/covidnpi/web/datastore.py index f60f783..78eff7d 100644 --- a/covidnpi/web/datastore.py +++ b/covidnpi/web/datastore.py @@ -9,7 +9,7 @@ return_casos_of_provincia_normed, ) from covidnpi.utils.config import load_config -from covidnpi.utils.logging import logger +from covidnpi.utils.log import logger from covidnpi.utils.series import cumulative_incidence, compute_growth_rate from covidnpi.utils.taxonomia import return_taxonomia, PATH_TAXONOMIA from covidnpi.web.mongo import load_mongo diff --git a/covidnpi/web/generate_json.py b/covidnpi/web/generate_json.py index 64dba24..2c95ab7 100644 --- a/covidnpi/web/generate_json.py +++ b/covidnpi/web/generate_json.py @@ -3,7 +3,7 @@ import typer from covidnpi.utils.config import load_config -from covidnpi.utils.logging import logger +from covidnpi.utils.log import logger from covidnpi.web.mongo import load_mongo diff --git a/data/CCAA.csv b/data/CCAA.csv new file mode 100644 index 0000000..135b3b8 --- /dev/null +++ b/data/CCAA.csv @@ -0,0 +1,53 @@ +CCAA,Cod_CCAA,Provincia,Texto,Codigo +Comunitat Valenciana,10,Castelló,Castellón,12 +Andalucía,01,Huelva,Huelva,21 +Canarias,05,Santa Cruz de Tenerife,Santa Cruz de Tenerife,38 +Castilla y León,07,Palencia,Palencia,34 +Castilla - La Mancha,08,Guadalajara,Guadalajara,19 +Comunidad de Madrid,13,Madrid,Madrid,28 +Cantabria,06,Cantabria,Cantabria,39 +País Vasco,16,Bizkaia,Vizcaya,48 +Castilla y León,07,Segovia,Segovia,40 +Castilla y León,07,Zamora,Zamora,49 +Extremadura,11,Badajoz,Badajoz,06 +Aragón,02,Huesca,Huesca,22 +Galicia,12,Lugo,Lugo,27 +Illes Balears,04,Illes Balears,Islas Baleares,07 +Comunidad Foral de Navarra,15,Navarra,Navarra,31 +Comunitat Valenciana,10,Alacant,Alicante,03 +Región de Murcia,14,Murcia,Murcia,30 +Castilla y León,07,Ávila,Ávila,05 +Andalucía,01,Cádiz,Cádiz,11 +Comunitat Valenciana,10,València,Valencia,46 +Cataluña,09,Barcelona,Barcelona,08 +Castilla - La Mancha,08,Cuenca,Cuenca,16 +Castilla - La Mancha,08,Albacete,Albacete,02 +Extremadura,11,Cáceres,Cáceres,10 +Castilla - La Mancha,08,Ciudad Real,Ciudad Real,13 +Cataluña,09,Lleida,Lleida,25 +Galicia,12,Ourense,Orense,32 +Cataluña,09,Tarragona,Tarragona,43 +Castilla y León,07,Burgos,Burgos,09 +País Vasco,16,Gipuzcoa,Guipúzcoa,20 +Castilla y León,07,Salamanca,Salamanca,37 +Melilla,19,Melilla,Melilla,52 +Andalucía,01,Sevilla,Sevilla,41 +Castilla y León,07,Valladolid,Valladolid,47 +Ceuta,18,Ceuta,Ceuta,51 +Andalucía,01,Jaén,Jaén,23 +Andalucía,01,Málaga,Málaga,29 +Andalucía,01,Córdoba,Córdoba,14 +Castilla y León,07,León,León,24 +País Vasco,16,Araba,Álava,01 +Cataluña,09,Girona,Gerona,17 +Andalucía,01,Granada,Granada,18 +"Rioja, La",17,La Rioja,La Rioja,26 +Canarias,05,Las Palmas,Las Palmas,35 +Galicia,12,Pontevedra,Pontevedra,36 +"Asturias, Principado de",03,Asturias,Asturias,33 +Castilla y León,07,Soria,Soria,42 +Castilla - La Mancha,08,Toledo,Toledo,45 +Galicia,12,A Coruña,La Coruña,15 +Aragón,02,Teruel,Teruel,44 +Aragón,02,Zaragoza,Zaragoza,50 +Andalucía,01,Almería,Almería,04