From ac7c15b4d9bcea7c84a1f67d09835c157e33f2d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fl=C3=A1vio=20Code=C3=A7o=20Coelho?= Date: Tue, 25 May 2021 09:39:15 -0300 Subject: [PATCH] added parameters to cases_by_age_date --- pysus/preprocessing/ESUS.py | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/pysus/preprocessing/ESUS.py b/pysus/preprocessing/ESUS.py index 664634c6..8336ce0b 100644 --- a/pysus/preprocessing/ESUS.py +++ b/pysus/preprocessing/ESUS.py @@ -3,10 +3,12 @@ import pandas as pd -def cases_by_age_and_sex(UF): +def cases_by_age_and_sex(UF, start='2020-03-01', end='2020-08-31'): """ - Fetches ESUS covid line list and aggregates by age and sex returning these counts. + Fetches ESUS covid line list and aggregates by age and sex returning these counts between start and end dates. :param UF: State code + :param start: Start date + :param end: end date :return: dataframe """ df = download(uf=UF) @@ -18,15 +20,14 @@ def cases_by_age_and_sex(UF): # Eliminando os valores nulos nas colunas com datas importantes old_size = len(df) - df.dropna(subset = ['dataNotificacao', 'dataInicioSintomas', 'dataTeste'], inplace = True) - print(f"Removed {old_size-len(df)} rows with missing dates of symptoms, notification or testing") - + df.dropna(subset=['dataNotificacao', 'dataInicioSintomas', 'dataTeste'], inplace=True) + print(f"Removed {old_size - len(df)} rows with missing dates of symptoms, notification or testing") # Desconsiderando os resultados negativos ou inconclusivos - df = df.loc[~df.resultadoTeste.isin(['Negativo','Inconclusivo ou Indeterminado'])] + df = df.loc[~df.resultadoTeste.isin(['Negativo', 'Inconclusivo ou Indeterminado'])] # Removendo sexo indeterminado - df = df.loc[df.sexo.isin(['Masculino','Feminino'])] + df = df.loc[df.sexo.isin(['Masculino', 'Feminino'])] # determinando a data dos primeiros sintomas como a data do index @@ -36,20 +37,17 @@ def cases_by_age_and_sex(UF): # vamos limitar a data inicial e a data final considerando apenas a primeira onda - d1 = '2020-03-01' - d2 = '2020-08-31' - - df = df.loc[d1:d2] + df = df.loc[start:end] - ini = np.arange(0,81,5) - fin = np.arange(5,86, 5) - fin[-1]=120 - faixa_etaria = {f'[{i},{f})':(i,f) for i,f in zip(ini,fin)} + ini = np.arange(0, 81, 5) + fin = np.arange(5, 86, 5) + fin[-1] = 120 + faixa_etaria = {f'[{i},{f})': (i, f) for i, f in zip(ini, fin)} labels = list(faixa_etaria.keys()) - df['faixa_etaria'] = [labels[i-1] for i in np.digitize(df.idade,bins=ini)] + df['faixa_etaria'] = [labels[i - 1] for i in np.digitize(df.idade, bins=ini)] agreg = df[['sexo', 'faixa_etaria']].groupby(['faixa_etaria', 'sexo']).size() agreg = agreg.reset_index() - agreg.columns=['faixa_etaria', 'sexo','n'] + agreg.columns = ['faixa_etaria', 'sexo', 'n'] return agreg