From ac7c15b4d9bcea7c84a1f67d09835c157e33f2d0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fl=C3=A1vio=20Code=C3=A7o=20Coelho?= <fccoelho@gmail.com>
Date: Tue, 25 May 2021 09:39:15 -0300
Subject: [PATCH] added start and end date parameters to cases_by_age_and_sex

---
 pysus/preprocessing/ESUS.py | 32 +++++++++++++++-----------------
 1 file changed, 15 insertions(+), 17 deletions(-)

diff --git a/pysus/preprocessing/ESUS.py b/pysus/preprocessing/ESUS.py
index 664634c6..8336ce0b 100644
--- a/pysus/preprocessing/ESUS.py
+++ b/pysus/preprocessing/ESUS.py
@@ -3,10 +3,12 @@
 import pandas as pd
 
 
-def cases_by_age_and_sex(UF):
+def cases_by_age_and_sex(UF, start='2020-03-01', end='2020-08-31'):
     """
-    Fetches ESUS covid line list and aggregates by age and sex returning these counts.
+    Fetches ESUS covid line list and aggregates by age and sex returning these counts between start and end dates.
     :param UF: State code
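+    :param start: Start date of the period to keep (inclusive), e.g. '2020-03-01'
+    :param end: End date of the period to keep (inclusive), e.g. '2020-08-31'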
     :return: dataframe
     """
     df = download(uf=UF)
@@ -18,15 +20,14 @@ def cases_by_age_and_sex(UF):
 
     # Eliminando os valores nulos nas colunas com datas importantes
     old_size = len(df)
-    df.dropna(subset = ['dataNotificacao', 'dataInicioSintomas', 'dataTeste'], inplace = True)
-    print(f"Removed {old_size-len(df)} rows with missing dates of symptoms, notification or testing")
-
+    df.dropna(subset=['dataNotificacao', 'dataInicioSintomas', 'dataTeste'], inplace=True)
+    print(f"Removed {old_size - len(df)} rows with missing dates of symptoms, notification or testing")
 
     # Desconsiderando os resultados negativos ou inconclusivos
-    df = df.loc[~df.resultadoTeste.isin(['Negativo','Inconclusivo ou Indeterminado'])]
+    df = df.loc[~df.resultadoTeste.isin(['Negativo', 'Inconclusivo ou Indeterminado'])]
 
     # Removendo sexo indeterminado
-    df = df.loc[df.sexo.isin(['Masculino','Feminino'])]
+    df = df.loc[df.sexo.isin(['Masculino', 'Feminino'])]
 
     # determinando a data dos primeiros sintomas como a data do index
 
@@ -36,20 +37,17 @@ def cases_by_age_and_sex(UF):
 
     # vamos limitar a data inicial e a data final considerando apenas a primeira onda
 
-    d1 = '2020-03-01'
-    d2 = '2020-08-31'
-
-    df = df.loc[d1:d2]
+    df = df.loc[start:end]
 
-    ini = np.arange(0,81,5)
-    fin = np.arange(5,86, 5)
-    fin[-1]=120
-    faixa_etaria = {f'[{i},{f})':(i,f) for i,f in zip(ini,fin)}
+    ini = np.arange(0, 81, 5)
+    fin = np.arange(5, 86, 5)
+    fin[-1] = 120
+    faixa_etaria = {f'[{i},{f})': (i, f) for i, f in zip(ini, fin)}
 
     labels = list(faixa_etaria.keys())
-    df['faixa_etaria'] = [labels[i-1] for i in np.digitize(df.idade,bins=ini)]
+    df['faixa_etaria'] = [labels[i - 1] for i in np.digitize(df.idade, bins=ini)]
 
     agreg = df[['sexo', 'faixa_etaria']].groupby(['faixa_etaria', 'sexo']).size()
     agreg = agreg.reset_index()
-    agreg.columns=['faixa_etaria', 'sexo','n']
+    agreg.columns = ['faixa_etaria', 'sexo', 'n']
     return agreg