Skip to content

Commit

Permalink
Use OCHA-DAP secrets
Browse files Browse the repository at this point in the history
Refactor to make it easier to understand
  • Loading branch information
mcarans committed Aug 22, 2024
1 parent 30169a5 commit 78f7c4d
Show file tree
Hide file tree
Showing 5 changed files with 162 additions and 156 deletions.
14 changes: 7 additions & 7 deletions .github/workflows/run-python-script.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ jobs:
env:
HDX_SITE: ${{ secrets.HDX_SITE }}
HDX_KEY: ${{ secrets.HDX_BOT_SCRAPERS_API_TOKEN }}
PREPREFIX: ${{ secrets.PREPREFIX }}
PREPREFIX: ${{ secrets.HDX_PIPELINE_PREPREFIX }}
USER_AGENT: ${{ secrets.USER_AGENT }}
EXTRA_PARAMS: ${{ secrets.EXTRA_PARAMS }}
run: |
Expand All @@ -42,12 +42,12 @@ jobs:
if: failure()
uses: dawidd6/action-send-mail@v3
with:
server_address: ${{secrets.EMAIL_SERVER}}
server_port: ${{secrets.EMAIL_PORT}}
username: ${{secrets.EMAIL_USERNAME}}
password: ${{secrets.EMAIL_PASSWORD}}
server_address: ${{secrets.HDX_PIPELINE_EMAIL_SERVER}}
server_port: ${{secrets.HDX_PIPELINE_EMAIL_PORT}}
username: ${{secrets.HDX_PIPELINE_EMAIL_USERNAME}}
password: ${{secrets.HDX_PIPELINE_EMAIL_PASSWORD}}
subject: "FAILED: ${{github.repository}} run job"
body: GitHub Actions run job for ${{github.repository}} failed!
to: ${{secrets.EMAIL_LIST}}
from: ${{secrets.EMAIL_FROM}}
to: ${{secrets.HDX_PIPELINE_EMAIL_LIST}}
from: ${{secrets.HDX_PIPELINE_EMAIL_FROM}}
content_type: text/html
291 changes: 149 additions & 142 deletions ipc.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,144 @@ def get_countries(self):
countryisos.add(countryiso3)
return [{"iso3": x} for x in sorted(countryisos)]

@staticmethod
def parse_date(datestring):
date = datetime.strptime(datestring, "%b %Y")
return date.replace(tzinfo=timezone.utc)

@classmethod
def parse_date_range(cls, date_range, time_period):
    """Parse a "Mon YYYY - Mon YYYY" range, widening *time_period* in place.

    time_period["start_date"] / ["end_date"] (tz-aware datetimes) are
    lowered/raised so the dict always covers every range parsed so far.
    Returns the range's (start, end) as ISO-format date strings, where the
    end is expanded to the last day of its month.
    """
    start_str, end_str = date_range.split(" - ")
    period_start = cls.parse_date(start_str)
    # +1 month, -1 day lands on the final day of the end month
    period_end = cls.parse_date(end_str) + relativedelta(months=1, days=-1)
    time_period["start_date"] = min(time_period["start_date"], period_start)
    time_period["end_date"] = max(time_period["end_date"], period_end)
    return period_start.date().isoformat(), period_end.date().isoformat()

def add_country_subnational_rows(
    self,
    base_row,
    time_period,
    location,
    rows,
    rows_wide,
    analysis=None,
):
    """Append long (rows) and wide (rows_wide) output rows for one location.

    Args:
        base_row: dict of columns shared by every generated row (deep-copied,
            never mutated).
        time_period: dict with "start_date"/"end_date" datetimes; widened in
            place by parse_date_range to cover every projection period seen.
        location: dict holding the population/percentage figures (a country
            analysis, a group or an area). NOTE: mutated — an
            "estimated_percentage<suffix>" key is set to 1.0 per projection.
        rows: list receiving one narrow row per projection/phase pair, but
            only when the phase figure exists and the projection has dates.
        rows_wide: list receiving exactly one wide row that combines all
            projections and phases for this location.
        analysis: dict supplying the "<projection>_period_dates" fields;
            defaults to *location* itself (the country-level case).
    """
    if analysis is None:
        analysis = location
    country_subnational_row = deepcopy(base_row)
    row_wide = deepcopy(country_subnational_row)
    # self.projections, projection_names and projection_suffixes are
    # parallel lists indexed together — presumably configured on the class;
    # verify against the class definition.
    for i, projection in enumerate(self.projections):
        projection_row = deepcopy(country_subnational_row)
        period_date = analysis.get(f"{projection}_period_dates")
        if period_date:
            period_start, period_end = self.parse_date_range(period_date,
                                                             time_period)
        else:
            # Projection absent from this analysis: keep the columns, blank
            period_start = period_end = None
        projection_row["Validity period"] = projection
        projection_row["From"] = period_start
        projection_row["To"] = period_end
        projection_name = self.projection_names[i]
        projection_suffix = self.projection_suffixes[i]
        row_wide[f"{projection_name} from"] = period_start
        row_wide[f"{projection_name} to"] = period_end
        # Injected so the "estimated" prefix below reads 1.0 as its
        # percentage (the analysed population is the 100% baseline)
        location[f"estimated_percentage{projection_suffix}"] = 1.0
        for prefix, phase in self.phasemapping.items():
            row = deepcopy(projection_row)
            # Phase "3+" figures live under a special "p3plus" key rather
            # than the regular "<prefix>_population" one
            if phase == "3+":
                key = f"p3plus{projection_suffix}"
            else:
                key = f"{prefix}_population{projection_suffix}"
            affected = location.get(key)
            row["Phase"] = phase
            row["Number"] = affected
            projection_name_l = projection_name.lower()
            if phase == "all":
                colname = f"Population analyzed {projection_name_l}"
            else:
                colname = f"Phase {phase} number {projection_name_l}"
            row_wide[colname] = affected
            percentage = location.get(
                f"{prefix}_percentage{projection_suffix}")
            row["Percentage"] = percentage
            # The injected estimated percentage (always 1.0) is omitted
            # from the wide row
            if prefix != "estimated":
                row_wide[
                    f"Phase {phase} percentage {projection_name_l}"
                ] = percentage
            # Narrow rows only emitted for real figures in dated projections
            if affected is not None and period_date:
                rows.append(row)

    rows_wide.append(row_wide)

@staticmethod
def get_base_row(analysis, countryiso3):
return {
"Date of analysis": analysis["analysis_date"],
"Country": countryiso3,
"Total country population": analysis.get("population"),
}

def add_country_rows(self, analysis, countryiso3, time_period, rows,
                     rows_wide):
    """Append country-level long and wide rows for a single analysis."""
    country_base = self.get_base_row(analysis, countryiso3)
    # The analysis doubles as the location at country level
    self.add_country_subnational_rows(
        country_base,
        time_period,
        analysis,
        rows=rows,
        rows_wide=rows_wide,
    )

def add_subnational_rows(self,
                         analysis, countryiso3, time_period, group_rows,
                         group_rows_wide, area_rows,
                         area_rows_wide
                         ):
    """Append subnational rows for one analysis.

    If the analysis has "groups", each group's rows go to group_rows/
    group_rows_wide and each group's areas to area_rows/area_rows_wide;
    otherwise the analysis's own areas are processed directly.

    Args:
        analysis: analysis dict from the IPC API.
        countryiso3: ISO3 code used in log messages (rows get it via the
            base row).
        time_period: start/end datetime dict, widened in place downstream.
        group_rows, group_rows_wide: lists receiving group-level rows.
        area_rows, area_rows_wide: lists receiving area-level rows.
    """

    def process_areas(adm_row, adm):
        # Emit one row set per area under adm (a group or the analysis).
        # .get() also tolerates a missing "areas" key, not just a null
        # one — both now hit the logged-error path instead of KeyError.
        areas = adm.get("areas")
        if areas is None:
            logger.error(
                f"{countryiso3}: {analysis['title']} has blank \"areas\" field!"
            )
            return
        for area in areas:
            area_row = deepcopy(adm_row)
            # Areas reached directly from the analysis carry no group level
            if "Level 1" not in area_row:
                area_row["Level 1"] = None
            area_row["Area"] = area["name"]
            self.add_country_subnational_rows(
                area_row,
                time_period,
                area,
                rows=area_rows,
                rows_wide=area_rows_wide,
                analysis=analysis,
            )

    base_row = self.get_base_row(analysis, countryiso3)
    groups = analysis.get("groups")
    if groups:
        # Iterate the value already fetched instead of re-indexing
        # analysis["groups"]
        for group in groups:
            group_row = deepcopy(base_row)
            group_row["Level 1"] = group["name"]
            self.add_country_subnational_rows(
                group_row,
                time_period,
                group,
                rows=group_rows,
                rows_wide=group_rows_wide,
                analysis=analysis,
            )
            if "areas" in group:
                process_areas(group_row, group)
    else:
        process_areas(base_row, analysis)

def get_country_data(self, countryiso3):
countryiso2 = Country.get_iso2_from_iso3(countryiso3)
url = f"{self.base_url}/population?country={countryiso2}"
Expand All @@ -91,11 +229,7 @@ def get_country_data(self, countryiso3):
return None
most_recent_analysis = country_data[0]

def parse_date(datestring):
date = datetime.strptime(datestring, "%b %Y")
return date.replace(tzinfo=timezone.utc)

analysis_date = parse_date(most_recent_analysis["analysis_date"])
analysis_date = self.parse_date(most_recent_analysis["analysis_date"])
if analysis_date <= self.state.get(countryiso3,
self.default_start_date):
update = False
Expand All @@ -104,143 +238,18 @@ def parse_date(datestring):
self.state[countryiso3] = analysis_date
time_period = {"start_date": default_enddate, "end_date": default_date}

def parse_date_range(date_range):
start, end = date_range.split(" - ")
startdate = parse_date(start)
if startdate < time_period["start_date"]:
time_period["start_date"] = startdate
enddate = parse_date(end)
enddate = enddate + relativedelta(months=1, days=-1)
if enddate > time_period["end_date"]:
time_period["end_date"] = enddate
startdatestr = startdate.date().isoformat()
enddatestr = enddate.date().isoformat()
return startdatestr, enddatestr

def add_country_subnational_rows(
base_row,
location,
rows,
rows_wide,
analysis=None,
):
if analysis is None:
analysis = location
country_subnational_row = deepcopy(base_row)
row_wide = deepcopy(country_subnational_row)
for i, projection in enumerate(self.projections):
projection_row = deepcopy(country_subnational_row)
period_date = analysis.get(f"{projection}_period_dates")
if period_date:
period_start, period_end = parse_date_range(period_date)
else:
period_start = period_end = None
projection_row["Validity period"] = projection
projection_row["From"] = period_start
projection_row["To"] = period_end
projection_name = self.projection_names[i]
projection_suffix = self.projection_suffixes[i]
row_wide[f"{projection_name} from"] = period_start
row_wide[f"{projection_name} to"] = period_end
location[f"estimated_percentage{projection_suffix}"] = 1.0
for prefix, phase in self.phasemapping.items():
row = deepcopy(projection_row)
if phase == "3+":
key = f"p3plus{projection_suffix}"
else:
key = f"{prefix}_population{projection_suffix}"
affected = location.get(key)
row["Phase"] = phase
row["Number"] = affected
projection_name_l = projection_name.lower()
if phase == "all":
colname = f"Population analyzed {projection_name_l}"
else:
colname = f"Phase {phase} number {projection_name_l}"
row_wide[colname] = affected
percentage = location.get(
f"{prefix}_percentage{projection_suffix}")
row["Percentage"] = percentage
if prefix != "estimated":
row_wide[
f"Phase {phase} percentage {projection_name_l}"
] = percentage
if affected is not None and period_date:
rows.append(row)

rows_wide.append(row_wide)

def get_base_row(analysis):
return {
"Date of analysis": analysis["analysis_date"],
"Country": countryiso3,
"Total country population": analysis.get("population"),
}

def add_country_rows(analysis, rows, rows_wide):
base_row = get_base_row(analysis)
add_country_subnational_rows(
base_row,
analysis,
rows=rows,
rows_wide=rows_wide,
)

def add_subnational_rows(
analysis, group_rows, group_rows_wide, area_rows,
area_rows_wide
):
def process_areas(adm_row, adm):
if adm["areas"] is None:
logger.error(
f"{countryiso3}: {analysis['title']} has blank \"areas\" field!"
)
return
for area in adm["areas"]:
area_row = deepcopy(adm_row)
if "Level 1" not in area_row:
area_row["Level 1"] = None
area_row["Area"] = area["name"]
add_country_subnational_rows(
area_row,
area,
rows=area_rows,
rows_wide=area_rows_wide,
analysis=analysis,
)

base_row = get_base_row(analysis)
groups = analysis.get("groups")
if groups:
for group in analysis["groups"]:
group_row = deepcopy(base_row)
group_row["Level 1"] = group["name"]
add_country_subnational_rows(
group_row,
group,
rows=group_rows,
rows_wide=group_rows_wide,
analysis=analysis,
)
if "areas" in group:
process_areas(group_row, group)
else:
process_areas(base_row, analysis)

output = {"countryiso3": countryiso3}
country_rows = output["country_rows_latest"] = []
country_rows_wide = output["country_rows_wide_latest"] = []
group_rows = output["group_rows_latest"] = []
group_rows_wide = output["group_rows_wide_latest"] = []
area_rows = output["area_rows_latest"] = []
area_rows_wide = output["area_rows_wide_latest"] = []
add_country_rows(most_recent_analysis, country_rows, country_rows_wide)
add_subnational_rows(
most_recent_analysis,
group_rows,
group_rows_wide,
area_rows,
area_rows_wide,
self.add_country_rows(most_recent_analysis, countryiso3, time_period,
country_rows, country_rows_wide)
self.add_subnational_rows(
most_recent_analysis, countryiso3, time_period, group_rows,
group_rows_wide, area_rows, area_rows_wide,
)
self.output["country_rows_latest"].extend(country_rows)
self.output["country_rows_wide_latest"].extend(country_rows_wide)
Expand All @@ -256,13 +265,11 @@ def process_areas(adm_row, adm):
area_rows = output["area_rows"] = []
area_rows_wide = output["area_rows_wide"] = []
for analysis in country_data:
add_country_rows(analysis, country_rows, country_rows_wide)
add_subnational_rows(
analysis,
group_rows,
group_rows_wide,
area_rows,
area_rows_wide,
self.add_country_rows(analysis, countryiso3, time_period,
country_rows, country_rows_wide)
self.add_subnational_rows(
analysis, countryiso3, time_period, group_rows,
group_rows_wide, area_rows, area_rows_wide,
)
self.output["country_rows"].extend(country_rows)
self.output["country_rows_wide"].extend(country_rows_wide)
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
hdx-python-api==6.3.1
hdx-python-api==6.3.2
python-slugify==8.0.4
9 changes: 4 additions & 5 deletions run.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,10 @@ def main(save: bool = False, use_saved: bool = False) -> None:
state_dict = deepcopy(state.get())
with wheretostart_tempdir_batch(lookup) as info:
folder = info["folder"]
with Download(
extra_params_yaml=join(expanduser("~"),
".extraparams.yaml"),
extra_params_lookup=lookup,
) as downloader:
with Download(extra_params_yaml=join(expanduser("~"),
".extraparams.yaml"),
extra_params_lookup=lookup) as downloader:

_, iterator = downloader.get_tabular_rows(
join("config", "ch_countries.csv"), dict_form=True)
ch_countries = [row["ISO_3"] for row in iterator]
Expand Down
2 changes: 1 addition & 1 deletion test-requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
pytest==8.2.2
pytest==8.3.2
pytest-cov==5.0.0
-r requirements.txt

0 comments on commit 78f7c4d

Please sign in to comment.