Skip to content

Commit

Permalink
Merge pull request #58 from OCHA-DAP/add_iso_to_fuzzy_dont
Browse files Browse the repository at this point in the history
Add iso and parent to admin fuzzy dont
  • Loading branch information
b-j-mills authored Oct 16, 2024
2 parents 332f867 + 2445f24 commit ff4738c
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 14 deletions.
20 changes: 9 additions & 11 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,13 @@ cfgv==3.4.0
# via pre-commit
chardet==5.2.0
# via frictionless
charset-normalizer==3.3.2
charset-normalizer==3.4.0
# via requests
click==8.1.7
# via typer
coverage==7.6.1
coverage==7.6.3
# via pytest-cov
distlib==0.3.8
distlib==0.3.9
# via virtualenv
et-xmlfile==1.1.0
# via openpyxl
Expand All @@ -40,19 +40,19 @@ ijson==3.3.0
# via hdx-python-utilities
iniconfig==2.0.0
# via pytest
isodate==0.6.1
isodate==0.7.2
# via frictionless
jinja2==3.1.4
# via frictionless
jsonlines==4.0.0
# via hdx-python-utilities
jsonpath-ng==1.6.1
jsonpath-ng==1.7.0
# via libhxl
jsonschema==4.23.0
# via
# frictionless
# tableschema-to-template
jsonschema-specifications==2023.12.1
jsonschema-specifications==2024.10.1
# via jsonschema
libhxl==5.2.1
# via hdx-python-country (pyproject.toml)
Expand All @@ -62,7 +62,7 @@ markdown-it-py==3.0.0
# via rich
marko==2.1.2
# via frictionless
markupsafe==2.1.5
markupsafe==3.0.1
# via jinja2
mdurl==0.1.2
# via markdown-it-py
Expand All @@ -82,7 +82,7 @@ ply==3.11
# via
# jsonpath-ng
# libhxl
pre-commit==4.0.0
pre-commit==4.0.1
# via hdx-python-country (pyproject.toml)
pydantic==2.9.2
# via frictionless
Expand Down Expand Up @@ -142,9 +142,7 @@ shellingham==1.5.4
simpleeval==1.0.0
# via frictionless
six==1.16.0
# via
# isodate
# python-dateutil
# via python-dateutil
stringcase==1.2.0
# via frictionless
structlog==24.4.0
Expand Down
36 changes: 35 additions & 1 deletion src/hdx/location/adminlevel.py
Original file line number Diff line number Diff line change
Expand Up @@ -602,6 +602,38 @@ def get_admin_name_replacements(
continue
return relevant_name_replacements

def get_admin_fuzzy_dont(
    self, countryiso3: str, parent: Optional[str]
) -> List[str]:
    """Get the admin names that should not be fuzzy matched for the given
    country and parent.

    The names are taken from admin fuzzy dont, which is a list of strings.
    An entry is either global or restricted to a country or parent by a
    prefix separated from the name with "|". Entries take the form
    "DONT_MATCH", "AFG|DONT_MATCH", or "AF01|DONT_MATCH". Only the first
    "|" separates prefix from name, so a name may itself contain "|".

    Args:
        countryiso3 (str): ISO3 country code
        parent (Optional[str]): Parent admin code

    Returns:
        List[str]: Relevant admin names that should not be fuzzy matched,
        without duplicates, in order of first appearance
    """
    # A dict keeps insertion order and makes duplicate detection O(1),
    # unlike repeated membership tests on a list.
    relevant_admin_fuzzy_dont = {}
    for value in self.admin_fuzzy_dont:
        # partition splits on the first "|" only, so an entry containing
        # more than one "|" no longer raises ValueError on unpacking.
        prefix, separator, name = value.partition("|")
        if not separator:
            # No prefix: the entry applies globally.
            relevant_admin_fuzzy_dont[value] = None
        elif prefix == countryiso3 or (parent and prefix == parent):
            # Prefixed entry: keep it only if it targets this country or
            # this parent (an empty/None parent never matches).
            relevant_admin_fuzzy_dont[name] = None
    return list(relevant_admin_fuzzy_dont)

def fuzzy_pcode(
self,
countryiso3: str,
Expand Down Expand Up @@ -658,7 +690,9 @@ def fuzzy_pcode(
pcode = name_to_pcode.get(
normalised_name, name_to_pcode.get(alt_normalised_name)
)
if not pcode and name.lower() in self.admin_fuzzy_dont:
if not pcode and name.lower() in self.get_admin_fuzzy_dont(
countryiso3, parent
):
if logname:
self.ignored.add((logname, countryiso3, name))
return None
Expand Down
2 changes: 1 addition & 1 deletion tests/fixtures/adminlevel.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -516,7 +516,7 @@ admin_name_replacements:
" oblast": ""

admin_fuzzy_dont:
- "nord"
- "YEM|nord"
- "north"
- "sud"
- "south"
Expand Down
2 changes: 1 addition & 1 deletion tests/hdx/location/test_adminlevel.py
Original file line number Diff line number Diff line change
Expand Up @@ -421,7 +421,7 @@ def test_adminlevel_with_url(self, config, url, fixtures_dir):
adminone = AdminLevel(config)
adminone.setup_from_url()
assert adminone.get_admin_level("YEM") == 1
assert len(adminone.get_pcode_list()) == 2510
assert len(adminone.get_pcode_list()) == 2526
assert adminone.get_pcode_length("YEM") == 4
assert adminone.get_pcode("YEM", "YE30", logname="test") == (
"YE30",
Expand Down

0 comments on commit ff4738c

Please sign in to comment.