def get_admin_fuzzy_dont(
    self, countryiso3: str, parent: Optional[str]
) -> List[str]:
    """Get the admin names that must not be fuzzy matched in a given scope.

    The names are read from self.admin_fuzzy_dont, which is a list of
    strings. An entry without a "|" applies everywhere. An entry of the
    form "PREFIX|NAME" only applies when PREFIX equals the country code or
    the parent admin code. Entries therefore take the form "DONT_MATCH",
    "AFG|DONT_MATCH" or "AF01|DONT_MATCH".

    Args:
        countryiso3 (str): ISO3 country code
        parent (Optional[str]): Parent admin code

    Returns:
        List[str]: Relevant admin names that should not be fuzzy matched,
        without duplicates and in order of first appearance
    """
    # Prefixes that make a restricted entry relevant. A falsy parent (None
    # or "") never matches anything, so it is left out of the set.
    scopes = {countryiso3}
    if parent:
        scopes.add(parent)

    relevant_admin_fuzzy_dont = []
    seen = set()  # O(1) duplicate check alongside the ordered result list
    for value in self.admin_fuzzy_dont:
        # partition splits on the first "|" only, so a name that itself
        # contains "|" no longer raises ValueError on unpacking.
        prefix, separator, name = value.partition("|")
        if not separator:
            name = value  # global entry: applies to every country/parent
        elif prefix not in scopes:
            continue  # restricted to a different country or parent
        if name not in seen:
            seen.add(name)
            relevant_admin_fuzzy_dont.append(name)
    return relevant_admin_fuzzy_dont
len(adminone.get_pcode_list()) == 2526 assert adminone.get_pcode_length("YEM") == 4 assert adminone.get_pcode("YEM", "YE30", logname="test") == ( "YE30",