wri · dmannarino · Jan 29, 2025 · Dec 17, 2024 · Dec 18, 2024 · Dec 18, 2024
diff --git a/.isort.cfg b/.isort.cfg
@@ -2,4 +2,4 @@
 line_length = 88
 multi_line_output = 3
 include_trailing_comma = True
-known_third_party = _pytest,aenum,affine,alembic,asgi_lifespan,async_lru,asyncpg,aws_utils,boto3,botocore,click,docker,ee,errors,fastapi,fiona,gdal_utils,geoalchemy2,geojson,gfw_pixetl,gino,gino_starlette,google,httpx,httpx_auth,logger,logging_utils,moto,numpy,orjson,osgeo,pandas,pendulum,pglast,psutil,psycopg2,pydantic,pyproj,pytest,pytest_asyncio,rasterio,shapely,sqlalchemy,sqlalchemy_utils,starlette,tileputty,tiles_geojson,typer
+known_third_party = _pytest,aenum,affine,alembic,asgi_lifespan,async_lru,asyncpg,aws_utils,boto3,botocore,click,docker,ee,errors,fastapi,fiona,gdal_utils,geoalchemy2,geojson,gfw_pixetl,gino,gino_starlette,google,httpx,httpx_auth,logger,logging_utils,moto,numpy,orjson,osgeo,pandas,pendulum,pglast,psutil,psycopg2,pydantic,pyproj,pytest,pytest_asyncio,rasterio,shapely,sqlalchemy,sqlalchemy_utils,starlette,tileputty,tiles_geojson,typer,unidecode
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -10,32 +10,32 @@ repos:
     hooks:
     - id: isort
 -   repo: https://github.com/myint/docformatter
-    rev: v1.4
+    rev: eb1df347edd128b30cd3368dddc3aa65edcfac38  # pragma: allowlist secret
     hooks:
     - id: docformatter
       args: [--in-place]
 -   repo: https://github.com/ambv/black
-    rev: 22.12.0
+    rev: 24.10.0
     hooks:
     - id: black
       language_version: python3.10
 -   repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.3.0
+    rev: v5.0.0
     hooks:
     - id: detect-aws-credentials
     - id: detect-private-key
     - id: trailing-whitespace
 -   repo: https://github.com/pycqa/flake8
-    rev: 6.0.0
+    rev: 7.1.1
     hooks:
     - id: flake8
 -   repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v0.971
+    rev: v1.14.1
     hooks:
-    -   id: mypy
+    - id: mypy
 -   repo: https://github.com/Yelp/detect-secrets
-    rev: v1.3.0
+    rev: v1.5.0
     hooks:
-    -   id: detect-secrets
-        args: ['--baseline', '.secrets.baseline'] # run: `pip install detect-secrets` to establish baseline
-        exclude: Pipfile.lock
+    - id: detect-secrets
+      args: ['--baseline', '.secrets.baseline'] # run: `pip install detect-secrets` to establish baseline
+      exclude: Pipfile.lock
diff --git a/.secrets.baseline b/.secrets.baseline
@@ -82,7 +82,7 @@
       }
     ]
   },
-  "version": "1.3.0",
+  "version": "1.5.0",
   "filters_used": [
     {
       "path": "detect_secrets.filters.allowlist.is_line_allowlisted"

diff --git a/Pipfile b/Pipfile
@@ -60,6 +60,7 @@ sqlalchemy = "<1.4"
 sqlalchemy-utils = "*"
 starlette = "*"
 typer = "*"
+unidecode = "*"
 uvicorn = {version = "*", extras = ["standard"]}
 
 [requires]

diff --git a/Pipfile.lock b/Pipfile.lock
diff --git a/app/main.py b/app/main.py
@@ -13,6 +13,7 @@
 from starlette.middleware.base import BaseHTTPMiddleware
 
 from app.errors import http_error_handler
+from app.routes.political import id_lookup
 
 from .application import app
 from .middleware import no_cache_response_header, redirect_latest, set_db_mode
@@ -128,6 +129,13 @@ async def rve_error_handler(
     app.include_router(r, prefix="/dataset")
 
 
+################
+# POLITICAL API #
+################
+
+app.include_router(id_lookup.router, prefix="/political")
+
+
 ###############
 # ASSET API
 ###############

diff --git a/app/models/pydantic/geoencoder.py b/app/models/pydantic/geoencoder.py
@@ -0,0 +1,97 @@
+from typing import List, Optional
+
+from fastapi.params import Query
+from pydantic import Field, root_validator
+
+from app.models.pydantic.base import StrictBaseModel
+from app.models.pydantic.responses import Response
+from app.settings.globals import ENV, per_env_admin_boundary_versions
+
+
+# Query parameters accepted by the administrative boundary ID lookup.
+# NOTE: documented with comments rather than a class docstring because
+# pydantic surfaces a model docstring as its schema description.
+class GeoencoderQueryParams(StrictBaseModel):
+    admin_source: str = Field(
+        "GADM",
+        description=(
+            "The source of administrative boundaries to use "
+            "(currently the only valid choice is 'GADM')."
+        ),
+    )
+    admin_version: str = Query(
+        ...,
+        description=(
+            "The version of the administrative boundaries to use "
+            "(note that this represents the release of the source dataset, "
+            "not the GFW Data API's idea of the version in the database)."
+        ),
+    )
+    country: str = Query(
+        ...,
+        description="Name of the country to match.",
+    )
+    region: Optional[str] = Query(
+        None,
+        description="Name of the region to match.",
+    )
+    subregion: Optional[str] = Query(
+        None,
+        description="Name of the subregion to match.",
+    )
+    normalize_search: bool = Query(
+        True,
+        description=(
+            "Whether or not to perform a case- and accent-insensitive search."
+        ),
+    )
+
+    @root_validator(pre=True)
+    def validate_params(cls, values):
+        """Check that the requested admin source and version exist in this
+        environment.
+
+        This validator runs before field validation, so ``values`` is the raw
+        input and field defaults have not been applied yet.
+
+        Raises:
+            ValueError: if ``admin_source`` is explicitly null, if
+                ``admin_version`` is missing, or if the source/version pair
+                is not listed in ``per_env_admin_boundary_versions`` for the
+                current ``ENV``.
+        """
+        # An omitted admin_source must fall back to the declared field
+        # default; without this, leaving it unset would be rejected even
+        # though the field (and the error message below) allow it.
+        default_source = cls.__fields__["admin_source"].default
+        source = values.get("admin_source", default_source)
+        if source is None:
+            raise ValueError(
+                "You must provide admin_source or leave unset for the "
+                "default value of 'GADM'."
+            )
+
+        version = values.get("admin_version")
+        if version is None:
+            raise ValueError("You must provide an admin_version")
+
+        sources_in_this_env = per_env_admin_boundary_versions[ENV]
+
+        versions_of_source_in_this_env = sources_in_this_env.get(source)
+        if versions_of_source_in_this_env is None:
+            raise ValueError(
+                f"Invalid administrative boundary source {source}. Valid "
+                f"sources in this environment are {list(sources_in_this_env)}"
+            )
+
+        # Only the presence of the version matters here; the mapped value is
+        # looked up again by the route when it builds the query.
+        if versions_of_source_in_this_env.get(version) is None:
+            raise ValueError(
+                f"Invalid version {version} for administrative boundary source "
+                f"{source}. Valid versions for this source in this environment are "
+                f"{list(versions_of_source_in_this_env)}"
+            )
+
+        return values
+
+
+# One administrative unit in a lookup match: its ID and its name.
+# Both fields are nullable; form_geoencoder_response sets them to None for
+# levels deeper than the level that was requested.
+class GeoencoderMatchElement(StrictBaseModel):
+    id: str | None
+    name: str | None
+
+
+# A single lookup match, holding one element per administrative level
+# (country, region, subregion).
+class GeoencoderMatch(StrictBaseModel):
+    country: GeoencoderMatchElement
+    region: GeoencoderMatchElement
+    subregion: GeoencoderMatchElement
+
+
+# Payload of a lookup response: the admin source and version that were
+# queried, plus the list of matches found.
+class GeoencoderResponseData(StrictBaseModel):
+    adminSource: str
+    adminVersion: str
+    matches: List[GeoencoderMatch]
+
+
+# Response envelope for the lookup; `data` carries the lookup payload.
+class GeoencoderResponse(Response):
+    data: GeoencoderResponseData
diff --git a/app/routes/political/__init__.py b/app/routes/political/__init__.py
diff --git a/app/routes/political/id_lookup.py b/app/routes/political/id_lookup.py
@@ -0,0 +1,163 @@
+from typing import Annotated, Any, Dict, List
+
+from fastapi import APIRouter, HTTPException, Query
+from unidecode import unidecode
+
+from app.models.pydantic.geoencoder import GeoencoderQueryParams, GeoencoderResponse
+from app.routes.datasets.queries import _query_dataset_json
+from app.settings.globals import ENV, per_env_admin_boundary_versions
+
+router = APIRouter()
+
+
+@router.get("/id-lookup", status_code=200, include_in_schema=False)
+async def id_lookup(params: Annotated[GeoencoderQueryParams, Query()]):
+    """Resolve a country name (plus optional region and subregion names) to
+    the matching administrative boundary IDs.
+
+    Raises:
+        HTTPException: 400 if the admin source has no backing dataset; the
+            name helpers called below may raise 400 as well.
+    """
+    admin_source_to_dataset: Dict[str, str] = {"GADM": "gadm_administrative_boundaries"}
+
+    if params.admin_source not in admin_source_to_dataset:
+        raise HTTPException(
+            status_code=400,
+            detail=(
+                "Invalid admin boundary source. Valid sources:"
+                f" {[source for source in admin_source_to_dataset.keys()]}"
+            ),
+        )
+    dataset: str = admin_source_to_dataset[params.admin_source]
+
+    # Translate the public source release into the version string that is
+    # passed to the dataset query.
+    version_str: str = lookup_admin_source_version(
+        params.admin_source, params.admin_version
+    )
+
+    country, region, subregion = sanitize_names(
+        params.normalize_search, params.country, params.region, params.subregion
+    )
+    adm_level: int = determine_admin_level(country, region, subregion)
+
+    sql: str = _admin_boundary_lookup_sql(
+        adm_level, params.normalize_search, dataset, country, region, subregion
+    )
+    rows: List[Dict[str, Any]] = await _query_dataset_json(
+        dataset, version_str, sql, None
+    )
+
+    return form_geoencoder_response(
+        params.admin_source, params.admin_version, adm_level, rows
+    )
+
+
+def sanitize_names(
+    normalize_search: bool,
+    country: str | None,
+    region: str | None,
+    subregion: str | None,
+) -> List[str | None]:
+    """Clean up the requested names and enforce the admin level hierarchy.
+
+    Empty names become None. When ``normalize_search`` is set, the remaining
+    names are unaccented and lower-cased. The result is always ordered
+    ``[country, region, subregion]``.
+
+    Raises:
+        HTTPException: 400 if a subregion is given without a region.
+    """
+    if subregion and not region:
+        raise HTTPException(
+            status_code=400,
+            detail="If subregion is specified, region must be specified as well.",
+        )
+
+    def _clean(name: str | None) -> str | None:
+        if not name:
+            return None
+        return unidecode(name).lower() if normalize_search else name
+
+    return [_clean(name) for name in (country, region, subregion)]
+
+
+def determine_admin_level(
+    country: str | None, region: str | None, subregion: str | None
+) -> int:
+    """Return the admin level implied by the deepest non-empty name.
+
+    Subregion maps to 2, region to 1 and country to 0.
+
+    Raises:
+        HTTPException: 400 if none of the names is set.
+    """
+    # Check from the most specific level down so the deepest name wins.
+    for level, name in ((2, subregion), (1, region), (0, country)):
+        if name:
+            return level
+
+    # Shouldn't get here if FastAPI route definition worked
+    raise HTTPException(status_code=400, detail="Country MUST be specified.")
+
+
+def _admin_boundary_lookup_sql(
+    adm_level: int,
+    normalize_search: bool,
+    dataset: str,
+    country_name: str,
+    region_name: str | None,
+    subregion_name: str | None,
+) -> str:
+    """Generate the SQL required to look up administrative boundary IDs by
+    name.
+
+    Names are matched against the ``*_normalized`` columns when
+    ``normalize_search`` is set and against the raw name columns otherwise.
+    Region and subregion conditions are added only when those names are not
+    None. Every name is embedded as a dollar-quoted literal whose tag is
+    chosen so the name cannot close the literal early.
+    """
+    name_fields: List[str] = ["country", "name_1", "name_2"]
+    suffix = "_normalized" if normalize_search else ""
+    match_name_fields = [name_field + suffix for name_field in name_fields]
+
+    country_literal = _dollar_quote("country", country_name)
+    conditions = [f"{match_name_fields[0]}={country_literal}"]
+    if region_name is not None:
+        region_literal = _dollar_quote("region", region_name)
+        conditions.append(f"{match_name_fields[1]}={region_literal}")
+    if subregion_name is not None:
+        subregion_literal = _dollar_quote("subregion", subregion_name)
+        conditions.append(f"{match_name_fields[2]}={subregion_literal}")
+    conditions.append(f"adm_level='{adm_level}'")
+
+    selected = ", ".join(name_fields)
+    where = " AND ".join(conditions)
+    return f"SELECT gid_0, gid_1, gid_2, {selected} FROM {dataset} WHERE {where}"
+
+
+def _dollar_quote(tag: str, value: str) -> str:
+    """Wrap ``value`` in a dollar-quoted SQL literal that it cannot escape."""
+    # The original f-string formatting accepted any value; keep that.
+    text = str(value)
+    # The literal can only end early if "$<tag>" occurs inside the value
+    # (either as "$<tag>$" or as a trailing "$<tag>" that merges with the
+    # closing delimiter), so lengthen the tag until it no longer does.
+    # ``tag`` itself must not contain "$".
+    while f"${tag}" in text:
+        tag += "_"
+    return f"${tag}${text}${tag}$"
+
+
+def lookup_admin_source_version(source, version) -> str:
+    """Return the version string mapped to an admin source release in the
+    current environment.
+
+    No error handling is done here: the GeoencoderQueryParams validator
+    should have already ensured that both lookups are safe.
+    """
+    versions_for_source = per_env_admin_boundary_versions[ENV][source]
+    return versions_for_source[version]
+
+
+def form_geoencoder_response(
+    admin_source, admin_version, adm_level, match_list
+) -> GeoencoderResponse:
+    """Build the lookup response from the rows returned by the query.
+
+    Each row yields one match with country, region and subregion elements.
+    Levels deeper than ``adm_level`` are reported with a null id and name.
+    """
+
+    def _element(level, name_key, row):
+        # Levels below the requested one were not part of the search.
+        if adm_level < level:
+            return {"id": None, "name": None}
+        return {"id": extract_level_gid(level, row), "name": row[name_key]}
+
+    matches = [
+        {
+            # The country element is always populated.
+            "country": {"id": extract_level_gid(0, row), "name": row["country"]},
+            "region": _element(1, "name_1", row),
+            "subregion": _element(2, "name_2", row),
+        }
+        for row in match_list
+    ]
+
+    return GeoencoderResponse(
+        data={
+            "adminSource": admin_source,
+            "adminVersion": admin_version,
+            "matches": matches,
+        }
+    )
+
+
+def extract_level_gid(gid_level, match):
+    """Return the ``gid_level``-th dot-separated component of the row's
+    ``gid_<level>`` value, ignoring anything from the first underscore on."""
+    raw_gid = match[f"gid_{gid_level}"]
+    # Keep only the text before the first underscore.
+    dotted_id, _, _ = raw_gid.partition("_")
+    components = dotted_id.split(".")
+    return components[gid_level]
diff --git a/app/settings/globals.py b/app/settings/globals.py
@@ -1,6 +1,6 @@
 import json
 from pathlib import Path
-from typing import Optional
+from typing import Dict, Optional
 
 from starlette.config import Config
 from starlette.datastructures import Secret
@@ -185,3 +185,28 @@
 RASTER_ANALYSIS_STATE_MACHINE_ARN = config(
     "RASTER_ANALYSIS_STATE_MACHINE_ARN", cast=str, default=None
 )
+
+# Admin boundary versions available per environment, keyed as
+# environment name -> admin source -> source release -> version string.
+# The route code treats the leaf string as the version deployed in the
+# Data API and passes it to the dataset query.
+# TODO: Find a good home for this:
+per_env_admin_boundary_versions: Dict[str, Dict[str, Dict]] = {
+    "test": {
+        "GADM": {
+            "3.6": "v3.6",
+            "4.1": "v4.1.64",
+        }
+    },
+    "dev": {
+        "GADM": {
+            "4.1": "v4.1.64",
+        }
+    },
+    "staging": {
+        "GADM": {
+            "4.1": "v4.1.64",
+        }
+    },
+    "production": {
+        "GADM": {
+            "4.1": "v4.1.64",
+        }
+    },
+}
diff --git a/tests_v2/unit/app/routes/political/__init__.py b/tests_v2/unit/app/routes/political/__init__.py
diff --git a/tests_v2/unit/app/routes/political/id_lookup/__init__.py b/tests_v2/unit/app/routes/political/id_lookup/__init__.py