diff --git a/batch/gdal-python.dockerfile b/batch/gdal-python.dockerfile deleted file mode 100644 index c2d4a5986..000000000 --- a/batch/gdal-python.dockerfile +++ /dev/null @@ -1,16 +0,0 @@ -FROM globalforestwatch/data-api-gdal:v1.2.2 - -# Copy scripts -COPY ./batch/scripts/ /opt/scripts/ -COPY ./batch/python/ /opt/python/ - -# Make sure scripts are executable -RUN chmod +x -R /opt/scripts/ -RUN chmod +x -R /opt/python/ - -ENV PATH="/opt/scripts:${PATH}" -ENV PATH="/opt/python:${PATH}" - -ENV WORKDIR="/tmp" - -ENTRYPOINT ["/opt/scripts/report_status.sh"] \ No newline at end of file diff --git a/batch/postgresql-client.dockerfile b/batch/postgresql-client.dockerfile deleted file mode 100644 index 3dbefb1a6..000000000 --- a/batch/postgresql-client.dockerfile +++ /dev/null @@ -1,16 +0,0 @@ -FROM globalforestwatch/data-api-postgresql:v1.1.1 - -# Copy scripts -COPY ./batch/scripts/ /opt/scripts/ -COPY ./batch/python/ /opt/python/ - -# make sure scripts are executable -RUN chmod +x -R /opt/scripts/ -RUN chmod +x -R /opt/python/ - -ENV PATH="/opt/scripts:${PATH}" -ENV PATH="/opt/python:${PATH}" - -WORKDIR /tmp - -ENTRYPOINT ["/opt/scripts/report_status.sh"] \ No newline at end of file diff --git a/batch/scripts/create_vector_schema.sh b/batch/scripts/create_vector_schema.sh index 3a5152f32..31f33d636 100755 --- a/batch/scripts/create_vector_schema.sh +++ b/batch/scripts/create_vector_schema.sh @@ -23,7 +23,7 @@ set -u # TODO: Downloading the whole file for this step might be unnecessary # See https://gfw.atlassian.net/browse/GTC-2233 echo "AWSCLI: COPY DATA FROM $SRC TO $LOCAL_FILE" -aws s3 cp "$SRC" "$LOCAL_FILE" +aws s3 cp "$SRC" "$LOCAL_FILE" --no-progress # use virtual GDAL vsizip wrapper for ZIP files # TODO: [GTC-661] Allow for a more flexible file structure inside the ZIP file diff --git a/batch/scripts/load_vector_data.sh b/batch/scripts/load_vector_data.sh index 4e89fd175..3b81be235 100755 --- a/batch/scripts/load_vector_data.sh +++ b/batch/scripts/load_vector_data.sh @@ -20,7 +20,7 @@ ME=$(basename "$0") set -u echo "AWSCLI: COPY DATA FROM $SRC TO $LOCAL_FILE" -aws s3 cp "$SRC" "$LOCAL_FILE" +aws s3 cp "$SRC" "$LOCAL_FILE" --no-progress # use virtual GDAL vsizip wrapper for ZIP files # TODO: [GTC-661] Allow for a more flexible file structure inside the ZIP file diff --git a/batch/tile_cache.dockerfile b/batch/tile_cache.dockerfile deleted file mode 100644 index 3ab80ba30..000000000 --- a/batch/tile_cache.dockerfile +++ /dev/null @@ -1,16 +0,0 @@ -FROM globalforestwatch/data-api-tippecanoe:v1.3.1 - -# Copy scripts -COPY ./batch/scripts/ /opt/scripts/ -COPY ./batch/python/ /opt/python/ - -# make sure scripts are executable -RUN chmod +x -R /opt/scripts/ -RUN chmod +x -R /opt/python/ - -ENV PATH="/opt/scripts:${PATH}" -ENV PATH="/opt/python:${PATH}" - -WORKDIR /tmp - -ENTRYPOINT ["/opt/scripts/report_status.sh"] \ No newline at end of file diff --git a/batch/universal_batch.dockerfile b/batch/universal_batch.dockerfile new file mode 100644 index 000000000..f7103a647 --- /dev/null +++ b/batch/universal_batch.dockerfile @@ -0,0 +1,62 @@ +FROM ghcr.io/osgeo/gdal:ubuntu-full-3.9.3 +LABEL desc="Docker image with ALL THE THINGS for use in Batch by the GFW data API" +LABEL version="v1.1" + +ENV TIPPECANOE_VERSION=2.72.0 + +ENV VENV_DIR="/.venv" + +RUN apt-get update -y \ + && apt-get install --no-install-recommends -y python3 python-dev-is-python3 python3-venv \ + postgresql-client jq curl libsqlite3-dev zlib1g-dev zip libpq-dev build-essential gcc g++ \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +# --system-site-packages is needed to copy the GDAL Python libs into the venv +RUN python -m venv ${VENV_DIR} --system-site-packages \ + && . ${VENV_DIR}/bin/activate \ + && python -m ensurepip --upgrade \ + && python -m pip install \ + agate~=1.12.0 \ + asyncpg~=0.30.0 \ + awscli~=1.36.18 \ + awscli-plugin-endpoint~=0.4 \ + boto3~=1.35.77 \ + click~=8.1.7 \ + csvkit~=2.0.1 \ + earthengine-api~=0.1.408 \ + fiona~=1.9.6 \ + gsutil~=5.31 \ + numpy~=1.26.4 \ + pandas~=2.1.4 \ + psycopg2~=2.9.10 \ + rasterio~=1.3.11 \ + setuptools~=75.6 \ + shapely~=2.0.4 \ + SQLAlchemy~=1.3.24 \ + tileputty~=0.2.10 + +# Install TippeCanoe +RUN mkdir -p /opt/src +WORKDIR /opt/src +RUN curl https://codeload.github.com/felt/tippecanoe/tar.gz/${TIPPECANOE_VERSION} | tar -xz \ + && cd /opt/src/tippecanoe-${TIPPECANOE_VERSION} \ + && make \ + && make install \ + && rm -R /opt/src/tippecanoe-${TIPPECANOE_VERSION} + +# Copy scripts +COPY ./batch/scripts/ /opt/scripts/ +COPY ./batch/python/ /opt/python/ + +# Make sure scripts are executable +RUN chmod +x -R /opt/scripts/ +RUN chmod +x -R /opt/python/ + +ENV PATH="/opt/scripts:${PATH}" +ENV PATH="/opt/python:${PATH}" + +ENV WORKDIR="/" +WORKDIR / + +ENTRYPOINT ["/opt/scripts/report_status.sh"] \ No newline at end of file diff --git a/scripts/develop b/scripts/develop index 45e62d936..87f97dde1 100755 --- a/scripts/develop +++ b/scripts/develop @@ -24,9 +24,7 @@ done set -- "${POSITIONAL[@]}" # restore positional parameters if [ "${BUILD}" = true ]; then - docker build -t batch_gdal-python_test . -f batch/gdal-python.dockerfile - docker build -t batch_postgresql-client_test . -f batch/postgresql-client.dockerfile - docker build -t batch_tile_cache_test . -f batch/tile_cache.dockerfile + docker build -t batch_jobs_test . -f batch/universal_batch.dockerfile docker build -t pixetl_test . -f batch/pixetl.dockerfile docker compose -f docker-compose.dev.yml --project-name gfw-data-api_dev up --abort-on-container-exit --remove-orphans --build else diff --git a/scripts/test b/scripts/test index 95dbf3e8d..ce4d3d677 100755 --- a/scripts/test +++ b/scripts/test @@ -60,9 +60,7 @@ if [ $# -eq 0 ]; then fi if [ "${BUILD}" = true ]; then - docker build -t batch_gdal-python_test . -f batch/gdal-python.dockerfile - docker build -t batch_postgresql-client_test . -f batch/postgresql-client.dockerfile - docker build -t batch_tile_cache_test . -f batch/tile_cache.dockerfile + docker build -t batch_jobs_test . -f batch/universal_batch.dockerfile docker build -t pixetl_test . -f batch/pixetl.dockerfile docker compose -f docker-compose.test.yml --project-name gfw-data-api_test build --no-cache app_test fi diff --git a/scripts/test_v2 b/scripts/test_v2 index 1a78adce5..df8ec17ed 100755 --- a/scripts/test_v2 +++ b/scripts/test_v2 @@ -52,9 +52,7 @@ if [ $# -eq 0 ]; then fi if [ "${BUILD}" = true ]; then - docker build -t batch_gdal-python_test . -f batch/gdal-python.dockerfile - docker build -t batch_postgresql-client_test . -f batch/postgresql-client.dockerfile - docker build -t batch_tile_cache_test . -f batch/tile_cache.dockerfile + docker build -t batch_jobs_test . -f batch/universal_batch.dockerfile docker build -t pixetl_test . -f batch/pixetl.dockerfile docker compose -f docker-compose.test.yml --project-name gfw-data-api_test build --no-cache app_test fi diff --git a/terraform/main.tf b/terraform/main.tf index 08933a1ea..b55791ebe 100644 --- a/terraform/main.tf +++ b/terraform/main.tf @@ -44,15 +44,6 @@ module "app_docker_image" { tag = local.container_tag } -# Docker image for GDAL Python Batch jobs -module "batch_gdal_python_image" { - source = "git::https://github.com/wri/gfw-terraform-modules.git//terraform/modules/container_registry?ref=v0.4.2.3" - image_name = substr(lower("${local.project}-gdal_python${local.name_suffix}"), 0, 64) - root_dir = "${path.root}/../" - docker_path = "batch" - docker_filename = "gdal-python.dockerfile" -} - # Docker image for PixETL Batch jobs module "batch_pixetl_image" { source = "git::https://github.com/wri/gfw-terraform-modules.git//terraform/modules/container_registry?ref=v0.4.2.3" @@ -62,25 +53,15 @@ module "batch_pixetl_image" { docker_filename = "pixetl.dockerfile" } -# Docker image for PostgreSQL Client Batch jobs -module "batch_postgresql_client_image" { +# Docker image for all Batch jobs except those requiring PixETL +module "batch_universal_image" { source = "git::https://github.com/wri/gfw-terraform-modules.git//terraform/modules/container_registry?ref=v0.4.2.3" - image_name = substr(lower("${local.project}-postgresql_client${local.name_suffix}"), 0, 64) + image_name = substr(lower("${local.project}-universal${local.name_suffix}"), 0, 64) root_dir = "${path.root}/../" docker_path = "batch" - docker_filename = "postgresql-client.dockerfile" + docker_filename = "universal_batch.dockerfile" } -# Docker image for Tile Cache Batch jobs -module "batch_tile_cache_image" { - source = "git::https://github.com/wri/gfw-terraform-modules.git//terraform/modules/container_registry?ref=v0.4.2.3" - image_name = substr(lower("${local.project}-tile_cache${local.name_suffix}"), 0, 64) - root_dir = "${path.root}/../" - docker_path = "batch" - docker_filename = "tile_cache.dockerfile" -} - - module "fargate_autoscaling" { source = "git::https://github.com/wri/gfw-terraform-modules.git//terraform/modules/fargate_autoscaling?ref=v0.4.2.5" project = local.project @@ -221,10 +202,10 @@ module "batch_job_queues" { environment = var.environment name_suffix = local.name_suffix project = local.project - gdal_repository_url = "${module.batch_gdal_python_image.repository_url}:latest" + gdal_repository_url = "${module.batch_universal_image.repository_url}:latest" pixetl_repository_url = "${module.batch_pixetl_image.repository_url}:latest" - postgres_repository_url = "${module.batch_postgresql_client_image.repository_url}:latest" - tile_cache_repository_url = "${module.batch_tile_cache_image.repository_url}:latest" + postgres_repository_url = "${module.batch_universal_image.repository_url}:latest" + tile_cache_repository_url = "${module.batch_universal_image.repository_url}:latest" iam_policy_arn = [ "arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess", aws_iam_policy.query_batch_jobs.arn, diff --git a/tests/conftest.py b/tests/conftest.py index 74e415d2d..00459869b 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -181,11 +181,11 @@ def patch_run(self, *k, **kwargs): ON_DEMAND_COMPUTE_JOB_QUEUE, cogify_env["computeEnvironmentArn"] ) - aws_mock.add_job_definition(GDAL_PYTHON_JOB_DEFINITION, "batch_gdal-python_test") + aws_mock.add_job_definition(GDAL_PYTHON_JOB_DEFINITION, "batch_jobs_test") aws_mock.add_job_definition( - POSTGRESQL_CLIENT_JOB_DEFINITION, "batch_postgresql-client_test" + POSTGRESQL_CLIENT_JOB_DEFINITION, "batch_jobs_test" ) - aws_mock.add_job_definition(TILE_CACHE_JOB_DEFINITION, "batch_tile_cache_test") + aws_mock.add_job_definition(TILE_CACHE_JOB_DEFINITION, "batch_jobs_test") aws_mock.add_job_definition(PIXETL_JOB_DEFINITION, "pixetl_test", mount_tmp=True) yield aws_mock.mocked_services["batch"]["client"], aws_mock.mocked_services["logs"][ @@ -223,7 +223,7 @@ def httpd(): t.join() -@pytest.fixture(autouse=True) +@pytest.fixture(autouse=True, scope="function") def flush_request_list(httpd): """Delete request cache before every test.""" _ = httpx.delete(f"http://localhost:{httpd.server_port}") diff --git a/tests/fixtures/test.gpkg.zip b/tests/fixtures/test.gpkg.zip index 10fbec156..3562da31e 100644 Binary files a/tests/fixtures/test.gpkg.zip and b/tests/fixtures/test.gpkg.zip differ diff --git a/tests/routes/datasets/test_versions.py b/tests/routes/datasets/test_versions.py index 0c3b084f9..522c452c1 100755 --- a/tests/routes/datasets/test_versions.py +++ b/tests/routes/datasets/test_versions.py @@ -448,6 +448,7 @@ async def test_version_put_raster(async_client: AsyncClient): ) assert response.status_code == 404 + @pytest.mark.asyncio async def test_version_post_append(async_client: AsyncClient): """Test version append operations.""" @@ -470,7 +471,7 @@ async def test_version_post_append(async_client: AsyncClient): dataset_payload=dataset_payload, version_payload=version_payload, async_client=async_client, - execute_batch_jobs=False, + execute_batch_jobs=True, ) response = await async_client.get(f"/dataset/{dataset}/{version}") @@ -517,6 +518,7 @@ async def test_version_post_append(async_client: AsyncClient): ## TODO: test with missing layers + @pytest.mark.hanging @pytest.mark.asyncio async def test_version_put_raster_bug_fixes(async_client: AsyncClient):