diff --git a/current/LAU/collection.json b/current/LAU/collection.json new file mode 100644 index 0000000000..8ebe0ef4ff --- /dev/null +++ b/current/LAU/collection.json @@ -0,0 +1,163 @@ +{ + "type": "Collection", + "id": "LAU", + "stac_version": "1.0.0", + "description": "The LAUs are an administrative subdivision of the NUTS 3 regions covering the EU's whole economic territory. They help ensure that data are readily available and that policies can be implemented effectively using these units, and they are appropriate for implementing the local-level typologies included in Tercet, namely the coastal area and the degree of urbanisation (DEGURBA) classifications, including the city and functional urban area (FUA) definitions.", + "links": [ + { + "rel": "root", + "href": "../catalog.json", + "type": "application/json", + "title": "CoCliCo STAC Catalog" + }, + { + "rel": "item", + "href": "./items/LAU_RG_01M_2020_3035.json", + "type": "application/json" + }, + { + "rel": "parent", + "href": "../catalog.json", + "type": "application/json", + "title": "CoCliCo STAC Catalog" + } + ], + "stac_extensions": [ + "https://stac-extensions.github.io/item-assets/v1.0.0/schema.json", + "https://stac-extensions.github.io/scientific/v1.0.0/schema.json", + "https://stac-extensions.github.io/table/v1.2.0/schema.json", + "https://stac-extensions.github.io/version/v1.2.0/schema.json" + ], + "item_assets": { + "data": { + "title": "LAU", + "description": "A system of local administrative units (LAUs) compatible with NUTS", + "roles": [ + "data" + ], + "type": "application/vnd.apache.parquet", + "table:storage_options": { + "account_name": "coclico" + }, + "table:columns": [ + { + "name": "GISCO_ID", + "type": "string", + "description": "" + }, + { + "name": "CNTR_CODE", + "type": "string", + "description": "" + }, + { + "name": "LAU_ID", + "type": "string", + "description": "" + }, + { + "name": "LAU_NAME", + "type": "string", + "description": "" + }, + { + "name": "POP_2020", + "type": "double", + "description": "" + }, + { + "name": "POP_DENS_2", + "type": "double", + "description": "" + }, + { + "name": "AREA_KM2", + "type": "double", + "description": "" + }, + { + "name": "YEAR", + "type": "int64", + "description": "" + }, + { + "name": "FID", + "type": "string", + "description": "" + }, + { + "name": "geometry", + "type": "binary", + "description": "" + } + ] + } + }, + "base_url": "gs://coclico-data-public/coclico/LAU", + "sci:citation": "None.", + "version": "1", + "title": "Local administrative units", + "extent": { + "spatial": { + "bbox": [ + [ + -81.78251054209933, + -29.857519696177313, + 93.1752791130622, + 37.855412297701754 + ] + ] + }, + "temporal": { + "interval": [ + [ + "2024-01-22T00:00:00Z", + "2024-01-22T00:00:00Z" + ] + ] + } + }, + "license": "CC-BY-4.0", + "keywords": [ + "Local", + "Administrative", + "Units", + "European Union", + "Full-Track", + "Background Layer" + ], + "providers": [ + { + "name": "Eurostat", + "roles": [ + "producer", + "licensor" + ], + "url": "https://ec.europa.eu/eurostat/web/nuts/local-administrative-units" + }, + { + "name": "Deltares", + "roles": [ + "processor", + "host" + ], + "url": "https://deltares.nl" + } + ], + "assets": { + "thumbnail": { + "href": "https://storage.googleapis.com/download/storage/v1/b/coclico-data-public/o/coclico%2Fassets%2Fthumbnails%2FLAU.png?alt=media", + "type": "image/png", + "title": "Thumbnail" + }, + "geoparquet-stac-items": { + "href": "gs://coclico-data-public/coclico/items/LAU.parquet", + "type": "application/vnd.apache.parquet", + "title": 
"GeoParquet STAC items", + "description": "Snapshot of the collection's STAC items exported to GeoParquet format.", + "roles": [ + "data" + ] + } + } +} \ No newline at end of file diff --git a/current/LAU/items/LAU_RG_01M_2020_3035.json b/current/LAU/items/LAU_RG_01M_2020_3035.json new file mode 100644 index 0000000000..9614655e12 --- /dev/null +++ b/current/LAU/items/LAU_RG_01M_2020_3035.json @@ -0,0 +1,140 @@ +{ + "type": "Feature", + "stac_version": "1.0.0", + "id": "LAU_RG_01M_2020_3035", + "properties": { + "title": "LAU", + "description": "A system of local administrative units (LAUs) compatible with NUTS", + "table:columns": [ + { + "name": "GISCO_ID", + "type": "string", + "description": "" + }, + { + "name": "CNTR_CODE", + "type": "string", + "description": "" + }, + { + "name": "LAU_ID", + "type": "string", + "description": "" + }, + { + "name": "LAU_NAME", + "type": "string", + "description": "" + }, + { + "name": "POP_2020", + "type": "double", + "description": "" + }, + { + "name": "POP_DENS_2", + "type": "double", + "description": "" + }, + { + "name": "AREA_KM2", + "type": "double", + "description": "" + }, + { + "name": "YEAR", + "type": "int64", + "description": "" + }, + { + "name": "FID", + "type": "string", + "description": "" + }, + { + "name": "geometry", + "type": "binary", + "description": "" + } + ], + "proj:bbox": [ + -2824243.6908, + -3076198.0823, + 10026005.2215, + 5415709.913699999 + ], + "proj:epsg": 3035, + "table:row_count": 98613, + "created": "2024-09-24T13:17:15.784538Z", + "datetime": "2024-01-22T00:00:00Z" + }, + "geometry": { + "type": "Polygon", + "coordinates": [ + [ + [ + 93.1752791130622, + -29.857519696177313 + ], + [ + 93.1752791130622, + 37.855412297701754 + ], + [ + -81.78251054209933, + 37.855412297701754 + ], + [ + -81.78251054209933, + -29.857519696177313 + ], + [ + 93.1752791130622, + -29.857519696177313 + ] + ] + ] + }, + "links": [ + { + "rel": "root", + "href": "../../catalog.json", + "type": "application/json", + "title": "CoCliCo STAC Catalog" + }, + { + "rel": "collection", + "href": "../collection.json", + "type": "application/json", + "title": "Local administrative units" + }, + { + "rel": "parent", + "href": "../collection.json", + "type": "application/json", + "title": "Local administrative units" + } + ], + "assets": { + "data": { + "href": "gs://coclico-data-public/coclico/LAU/LAU_RG_01M_2020_3035.parquet", + "type": "application/vnd.apache.parquet", + "title": "LAU", + "description": "A system of local administrative units (LAUs) compatible with NUTS", + "roles": [ + "data" + ] + } + }, + "bbox": [ + -81.78251054209933, + -29.857519696177313, + 93.1752791130622, + 37.855412297701754 + ], + "stac_extensions": [ + "https://stac-extensions.github.io/table/v1.2.0/schema.json", + "https://stac-extensions.github.io/projection/v1.1.0/schema.json" + ], + "collection": "LAU" +} \ No newline at end of file diff --git a/current/NUTS/collection.json b/current/NUTS/collection.json new file mode 100644 index 0000000000..1d8e822fa1 --- /dev/null +++ b/current/NUTS/collection.json @@ -0,0 +1,158 @@ +{ + "type": "Collection", + "id": "NUTS", + "stac_version": "1.0.0", + "description": "The GISCO statistical unit dataset represents the NUTS (nomenclature of territorial units for statistics) and statistical regions by means of multipart polygon, polyline and point topology. 
The NUTS geographical information is completed by attribute tables and a set of cartographic help lines to better visualise multipart polygonal regions.", + "links": [ + { + "rel": "root", + "href": "../catalog.json", + "type": "application/json", + "title": "CoCliCo STAC Catalog" + }, + { + "rel": "item", + "href": "./items/NUTS_RG_01M_2021_3035.json", + "type": "application/json" + }, + { + "rel": "parent", + "href": "../catalog.json", + "type": "application/json", + "title": "CoCliCo STAC Catalog" + } + ], + "stac_extensions": [ + "https://stac-extensions.github.io/item-assets/v1.0.0/schema.json", + "https://stac-extensions.github.io/scientific/v1.0.0/schema.json", + "https://stac-extensions.github.io/table/v1.2.0/schema.json", + "https://stac-extensions.github.io/version/v1.2.0/schema.json" + ], + "item_assets": { + "data": { + "title": "NUTS", + "description": "The NUTS are a hierarchical system divided into 3 levels", + "roles": [ + "data" + ], + "type": "application/vnd.apache.parquet", + "table:storage_options": { + "account_name": "coclico" + }, + "table:columns": [ + { + "name": "NUTS_ID", + "type": "string", + "description": "" + }, + { + "name": "LEVL_CODE", + "type": "int64", + "description": "" + }, + { + "name": "CNTR_CODE", + "type": "string", + "description": "" + }, + { + "name": "NAME_LATN", + "type": "binary", + "description": "" + }, + { + "name": "NUTS_NAME", + "type": "binary", + "description": "" + }, + { + "name": "MOUNT_TYPE", + "type": "double", + "description": "" + }, + { + "name": "URBN_TYPE", + "type": "int64", + "description": "" + }, + { + "name": "COAST_TYPE", + "type": "int64", + "description": "" + }, + { + "name": "geometry", + "type": "binary", + "description": "" + } + ] + } + }, + "base_url": "gs://coclico-data-public/coclico/NUTS", + "sci:citation": "None.", + "version": "1", + "title": "Nomenclature of territorial units for statistics", + "extent": { + "spatial": { + "bbox": [ + [ + -90.23528381409483, + -29.857082115784582, + 103.456913582018, + 39.79889221318682 + ] + ] + }, + "temporal": { + "interval": [ + [ + "2024-01-22T00:00:00Z", + "2024-01-22T00:00:00Z" + ] + ] + } + }, + "license": "CC-BY-4.0", + "keywords": [ + "Local", + "Administrative", + "Units", + "European Union", + "Full-Track", + "Background Layer" + ], + "providers": [ + { + "name": "Eurostat", + "roles": [ + "producer", + "licensor" + ], + "url": "https://ec.europa.eu/eurostat/web/gisco/geodata/statistical-units/territorial-units-statistics" + }, + { + "name": "Deltares", + "roles": [ + "processor", + "host" + ], + "url": "https://deltares.nl" + } + ], + "assets": { + "thumbnail": { + "href": "https://storage.googleapis.com/download/storage/v1/b/coclico-data-public/o/coclico%2Fassets%2Fthumbnails%2FNUTS.png?alt=media", + "type": "image/png", + "title": "Thumbnail" + }, + "geoparquet-stac-items": { + "href": "gs://coclico-data-public/coclico/items/NUTS.parquet", + "type": "application/vnd.apache.parquet", + "title": "GeoParquet STAC items", + "description": "Snapshot of the collection's STAC items exported to GeoParquet format.", + "roles": [ + "data" + ] + } + } +} \ No newline at end of file diff --git a/current/NUTS/items/NUTS_RG_01M_2021_3035.json b/current/NUTS/items/NUTS_RG_01M_2021_3035.json new file mode 100644 index 0000000000..959adcae81 --- /dev/null +++ b/current/NUTS/items/NUTS_RG_01M_2021_3035.json @@ -0,0 +1,135 @@ +{ + "type": "Feature", + "stac_version": "1.0.0", + "id": "NUTS_RG_01M_2021_3035", + "properties": { + "title": "NUTS", + "description": "The NUTS are 
a hierarchical system divided into 3 levels", + "table:columns": [ + { + "name": "NUTS_ID", + "type": "string", + "description": "" + }, + { + "name": "LEVL_CODE", + "type": "int64", + "description": "" + }, + { + "name": "CNTR_CODE", + "type": "string", + "description": "" + }, + { + "name": "NAME_LATN", + "type": "binary", + "description": "" + }, + { + "name": "NUTS_NAME", + "type": "binary", + "description": "" + }, + { + "name": "MOUNT_TYPE", + "type": "double", + "description": "" + }, + { + "name": "URBN_TYPE", + "type": "int64", + "description": "" + }, + { + "name": "COAST_TYPE", + "type": "int64", + "description": "" + }, + { + "name": "geometry", + "type": "binary", + "description": "" + } + ], + "proj:bbox": [ + -2824230.8134000003, + -3076162.6068, + 10026010.8334, + 6405005.3475 + ], + "proj:epsg": 3035, + "table:row_count": 2010, + "created": "2024-09-24T13:17:57.364236Z", + "datetime": "2024-01-22T00:00:00Z" + }, + "geometry": { + "type": "Polygon", + "coordinates": [ + [ + [ + 103.456913582018, + -29.857082115784582 + ], + [ + 103.456913582018, + 39.79889221318682 + ], + [ + -90.23528381409483, + 39.79889221318682 + ], + [ + -90.23528381409483, + -29.857082115784582 + ], + [ + 103.456913582018, + -29.857082115784582 + ] + ] + ] + }, + "links": [ + { + "rel": "root", + "href": "../../catalog.json", + "type": "application/json", + "title": "CoCliCo STAC Catalog" + }, + { + "rel": "collection", + "href": "../collection.json", + "type": "application/json", + "title": "Nomenclature of territorial units for statistics" + }, + { + "rel": "parent", + "href": "../collection.json", + "type": "application/json", + "title": "Nomenclature of territorial units for statistics" + } + ], + "assets": { + "data": { + "href": "gs://coclico-data-public/coclico/NUTS/NUTS_RG_01M_2021_3035.parquet", + "type": "application/vnd.apache.parquet", + "title": "NUTS", + "description": "The NUTS are a hierarchical system divided into 3 levels", + "roles": [ + "data" + ] + } + }, + "bbox": [ + -90.23528381409483, + -29.857082115784582, + 103.456913582018, + 39.79889221318682 + ], + "stac_extensions": [ + "https://stac-extensions.github.io/table/v1.2.0/schema.json", + "https://stac-extensions.github.io/projection/v1.1.0/schema.json" + ], + "collection": "NUTS" +} \ No newline at end of file diff --git a/current/catalog.json b/current/catalog.json index 222608da92..1d8690d4b9 100644 --- a/current/catalog.json +++ b/current/catalog.json @@ -156,9 +156,15 @@ }, { "rel": "child", - "href": "./ss_wc/collection.json", + "href": "./LAU/collection.json", "type": "application/json", - "title": "Storm Surge and Wave Climate" + "title": "Local administrative units" + }, + { + "rel": "child", + "href": "./NUTS/collection.json", + "type": "application/json", + "title": "Nomenclature of territorial units for statistics" } ], "assets": { diff --git a/notebooks/99_LAU_NUTS.ipynb b/notebooks/99_LAU_NUTS.ipynb new file mode 100644 index 0000000000..784c51c03a --- /dev/null +++ b/notebooks/99_LAU_NUTS.ipynb @@ -0,0 +1,874 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Notebook to process LAU and NUTS shapefile to parquet" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "# Load software\n", + "import os\n", + "import pathlib\n", + "import sys\n", + "import json\n", + "import numpy as np\n", + "import geopandas as gpd\n", + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import xarray as xr\n", + "from 
dotenv import load_dotenv\n", + "import math\n", + "from pathlib import Path\n", + "from typing import Any, Dict, List, Optional, Tuple, Union\n", + "import rioxarray as rio\n", + "\n", + "# Import custom functionality\n", + "from coclicodata.drive_config import p_drive\n", + "\n", + "# Define (local and) remote drives\n", + "coclico_data_dir = p_drive.joinpath(\"11207608-coclico\", \"FASTTRACK_DATA\")" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "# Set path to geopackage\n", + "LAU_dir = coclico_data_dir.joinpath('XX_LAU')\n", + "NUTS_dir = coclico_data_dir.joinpath('XX_NUTS')\n", + "\n", + "LAU_file = LAU_dir.joinpath('LAU_RG_01M_2020_3035.shp')\n", + "NUTS_file = NUTS_dir.joinpath('NUTS_RG_01M_2021_3035.shp')" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "# Load data\n", + "LAU = gpd.read_file(LAU_file)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "# Load data\n", + "NUTS = gpd.read_file(NUTS_file) " + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
" + ], + "text/plain": [ + " GISCO_ID CNTR_CODE LAU_ID LAU_NAME \\\n", + "0 CZ_579475 CZ 579475 Libňatov \n", + "1 CZ_579483 CZ 579483 Libotov \n", + "2 CZ_579491 CZ 579491 Vranov \n", + "3 CZ_579505 CZ 579505 Malá Úpa \n", + "4 AT_41004 AT 41004 Eggendorf im Traunkreis \n", + "... ... ... ... ... \n", + "98608 SK_599816 SK 599816 Košice - mestská časť Nad jazerom \n", + "98609 SK_599824 SK 599824 Košice - mestská časť Juh \n", + "98610 SK_599841 SK 599841 Košice - mestská časť Šaca \n", + "98611 SK_599859 SK 599859 Košice - mestská časť Poľov \n", + "98612 SK_599875 SK 599875 Košice - mestská časť Sídlisko Ťahanovce \n", + "\n", + " POP_2020 POP_DENS_2 AREA_KM2 YEAR FID \\\n", + "0 363.0 62.240158 5.832247 2020 CZ_579475 \n", + "1 178.0 41.774759 4.260946 2020 CZ_579483 \n", + "2 181.0 37.408873 4.838424 2020 CZ_579491 \n", + "3 142.0 5.318510 26.699207 2020 CZ_579505 \n", + "4 1041.0 116.481747 8.937023 2020 AT_41004 \n", + "... ... ... ... ... ... \n", + "98608 24443.0 6686.211551 3.655732 2020 SK_599816 \n", + "98609 22692.0 2324.881955 9.760496 2020 SK_599824 \n", + "98610 5969.0 124.672777 47.877333 2020 SK_599841 \n", + "98611 1219.0 94.049511 12.961258 2020 SK_599859 \n", + "98612 21936.0 2655.822523 8.259588 2020 SK_599875 \n", + "\n", + " geometry \n", + "0 POLYGON ((4747705.585 3057545.260, 4746836.524... \n", + "1 POLYGON ((4731662.418 3049054.578, 4732297.093... \n", + "2 POLYGON ((4540137.623 2963864.496, 4538706.113... \n", + "3 POLYGON ((4730399.261 3087673.334, 4730914.253... \n", + "4 POLYGON ((4631543.792 2788412.230, 4630476.583... \n", + "... ... \n", + "98608 POLYGON ((5149120.507 2905279.350, 5150154.014... \n", + "98609 POLYGON ((5144451.048 2904986.166, 5144591.047... \n", + "98610 POLYGON ((5141041.890 2900350.394, 5144604.564... \n", + "98611 POLYGON ((5141041.890 2900350.394, 5141299.765... \n", + "98612 POLYGON ((5147879.755 2916088.650, 5147433.909... \n", + "\n", + "[98613 rows x 10 columns]" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "LAU" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
" + ], + "text/plain": [ + " NUTS_ID LEVL_CODE CNTR_CODE NAME_LATN \\\n", + "0 AL 0 AL b'5368716970eb726961' \n", + "1 CZ 0 CZ ?esko \n", + "2 DE 0 DE Deutschland \n", + "3 DK 0 DK Danmark \n", + "4 CY 0 CY b'4bfd70726f73' \n", + "... ... ... ... ... \n", + "2005 NO0B1 3 NO Jan Mayen \n", + "2006 EE009 3 EE Kesk-Eesti \n", + "2007 NO0 1 NO Norge \n", + "2008 NO0B 2 NO Jan Mayen and Svalbard \n", + "2009 NO0B2 3 NO Svalbard \n", + "\n", + " NUTS_NAME MOUNT_TYPE URBN_TYPE COAST_TYPE \\\n", + "0 b'5368716970eb726961' 0.0 0 0 \n", + "1 ?esko 0.0 0 0 \n", + "2 Deutschland 0.0 0 0 \n", + "3 Danmark 0.0 0 0 \n", + "4 ?????? 0.0 0 0 \n", + "... ... ... ... ... \n", + "2005 Jan Mayen 3.0 3 1 \n", + "2006 Kesk-Eesti 4.0 3 1 \n", + "2007 Norge 0.0 0 0 \n", + "2008 Jan Mayen and Svalbard NaN 0 0 \n", + "2009 Svalbard 3.0 3 1 \n", + "\n", + " geometry \n", + "0 MULTIPOLYGON (((5121233.536 2221719.441, 51208... \n", + "1 POLYGON ((4624843.654 3112209.741, 4625546.618... \n", + "2 MULTIPOLYGON (((4355225.365 2715902.993, 43541... \n", + "3 MULTIPOLYGON (((4650502.736 3591342.844, 46503... \n", + "4 MULTIPOLYGON (((6527040.718 1762367.593, 65267... \n", + "... ... \n", + "2005 POLYGON ((3623747.621 5400386.841, 3624031.138... \n", + "2006 MULTIPOLYGON (((5216227.688 4159212.769, 52172... \n", + "2007 MULTIPOLYGON (((4961367.759 5413266.131, 49622... \n", + "2008 MULTIPOLYGON (((4744650.828 6379141.635, 47446... \n", + "2009 MULTIPOLYGON (((4760536.161 6404487.632, 47607... \n", + "\n", + "[2010 rows x 9 columns]" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "NUTS" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "# Write data as parquet file\n", + "lau_parquet_file = str(LAU_file).replace('.shp','.parquet')\n", + "LAU.to_parquet(lau_parquet_file)\n", + "\n", + "nuts_parquet_file = str(NUTS_file).replace('.shp','.parquet')\n", + "NUTS.to_parquet(nuts_parquet_file)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'P:\\\\11207608-coclico\\\\FASTTRACK_DATA\\\\XX_NUTS\\\\NUTS_RG_01M_2021_3035.parquet'" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nuts_parquet_file" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
" + ], + "text/plain": [ + " GISCO_ID CNTR_CODE LAU_ID LAU_NAME \\\n", + "0 CZ_579475 CZ 579475 Libňatov \n", + "1 CZ_579483 CZ 579483 Libotov \n", + "2 CZ_579491 CZ 579491 Vranov \n", + "3 CZ_579505 CZ 579505 Malá Úpa \n", + "4 AT_41004 AT 41004 Eggendorf im Traunkreis \n", + "... ... ... ... ... \n", + "98608 SK_599816 SK 599816 Košice - mestská časť Nad jazerom \n", + "98609 SK_599824 SK 599824 Košice - mestská časť Juh \n", + "98610 SK_599841 SK 599841 Košice - mestská časť Šaca \n", + "98611 SK_599859 SK 599859 Košice - mestská časť Poľov \n", + "98612 SK_599875 SK 599875 Košice - mestská časť Sídlisko Ťahanovce \n", + "\n", + " POP_2020 POP_DENS_2 AREA_KM2 YEAR FID \\\n", + "0 363.0 62.240158 5.832247 2020 CZ_579475 \n", + "1 178.0 41.774759 4.260946 2020 CZ_579483 \n", + "2 181.0 37.408873 4.838424 2020 CZ_579491 \n", + "3 142.0 5.318510 26.699207 2020 CZ_579505 \n", + "4 1041.0 116.481747 8.937023 2020 AT_41004 \n", + "... ... ... ... ... ... \n", + "98608 24443.0 6686.211551 3.655732 2020 SK_599816 \n", + "98609 22692.0 2324.881955 9.760496 2020 SK_599824 \n", + "98610 5969.0 124.672777 47.877333 2020 SK_599841 \n", + "98611 1219.0 94.049511 12.961258 2020 SK_599859 \n", + "98612 21936.0 2655.822523 8.259588 2020 SK_599875 \n", + "\n", + " geometry \n", + "0 b'\\x01\\x03\\x00\\x00\\x00\\x01\\x00\\x00\\x00\\x0b\\x00... \n", + "1 b'\\x01\\x03\\x00\\x00\\x00\\x01\\x00\\x00\\x00\\x08\\x00... \n", + "2 b'\\x01\\x03\\x00\\x00\\x00\\x01\\x00\\x00\\x00\\n\\x00\\x... \n", + "3 b'\\x01\\x03\\x00\\x00\\x00\\x01\\x00\\x00\\x00\\x17\\x00... \n", + "4 b\"\\x01\\x03\\x00\\x00\\x00\\x01\\x00\\x00\\x00\\x0b\\x00... \n", + "... ... \n", + "98608 b\"\\x01\\x03\\x00\\x00\\x00\\x01\\x00\\x00\\x00\\t\\x00\\x... \n", + "98609 b'\\x01\\x03\\x00\\x00\\x00\\x01\\x00\\x00\\x00\\x10\\x00... \n", + "98610 b'\\x01\\x03\\x00\\x00\\x00\\x01\\x00\\x00\\x00\\x1d\\x00... \n", + "98611 b'\\x01\\x03\\x00\\x00\\x00\\x01\\x00\\x00\\x00\\x0e\\x00... \n", + "98612 b\"\\x01\\x03\\x00\\x00\\x00\\x01\\x00\\x00\\x00\\x12\\x00... 
\n", + "\n", + "[98613 rows x 10 columns]" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Test new parquet file\n", + "test_parquet = pd.read_parquet(lau_parquet_file)\n", + "test_parquet" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "image/svg+xml": [ + "" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import shapely\n", + "shapely.Polygon(LAU.geometry[0])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/scripts/create_stacs/99_LAU_stacs.py b/scripts/create_stacs/99_LAU_stacs.py new file mode 100644 index 0000000000..192e84a2a1 --- /dev/null +++ b/scripts/create_stacs/99_LAU_stacs.py @@ -0,0 +1,529 @@ +# %% +# ## Load software +import sys + +branch = "dev" +sys.path.insert(0, "../src") + +from coastmonitor.io.drive_config import configure_instance + +is_local_instance = configure_instance(branch=branch) + +import dataclasses +import datetime +import logging +import os +import cv2 +import pathlib +import re +import json +import pyarrow +import gcsfs +import geopandas as gpd +import pandas as pd +import numpy as np +from typing import Any + +import fsspec +import pystac +import stac_geoparquet +from typing import List +from posixpath import join as urljoin +from dotenv import load_dotenv +from pystac.stac_io import DefaultStacIO + +from coclicodata.etl.cloud_utils import load_google_credentials, dir_to_google_cloud, file_to_google_cloud +from coclicodata.drive_config import p_drive +from coclicodata.coclico_stac.reshape_im import reshape_aspectratio_image + +from coastmonitor import stac_table +from coastmonitor.stac.layouts import ParquetLayout + +# %% +# ## Define variables +# hard-coded input params at project level +GCS_PROTOCOL = "https://storage.googleapis.com" +GCS_PROJECT = "coclico-11207608-002" +BUCKET_NAME = "coclico-data-public" +BUCKET_PROJ = "coclico" +PROJ_NAME = "LAU" + +# hard-coded STAC templates +STAC_DIR = pathlib.Path.cwd().parent.parent / "current" + +# hard-coded input params which differ per dataset +DATASET_DIR = "XX_LAU" +# CF_FILE = "Global_merit_coastal_mask_landwards.tif" +COLLECTION_ID = "LAU" # name of stac collection +MAX_FILE_SIZE = 500 # max file size in MB + +# define local directories +home = pathlib.Path().home() +tmp_dir = home.joinpath("data", "tmp") +coclico_data_dir = p_drive.joinpath( + "11207608-coclico", "FASTTRACK_DATA" +) # remote p drive +cred_data_dir = p_drive.joinpath("11207608-coclico", "FASTTRACK_DATA") + +# use local or remote data dir +use_local_data = False + +if use_local_data: + ds_dir = tmp_dir +else: + ds_dir = coclico_data_dir + +if not ds_dir.exists(): + raise FileNotFoundError(f"Data dir does not exist, {str(ds_dir)}") + +# # directory to export result +# cog_dirs = ds_dir.joinpath("cogs") +ds_path = 
ds_dir.joinpath("XX_LAU") +ds_fp = ds_path.joinpath("LAU_RG_01M_2020_3035.parquet") # file directory + +# # load metadata template +metadata_fp = ds_path.joinpath('metadata',ds_fp.name).with_suffix('.json') +with open(metadata_fp, "r") as f: + metadata = json.load(f) + +# # extend keywords +metadata['KEYWORDS'].extend(["Full-Track", "Background Layer"]) + +# # data output configurations +HREF_PREFIX = urljoin( + GCS_PROTOCOL, BUCKET_NAME, BUCKET_PROJ, PROJ_NAME +) # cloud export directory +TMP_DIR = pathlib.Path.home() / "tmp" + +PARQUET_MEDIA_TYPE = "application/vnd.apache.parquet" + +# CONTAINER_NAME = "transects" +# PREFIX = f"gcts-{TRANSECT_LENGTH}m.parquet" +# BASE_URL = f"gs://{CONTAINER_NAME}/{PREFIX}" +GEOPARQUET_STAC_ITEMS_HREF = ( + f"gs://{BUCKET_NAME}/{BUCKET_PROJ}/items/{COLLECTION_ID}.parquet" +) + +# %% +# %% +def read_parquet_schema_df(uri: str) -> List: # pd.DataFrame: + """Return a Pandas dataframe corresponding to the schema of a local URI of a parquet file. + + The returned dataframe has the columns: column, pa_dtype + """ + # Ref: https://stackoverflow.com/a/64288036/ + # Ref: https://stackoverflow.com/questions/41567081/get-schema-of-parquet-file-in-python + schema = pyarrow.parquet.read_schema(uri, memory_map=True) + # schema = pd.DataFrame(({"name": name, "type": str(pa_dtype)} for name, pa_dtype in zip(schema.names, schema.types))) + schema = [ + { + "name": name, + "type": str(pa_dtype), + "description": "", + } # TODO: add column descriptions once received from the VU + for name, pa_dtype in zip(schema.names, schema.types) + ] + # schema = schema.reindex(columns=["name", "type"], fill_value=pd.NA) # Ensures columns in case the parquet file has an empty dataframe. + return schema + + +def partition_dataframe(df: pd.DataFrame, batch_size: int) -> list[pd.DataFrame]: + """ + Splits a DataFrame into partitions approximately equal to or smaller than the specified batch size. + + Args: + df (pd.DataFrame): The DataFrame to be partitioned. + batch_size (int): The maximum number of rows each partition should have. + + Returns: + List[pd.DataFrame]: A list of DataFrames, each with a size up to the specified batch size. + """ + n_rows = len(df) + if n_rows <= batch_size: + return [df] + + num_partitions = (n_rows + batch_size - 1) // batch_size + partition_size = (n_rows + num_partitions - 1) // num_partitions + + partitions = [ + df.iloc[i : i + partition_size] for i in range(0, n_rows, partition_size) + ] + return partitions + + +@dataclasses.dataclass +class PathParts: + """ + Parses a path into its component parts, supporting variations with and without hive partitioning, + and with and without geographical bounds. 
+ """ + + path: str + container: str | None = None + prefix: str | None = None + name: str | None = None + stac_item_id: str | None = None + + def __post_init__(self) -> None: + # Strip any protocol pattern like "xyz://" + stripped_path = re.sub(r"^\w+://", "", self.path) + split = stripped_path.rstrip("/").split("/") + + # Extract container + self.container = split[0] + + # Determine if there is hive partitioning and extract it + hive_partition_info = [ + "_".join(part.split("=")) for part in split[1:-1] if "=" in part + ] + + # Extract name, which is the filename with the .parquet extension + self.name = split[-1] + + # Construct the stac_item_id + # Include hive partitioning info if present, then add the file name, replacing ".parquet" and ensuring underscores + parts_to_join = hive_partition_info + [self.name.replace(".parquet", "")] + self.stac_item_id = "_".join(parts_to_join) + + +def create_collection( + description: str | None = None, extra_fields: dict[str, Any] | None = None +) -> pystac.Collection: + + providers = [ + pystac.Provider( + name=metadata["PROVIDERS"]["name"], + roles=[ + pystac.provider.ProviderRole.PRODUCER, + pystac.provider.ProviderRole.LICENSOR, + ], + url=metadata["PROVIDERS"]["url"], + ), + pystac.Provider( + name="Deltares", + roles=[ + pystac.provider.ProviderRole.PROCESSOR, + pystac.provider.ProviderRole.HOST, + ], + url="https://deltares.nl", + ), + ] + + start_datetime = datetime.datetime.strptime( + metadata["TEMPORAL_EXTENT"][0].split("T")[0], "%Y-%m-%d" + ) + + extent = pystac.Extent( + pystac.SpatialExtent([metadata["SPATIAL_EXTENT"]]), + pystac.TemporalExtent([[start_datetime, None]]), + ) + + # double check, this is hard-coded! + # links = [ + # pystac.Link( + # pystac.RelType.LICENSE, + # target="https://creativecommons.org/publicdomain/zero/1.0/", + # media_type="text/html", + # title="CC License", + # ) + # ] + + if "Creative Commons" in metadata["LICENSE"] and "4.0" in metadata["LICENSE"]: + metadata["LICENSE"] = "CC-BY-4.0" + + collection = pystac.Collection( + id=COLLECTION_ID, + title=metadata["TITLE"], + description=metadata["DESCRIPTION"], + license=metadata["LICENSE"], + providers=providers, + extent=extent, + catalog_type=pystac.CatalogType.RELATIVE_PUBLISHED, + ) + + collection.add_asset( + "thumbnail", + pystac.Asset( + f"https://storage.googleapis.com/coclico-data-public/coclico/assets/thumbnails/{COLLECTION_ID}.jpeg", + title="Thumbnail", + media_type=pystac.MediaType.JPEG, + ), + ) + # collection.links = links + collection.keywords = metadata["KEYWORDS"] + + pystac.extensions.item_assets.ItemAssetsExtension.add_to(collection) + + collection.extra_fields["item_assets"] = { + "data": { + "title": metadata["TITLE_ABBREVIATION"], + "description": metadata["SHORT_DESCRIPTION"], + "roles": ["data"], + "type": stac_table.PARQUET_MEDIA_TYPE, + **ASSET_EXTRA_FIELDS, + } + } + + if extra_fields: + collection.extra_fields.update(extra_fields) + + pystac.extensions.scientific.ScientificExtension.add_to(collection) + collection.extra_fields["sci:citation"] = metadata["CITATION"] + + collection.stac_extensions.append(stac_table.SCHEMA_URI) + + pystac.extensions.version.VersionExtension.add_to(collection) + collection.extra_fields["version"] = "1" + + return collection + + +def create_item( + asset_href: str, + storage_options: dict[str, Any] | None = None, + asset_extra_fields: dict[str, Any] | None = None, +) -> pystac.Item: + """Create a STAC Item + + For + + Args: + asset_href (str): The HREF pointing to an asset associated with the item + + 
Returns: + Item: STAC Item object + """ + + parts = PathParts(asset_href) + + properties = { + "title": metadata["TITLE_ABBREVIATION"], + "description": metadata["SHORT_DESCRIPTION"], + } + + dt = datetime.datetime.strptime( + metadata["TEMPORAL_EXTENT"][0].split("T")[0], "%Y-%m-%d" + ) + # shape = shapely.box(*bbox) + # geometry = shapely.geometry.mapping(shape) + template = pystac.Item( + id=parts.stac_item_id, + properties=properties, + geometry=None, + bbox=None, + datetime=dt, + stac_extensions=[], + ) + + item = stac_table.generate( + uri=asset_href, + template=template, + infer_bbox=True, + infer_geometry=None, + datetime_column=None, + infer_datetime=stac_table.InferDatetimeOptions.no, + count_rows=True, + asset_key="data", + asset_extra_fields=asset_extra_fields, + proj=True, + storage_options=storage_options, + validate=False, + ) + assert isinstance(item, pystac.Item) + + item.common_metadata.created = datetime.datetime.utcnow() + + # add descriptions to item properties + if "table:columns" in ASSET_EXTRA_FIELDS and "table:columns" in item.properties: + source_lookup = { + col["name"]: col for col in ASSET_EXTRA_FIELDS["table:columns"] + } + + for target_col in item.properties["table:columns"]: + source_col = source_lookup.get(target_col["name"]) + if source_col: + target_col.setdefault("description", source_col.get("description")) + + # TODO: make configurable upstream + item.assets["data"].title = metadata["TITLE_ABBREVIATION"] + item.assets["data"].description = metadata["SHORT_DESCRIPTION"] + + return item + + +# %% +# ## Do the work +if __name__ == "__main__": + log = logging.getLogger() + log.setLevel(logging.ERROR) + + # loading credentials + load_google_credentials( + google_token_fp=cred_data_dir.joinpath("google_credentials_new.json") + ) + + # %% test if the file is multi-indexed, whether we need to write to the cloud, and whether we need to split files + dum = gpd.read_parquet(ds_fp) # read parquet file + split = "N" # value to determine if we need to split the files + for file in os.listdir(ds_path): + if os.path.getsize(ds_path.joinpath(file)) / 10**6 < MAX_FILE_SIZE: + split = "Y" # change split to Yes + break + + # bucket content + uri = f"gs://{BUCKET_NAME}/{BUCKET_PROJ}/{PROJ_NAME}" + # storage_options = {"account_name": "coclico", "credential": sas_token} + # fs, token, [root] = fsspec.get_fs_token_paths(uri, storage_options=storage_options) + fs = gcsfs.GCSFileSystem( + gcs_project=GCS_PROJECT, token=os.environ["GOOGLE_APPLICATION_CREDENTIALS"] + ) + paths = fs.glob(uri + "/*.parquet") + uris = ["gs://" + p for p in paths] + + # TODO: build something in for assessing size of parquet data, do this in both the if and elif statements + if ( + dum.index.nlevels > 1 or split == "Y" + ) and paths == []: # if multi-indexed or split and there is nothing in the cloud + files = os.listdir(ds_path) # list all files in the directory + files_clean = [k for k in files if ".parquet" in k] # only select parquet files + + for file in files_clean: + print(file) + file_size = os.path.getsize(ds_path.joinpath(file)) / 10**6 + + if file_size < MAX_FILE_SIZE: # test if file size is smaller than 500MB + dspd = gpd.read_parquet(ds_path.joinpath(file)) # read parquet file + if dum.index.nlevels > 1: + dspd = dspd.reset_index() # reset multi-index + + # write to the cloud, single file + dspd.to_parquet( + f"{uri}/{file}", engine="pyarrow" + ) # or supply with local path if needed + + elif file_size > MAX_FILE_SIZE: # test if file size is larger than 500MB + dspd = 
gpd.read_parquet(ds_path.joinpath(file)) # read parquet file + + batch_size = int( + np.ceil(len(dspd) / np.ceil(file_size / MAX_FILE_SIZE)) + ) # calc batch size (max number of rows per partition) + if dum.index.nlevels > 1: + dspd = dspd.reset_index() # reset multi-index + splitted_dspd = partition_dataframe(dspd, batch_size) # calc partitions + + # write to the cloud, all split files + for idx, split_dspd in enumerate(splitted_dspd): + file_name = ( + file.split(".")[0] + + "_{:02d}.".format(idx + 1) + + file.split(".")[1] + ) # add zero-padded index (+1 to start at 1) to file name + split_dspd.to_parquet( + f"{uri}/{file_name}", engine="pyarrow" + ) # or supply with local path if needed + + elif ( + dum.index.nlevels == 1 and split == "N" and paths == [] + ): # if not multi-indexed and no need to split and cloud file does not exist + + # upload directory to the cloud (files already parquet) + dir_to_google_cloud( + dir_path=str(ds_fp), + gcs_project=GCS_PROJECT, + bucket_name=BUCKET_NAME, + bucket_proj=BUCKET_PROJ, + dir_name=PROJ_NAME, + ) + + elif paths: + print('Dataset already exists in the Google Bucket') + + # %% get descriptions + COLUMN_DESCRIPTIONS = read_parquet_schema_df( + uris[0] + ) # select first file of the cloud directory + + ASSET_EXTRA_FIELDS = { + "table:storage_options": {"account_name": "coclico"}, + "table:columns": COLUMN_DESCRIPTIONS, + } + + # %% add to STAC + catalog = pystac.Catalog.from_file(str(STAC_DIR / "catalog.json")) + + stac_io = DefaultStacIO() + layout = ParquetLayout() + + collection = create_collection(extra_fields={"base_url": uri}) + + for uri in uris: + print(uri) + item = create_item(uri) + collection.add_item(item) + + collection.update_extent_from_items() + + items = list(collection.get_all_items()) + items_as_json = [i.to_dict() for i in items] + item_extents = stac_geoparquet.to_geodataframe(items_as_json) + + with fsspec.open(GEOPARQUET_STAC_ITEMS_HREF, mode="wb") as f: + item_extents.to_parquet(f) + + collection.add_asset( + "geoparquet-stac-items", + pystac.Asset( + GEOPARQUET_STAC_ITEMS_HREF, + title="GeoParquet STAC items", + description="Snapshot of the collection's STAC items exported to GeoParquet format.", + media_type=PARQUET_MEDIA_TYPE, + roles=["data"], + ), + ) + + # Set thumbnail directory + THUMB_DIR = pathlib.Path(__file__).parent.parent.joinpath('thumbnails') + THUMB_FILE = THUMB_DIR.joinpath(COLLECTION_ID + '.png') + + # Make sure image is reshaped to desired aspect ratio (default = 16/9) + cropped_im = reshape_aspectratio_image(str(THUMB_FILE)) + + # Overwrite image with cropped version + cv2.imwrite(str(THUMB_FILE), cropped_im) + + # Upload thumbnail to cloud + THUMB_URL = file_to_google_cloud(str(THUMB_FILE), + GCS_PROJECT, + BUCKET_NAME, + BUCKET_PROJ, + 'assets/thumbnails', + THUMB_FILE.name, + return_URL = True) + + # Add thumbnail + collection.add_asset( + "thumbnail", + pystac.Asset( + THUMB_URL, # noqa: E501 + title="Thumbnail", + media_type=pystac.MediaType.PNG, + ), + ) + + if catalog.get_child(collection.id): + catalog.remove_child(collection.id) + print(f"Removed child: {collection.id}.") + + catalog.add_child(collection) + + collection.normalize_hrefs(str(STAC_DIR / collection.id), layout) + + collection.validate_all() + + catalog.save( + catalog_type=pystac.CatalogType.SELF_CONTAINED, + dest_href=str(STAC_DIR), + stac_io=stac_io, + ) + +# %% diff --git a/scripts/create_stacs/99_NUTS_stacs.py b/scripts/create_stacs/99_NUTS_stacs.py new file mode 100644 index 0000000000..67002792c5 --- /dev/null +++ 
b/scripts/create_stacs/99_NUTS_stacs.py @@ -0,0 +1,529 @@ +# %% +# ## Load software +import sys + +branch = "dev" +sys.path.insert(0, "../src") + +from coastmonitor.io.drive_config import configure_instance + +is_local_instance = configure_instance(branch=branch) + +import dataclasses +import datetime +import logging +import os +import cv2 +import pathlib +import re +import json +import pyarrow +import gcsfs +import geopandas as gpd +import pandas as pd +import numpy as np +from typing import Any + +import fsspec +import pystac +import stac_geoparquet +from typing import List +from posixpath import join as urljoin +from dotenv import load_dotenv +from pystac.stac_io import DefaultStacIO + +from coclicodata.etl.cloud_utils import load_google_credentials, dir_to_google_cloud, file_to_google_cloud +from coclicodata.drive_config import p_drive +from coclicodata.coclico_stac.reshape_im import reshape_aspectratio_image + +from coastmonitor import stac_table +from coastmonitor.stac.layouts import ParquetLayout + +# %% +# ## Define variables +# hard-coded input params at project level +GCS_PROTOCOL = "https://storage.googleapis.com" +GCS_PROJECT = "coclico-11207608-002" +BUCKET_NAME = "coclico-data-public" +BUCKET_PROJ = "coclico" +PROJ_NAME = "NUTS" + +# hard-coded STAC templates +STAC_DIR = pathlib.Path.cwd().parent.parent / "current" + +# hard-coded input params which differ per dataset +DATASET_DIR = "XX_NUTS" +# CF_FILE = "Global_merit_coastal_mask_landwards.tif" +COLLECTION_ID = "NUTS" # name of stac collection +MAX_FILE_SIZE = 500 # max file size in MB + +# define local directories +home = pathlib.Path().home() +tmp_dir = home.joinpath("data", "tmp") +coclico_data_dir = p_drive.joinpath( + "11207608-coclico", "FASTTRACK_DATA" +) # remote p drive +cred_data_dir = p_drive.joinpath("11207608-coclico", "FASTTRACK_DATA") + +# use local or remote data dir +use_local_data = False + +if use_local_data: + ds_dir = tmp_dir +else: + ds_dir = coclico_data_dir + +if not ds_dir.exists(): + raise FileNotFoundError(f"Data dir does not exist, {str(ds_dir)}") + +# # directory to export result +# cog_dirs = ds_dir.joinpath("cogs") +ds_path = ds_dir.joinpath("XX_NUTS") +ds_fp = ds_path.joinpath("NUTS_RG_01M_2021_3035.parquet") # file directory + +# # load metadata template +metadata_fp = ds_path.joinpath('metadata',ds_fp.name).with_suffix('.json') +with open(metadata_fp, "r") as f: + metadata = json.load(f) + +# # extend keywords +metadata['KEYWORDS'].extend(["Full-Track", "Background Layer"]) + +# # data output configurations +HREF_PREFIX = urljoin( + GCS_PROTOCOL, BUCKET_NAME, BUCKET_PROJ, PROJ_NAME +) # cloud export directory +TMP_DIR = pathlib.Path.home() / "tmp" + +PARQUET_MEDIA_TYPE = "application/vnd.apache.parquet" + +# CONTAINER_NAME = "transects" +# PREFIX = f"gcts-{TRANSECT_LENGTH}m.parquet" +# BASE_URL = f"gs://{CONTAINER_NAME}/{PREFIX}" +GEOPARQUET_STAC_ITEMS_HREF = ( + f"gs://{BUCKET_NAME}/{BUCKET_PROJ}/items/{COLLECTION_ID}.parquet" +) + +# %% +# %% +def read_parquet_schema_df(uri: str) -> List: # pd.DataFrame: + """Return a Pandas dataframe corresponding to the schema of a local URI of a parquet file. 
+ + The returned dataframe has the columns: column, pa_dtype + """ + # Ref: https://stackoverflow.com/a/64288036/ + # Ref: https://stackoverflow.com/questions/41567081/get-schema-of-parquet-file-in-python + schema = pyarrow.parquet.read_schema(uri, memory_map=True) + # schema = pd.DataFrame(({"name": name, "type": str(pa_dtype)} for name, pa_dtype in zip(schema.names, schema.types))) + schema = [ + { + "name": name, + "type": str(pa_dtype), + "description": "", + } # TODO: add column descriptions once received from the VU + for name, pa_dtype in zip(schema.names, schema.types) + ] + # schema = schema.reindex(columns=["name", "type"], fill_value=pd.NA) # Ensures columns in case the parquet file has an empty dataframe. + return schema + + +def partition_dataframe(df: pd.DataFrame, batch_size: int) -> list[pd.DataFrame]: + """ + Splits a DataFrame into partitions approximately equal to or smaller than the specified batch size. + + Args: + df (pd.DataFrame): The DataFrame to be partitioned. + batch_size (int): The maximum number of rows each partition should have. + + Returns: + List[pd.DataFrame]: A list of DataFrames, each with a size up to the specified batch size. + """ + n_rows = len(df) + if n_rows <= batch_size: + return [df] + + num_partitions = (n_rows + batch_size - 1) // batch_size + partition_size = (n_rows + num_partitions - 1) // num_partitions + + partitions = [ + df.iloc[i : i + partition_size] for i in range(0, n_rows, partition_size) + ] + return partitions + + +@dataclasses.dataclass +class PathParts: + """ + Parses a path into its component parts, supporting variations with and without hive partitioning, + and with and without geographical bounds. + """ + + path: str + container: str | None = None + prefix: str | None = None + name: str | None = None + stac_item_id: str | None = None + + def __post_init__(self) -> None: + # Strip any protocol pattern like "xyz://" + stripped_path = re.sub(r"^\w+://", "", self.path) + split = stripped_path.rstrip("/").split("/") + + # Extract container + self.container = split[0] + + # Determine if there is hive partitioning and extract it + hive_partition_info = [ + "_".join(part.split("=")) for part in split[1:-1] if "=" in part + ] + + # Extract name, which is the filename with the .parquet extension + self.name = split[-1] + + # Construct the stac_item_id + # Include hive partitioning info if present, then add the file name, replacing ".parquet" and ensuring underscores + parts_to_join = hive_partition_info + [self.name.replace(".parquet", "")] + self.stac_item_id = "_".join(parts_to_join) + + +def create_collection( + description: str | None = None, extra_fields: dict[str, Any] | None = None +) -> pystac.Collection: + + providers = [ + pystac.Provider( + name=metadata["PROVIDERS"]["name"], + roles=[ + pystac.provider.ProviderRole.PRODUCER, + pystac.provider.ProviderRole.LICENSOR, + ], + url=metadata["PROVIDERS"]["url"], + ), + pystac.Provider( + name="Deltares", + roles=[ + pystac.provider.ProviderRole.PROCESSOR, + pystac.provider.ProviderRole.HOST, + ], + url="https://deltares.nl", + ), + ] + + start_datetime = datetime.datetime.strptime( + metadata["TEMPORAL_EXTENT"][0].split("T")[0], "%Y-%m-%d" + ) + + extent = pystac.Extent( + pystac.SpatialExtent([metadata["SPATIAL_EXTENT"]]), + pystac.TemporalExtent([[start_datetime, None]]), + ) + + # double check, this is hard-coded! 
+ # links = [ + # pystac.Link( + # pystac.RelType.LICENSE, + # target="https://creativecommons.org/publicdomain/zero/1.0/", + # media_type="text/html", + # title="CC License", + # ) + # ] + + if "Creative Commons" in metadata["LICENSE"] and "4.0" in metadata["LICENSE"]: + metadata["LICENSE"] = "CC-BY-4.0" + + collection = pystac.Collection( + id=COLLECTION_ID, + title=metadata["TITLE"], + description=metadata["DESCRIPTION"], + license=metadata["LICENSE"], + providers=providers, + extent=extent, + catalog_type=pystac.CatalogType.RELATIVE_PUBLISHED, + ) + + collection.add_asset( + "thumbnail", + pystac.Asset( + f"https://storage.googleapis.com/coclico-data-public/coclico/assets/thumbnails/{COLLECTION_ID}.jpeg", + title="Thumbnail", + media_type=pystac.MediaType.JPEG, + ), + ) + # collection.links = links + collection.keywords = metadata["KEYWORDS"] + + pystac.extensions.item_assets.ItemAssetsExtension.add_to(collection) + + collection.extra_fields["item_assets"] = { + "data": { + "title": metadata["TITLE_ABBREVIATION"], + "description": metadata["SHORT_DESCRIPTION"], + "roles": ["data"], + "type": stac_table.PARQUET_MEDIA_TYPE, + **ASSET_EXTRA_FIELDS, + } + } + + if extra_fields: + collection.extra_fields.update(extra_fields) + + pystac.extensions.scientific.ScientificExtension.add_to(collection) + collection.extra_fields["sci:citation"] = metadata["CITATION"] + + collection.stac_extensions.append(stac_table.SCHEMA_URI) + + pystac.extensions.version.VersionExtension.add_to(collection) + collection.extra_fields["version"] = "1" + + return collection + + +def create_item( + asset_href: str, + storage_options: dict[str, Any] | None = None, + asset_extra_fields: dict[str, Any] | None = None, +) -> pystac.Item: + """Create a STAC Item + + For + + Args: + asset_href (str): The HREF pointing to an asset associated with the item + + Returns: + Item: STAC Item object + """ + + parts = PathParts(asset_href) + + properties = { + "title": metadata["TITLE_ABBREVIATION"], + "description": metadata["SHORT_DESCRIPTION"], + } + + dt = datetime.datetime.strptime( + metadata["TEMPORAL_EXTENT"][0].split("T")[0], "%Y-%m-%d" + ) + # shape = shapely.box(*bbox) + # geometry = shapely.geometry.mapping(shape) + template = pystac.Item( + id=parts.stac_item_id, + properties=properties, + geometry=None, + bbox=None, + datetime=dt, + stac_extensions=[], + ) + + item = stac_table.generate( + uri=asset_href, + template=template, + infer_bbox=True, + infer_geometry=None, + datetime_column=None, + infer_datetime=stac_table.InferDatetimeOptions.no, + count_rows=True, + asset_key="data", + asset_extra_fields=asset_extra_fields, + proj=True, + storage_options=storage_options, + validate=False, + ) + assert isinstance(item, pystac.Item) + + item.common_metadata.created = datetime.datetime.utcnow() + + # add descriptions to item properties + if "table:columns" in ASSET_EXTRA_FIELDS and "table:columns" in item.properties: + source_lookup = { + col["name"]: col for col in ASSET_EXTRA_FIELDS["table:columns"] + } + + for target_col in item.properties["table:columns"]: + source_col = source_lookup.get(target_col["name"]) + if source_col: + target_col.setdefault("description", source_col.get("description")) + + # TODO: make configurable upstream + item.assets["data"].title = metadata["TITLE_ABBREVIATION"] + item.assets["data"].description = metadata["SHORT_DESCRIPTION"] + + return item + + +# %% +# ## Do the work +if __name__ == "__main__": + log = logging.getLogger() + log.setLevel(logging.ERROR) + + # loading credentials + 
load_google_credentials( + google_token_fp=cred_data_dir.joinpath("google_credentials_new.json") + ) + + # %% test if the file is multi-indexed, whether we need to write to the cloud, and whether we need to split files + dum = gpd.read_parquet(ds_fp) # read parquet file + split = "N" # value to determine if we need to split the files + for file in os.listdir(ds_path): + if os.path.getsize(ds_path.joinpath(file)) / 10**6 < MAX_FILE_SIZE: + split = "Y" # change split to Yes + break + + # bucket content + uri = f"gs://{BUCKET_NAME}/{BUCKET_PROJ}/{PROJ_NAME}" + # storage_options = {"account_name": "coclico", "credential": sas_token} + # fs, token, [root] = fsspec.get_fs_token_paths(uri, storage_options=storage_options) + fs = gcsfs.GCSFileSystem( + gcs_project=GCS_PROJECT, token=os.environ["GOOGLE_APPLICATION_CREDENTIALS"] + ) + paths = fs.glob(uri + "/*.parquet") + uris = ["gs://" + p for p in paths] + + # TODO: build something in for assessing size of parquet data, do this in both the if and elif statements + if ( + dum.index.nlevels > 1 or split == "Y" + ) and paths == []: # if multi-indexed or split and there is nothing in the cloud + files = os.listdir(ds_path) # list all files in the directory + files_clean = [k for k in files if ".parquet" in k] # only select parquet files + + for file in files_clean: + print(file) + file_size = os.path.getsize(ds_path.joinpath(file)) / 10**6 + + if file_size < MAX_FILE_SIZE: # test if file size is smaller than 500MB + dspd = gpd.read_parquet(ds_path.joinpath(file)) # read parquet file + if dum.index.nlevels > 1: + dspd = dspd.reset_index() # reset multi-index + + # write to the cloud, single file + dspd.to_parquet( + f"{uri}/{file}", engine="pyarrow" + ) # or supply with local path if needed + + elif file_size > MAX_FILE_SIZE: # test if file size is larger than 500MB + dspd = gpd.read_parquet(ds_path.joinpath(file)) # read parquet file + + batch_size = int( + np.ceil(len(dspd) / np.ceil(file_size / MAX_FILE_SIZE)) + ) # calc batch size (max number of rows per partition) + if dum.index.nlevels > 1: + dspd = dspd.reset_index() # reset multi-index + splitted_dspd = partition_dataframe(dspd, batch_size) # calc partitions + + # write to the cloud, all split files + for idx, split_dspd in enumerate(splitted_dspd): + file_name = ( + file.split(".")[0] + + "_{:02d}.".format(idx + 1) + + file.split(".")[1] + ) # add zero-padded index (+1 to start at 1) to file name + split_dspd.to_parquet( + f"{uri}/{file_name}", engine="pyarrow" + ) # or supply with local path if needed + + elif ( + dum.index.nlevels == 1 and split == "N" and paths == [] + ): # if not multi-indexed and no need to split and cloud file does not exist + + # upload directory to the cloud (files already parquet) + dir_to_google_cloud( + dir_path=str(ds_fp), + gcs_project=GCS_PROJECT, + bucket_name=BUCKET_NAME, + bucket_proj=BUCKET_PROJ, + dir_name=PROJ_NAME, + ) + + elif paths: + print('Dataset already exists in the Google Bucket') + + # %% get descriptions + COLUMN_DESCRIPTIONS = read_parquet_schema_df( + uris[0] + ) # select first file of the cloud directory + + ASSET_EXTRA_FIELDS = { + "table:storage_options": {"account_name": "coclico"}, + "table:columns": COLUMN_DESCRIPTIONS, + } + + # %% add to STAC + catalog = pystac.Catalog.from_file(str(STAC_DIR / "catalog.json")) + + stac_io = DefaultStacIO() + layout = ParquetLayout() + + collection = create_collection(extra_fields={"base_url": uri}) + + for uri in uris: + print(uri) + item = create_item(uri) + collection.add_item(item) + + 
collection.update_extent_from_items() + + items = list(collection.get_all_items()) + items_as_json = [i.to_dict() for i in items] + item_extents = stac_geoparquet.to_geodataframe(items_as_json) + + with fsspec.open(GEOPARQUET_STAC_ITEMS_HREF, mode="wb") as f: + item_extents.to_parquet(f) + + collection.add_asset( + "geoparquet-stac-items", + pystac.Asset( + GEOPARQUET_STAC_ITEMS_HREF, + title="GeoParquet STAC items", + description="Snapshot of the collection's STAC items exported to GeoParquet format.", + media_type=PARQUET_MEDIA_TYPE, + roles=["data"], + ), + ) + + # Set thumbnail directory + THUMB_DIR = pathlib.Path(__file__).parent.parent.joinpath('thumbnails') + THUMB_FILE = THUMB_DIR.joinpath(COLLECTION_ID + '.png') + + # Make sure image is reshaped to desired aspect ratio (default = 16/9) + cropped_im = reshape_aspectratio_image(str(THUMB_FILE)) + + # Overwrite image with cropped version + cv2.imwrite(str(THUMB_FILE), cropped_im) + + # Upload thumbnail to cloud + THUMB_URL = file_to_google_cloud(str(THUMB_FILE), + GCS_PROJECT, + BUCKET_NAME, + BUCKET_PROJ, + 'assets/thumbnails', + THUMB_FILE.name, + return_URL = True) + + # Add thumbnail + collection.add_asset( + "thumbnail", + pystac.Asset( + THUMB_URL, # noqa: E501 + title="Thumbnail", + media_type=pystac.MediaType.PNG, + ), + ) + + if catalog.get_child(collection.id): + catalog.remove_child(collection.id) + print(f"Removed child: {collection.id}.") + + catalog.add_child(collection) + + collection.normalize_hrefs(str(STAC_DIR / collection.id), layout) + + collection.validate_all() + + catalog.save( + catalog_type=pystac.CatalogType.SELF_CONTAINED, + dest_href=str(STAC_DIR), + stac_io=stac_io, + ) + +# %% diff --git a/scripts/thumbnails/LAU.png b/scripts/thumbnails/LAU.png new file mode 100644 index 0000000000..3b2b4365cd Binary files /dev/null and b/scripts/thumbnails/LAU.png differ diff --git a/scripts/thumbnails/NUTS.png b/scripts/thumbnails/NUTS.png new file mode 100644 index 0000000000..3b2b4365cd Binary files /dev/null and b/scripts/thumbnails/NUTS.png differ diff --git a/scripts/utils/stac_to_cloud.py b/scripts/utils/stac_to_cloud.py index 5ffb7d159c..1f0e291d7c 100644 --- a/scripts/utils/stac_to_cloud.py +++ b/scripts/utils/stac_to_cloud.py @@ -16,7 +16,7 @@ GCS_PROJECT = "coclico-11207608-002" BUCKET_NAME = "coclico-data-public" BUCKET_PROJ = "coclico" - STAC_NAME = "coclico-stac-cet" #NOTE: if working from main STAC_NAME = 'coclico-stac', if working from branch STAC_NAME = coclico-stac-*** + STAC_NAME = "coclico-stac-NUTS2" #NOTE: if working from main STAC_NAME = 'coclico-stac', if working from branch STAC_NAME = coclico-stac-*** IN_DIRNAME = "current" # hard-coded input params at project level
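
Both create-stac scripts split oversized parquet files with partition_dataframe, computing batch_size = ceil(n_rows / ceil(file_size / MAX_FILE_SIZE)); for example, a 1200 MB file with MAX_FILE_SIZE = 500 yields ceil(1200/500) = 3 chunks of roughly equal row counts. A minimal, self-contained check of the helper on a toy frame (illustrative numbers only, not taken from the LAU/NUTS data):

# partition_dataframe as defined in 99_LAU_stacs.py / 99_NUTS_stacs.py,
# exercised on a 10-row frame with batch_size=4.
import pandas as pd

def partition_dataframe(df: pd.DataFrame, batch_size: int) -> list[pd.DataFrame]:
    n_rows = len(df)
    if n_rows <= batch_size:
        return [df]
    num_partitions = (n_rows + batch_size - 1) // batch_size        # ceil(10 / 4) = 3
    partition_size = (n_rows + num_partitions - 1) // num_partitions  # ceil(10 / 3) = 4
    return [df.iloc[i : i + partition_size] for i in range(0, n_rows, partition_size)]

df = pd.DataFrame({"a": range(10)})
parts = partition_dataframe(df, batch_size=4)
print([len(p) for p in parts])  # [4, 4, 2] -- no partition exceeds batch_size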
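
The notebook preview shows the NUTS name columns (typed "binary" in table:columns) as a mix of plain strings and byte strings such as NAME_LATN b'5368716970eb726961', which reads as "Shqipëria" when interpreted as hex-encoded Latin-1. A possible clean-up sketch; the hex-plus-Latin-1 interpretation is an assumption inferred from the sample values and is not part of this change:

# Hypothetical helper: decode hex-encoded Latin-1 byte strings in the NUTS
# name columns; plain strings pass through unchanged.
def decode_name(value):
    if isinstance(value, bytes):
        try:
            # e.g. b'5368716970eb726961' -> "Shqipëria" (0xeb = 'ë' in Latin-1)
            return bytes.fromhex(value.decode("ascii")).decode("latin-1")
        except ValueError:  # also covers UnicodeDecodeError (a ValueError subclass)
            return value.decode("utf-8", errors="replace")
    return value

# applied to the frame loaded in the notebook, e.g. NUTS = gpd.read_parquet(nuts_parquet_file)
for col in ["NAME_LATN", "NUTS_NAME"]:
    NUTS[col] = NUTS[col].map(decode_name)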
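
Once the catalog is saved, the new collections can be consumed straight from the bucket. A minimal sketch, assuming anonymous read access to coclico-data-public and a working directory at the repository root (the current/ layout written by this change):

import fsspec
import geopandas as gpd
import pystac

# resolve the data asset href through the STAC collection
collection = pystac.Collection.from_file("current/LAU/collection.json")
item = collection.get_item("LAU_RG_01M_2020_3035")
href = item.assets["data"].href  # gs://coclico-data-public/coclico/LAU/LAU_RG_01M_2020_3035.parquet

# token="anon" assumes the bucket allows unauthenticated reads
with fsspec.open(href, token="anon") as f:
    lau = gpd.read_parquet(f)

print(len(lau))  # 98613, matching the item's table:row_count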