Update LAU NUTS
quick fixes
EtienneKras committed Oct 2, 2024
1 parent d653f8a commit 491a7f8
Showing 4 changed files with 48 additions and 34 deletions.
2 changes: 1 addition & 1 deletion current/LAU/collection.json
@@ -124,7 +124,7 @@
"Units",
"European Union",
"Full-Track",
"Background Layer"
"Background Layers"
],
"providers": [
{
2 changes: 1 addition & 1 deletion current/NUTS/collection.json
@@ -119,7 +119,7 @@
"Units",
"European Union",
"Full-Track",
"Background Layer"
"Background Layers"
],
"providers": [
{
39 changes: 23 additions & 16 deletions scripts/create_stacs/99_LAU_stacs.py
@@ -32,7 +32,11 @@
from dotenv import load_dotenv
from pystac.stac_io import DefaultStacIO

- from coclicodata.etl.cloud_utils import load_google_credentials, dir_to_google_cloud, file_to_google_cloud
+ from coclicodata.etl.cloud_utils import (
+     load_google_credentials,
+     dir_to_google_cloud,
+     file_to_google_cloud,
+ )
from coclicodata.drive_config import p_drive
from coclicodata.coclico_stac.reshape_im import reshape_aspectratio_image

@@ -82,12 +86,12 @@
ds_fp = ds_path.joinpath("LAU_RG_01M_2020_3035.parquet") # file directory

# # load metadata template
- metadata_fp = ds_path.joinpath('metadata',ds_fp.name).with_suffix('.json')
+ metadata_fp = ds_path.joinpath("metadata", ds_fp.name).with_suffix(".json")
with open(metadata_fp, "r") as f:
metadata = json.load(f)

# # extend keywords
- metadata['KEYWORDS'].extend(["Full-Track", "Background Layer"])
+ metadata["KEYWORDS"].extend(["Full-Track", "Background Layers"])

# # data output configurations
HREF_PREFIX = urljoin(
@@ -104,6 +108,7 @@
f"gs://{BUCKET_NAME}/{BUCKET_PROJ}/items/{COLLECTION_ID}.parquet"
)

+
# %%
# %%
def read_parquet_schema_df(uri: str) -> List: # pd.DataFrame:
@@ -363,7 +368,7 @@ def create_item(
)

# %% test if file is multi-indexed, if we need to write to the cloud and whether we need to split files
- dum = gpd.read_parquet(ds_fp) # read parquet file
+ dum = gpd.read_parquet(ds_fp)  # read parquet file
split = "N" # value to determine if we need to split the files
for file in os.listdir(ds_path):
if os.path.getsize(ds_path.joinpath(file)) / 10**6 < MAX_FILE_SIZE:
@@ -434,9 +439,9 @@ def create_item(
bucket_proj=BUCKET_PROJ,
dir_name=PROJ_NAME,
)

elif paths:
- print('Dataset already exists in the Google Bucket')
+ print("Dataset already exists in the Google Bucket")

# %% get descriptions
COLUMN_DESCRIPTIONS = read_parquet_schema_df(
@@ -481,9 +486,9 @@ def create_item(
),
)

- # Set thumbnail directory
- THUMB_DIR = pathlib.Path(__file__).parent.parent.joinpath('thumbnails')
- THUMB_FILE = THUMB_DIR.joinpath(COLLECTION_ID + '.png')
+ # Set thumbnail directory
+ THUMB_DIR = pathlib.Path(__file__).parent.parent.joinpath("thumbnails")
+ THUMB_FILE = THUMB_DIR.joinpath(COLLECTION_ID + ".png")

# Make sure image is reshaped to desired aspect ratio (default = 16/9)
cropped_im = reshape_aspectratio_image(str(THUMB_FILE))
@@ -492,13 +497,15 @@ def create_item(
cv2.imwrite(str(THUMB_FILE), cropped_im)

# Upload thumbnail to cloud
- THUMB_URL = file_to_google_cloud(str(THUMB_FILE),
- GCS_PROJECT,
- BUCKET_NAME,
- BUCKET_PROJ,
- 'assets/thumbnails',
- THUMB_FILE.name,
- return_URL = True)
+ THUMB_URL = file_to_google_cloud(
+     str(THUMB_FILE),
+     GCS_PROJECT,
+     BUCKET_NAME,
+     BUCKET_PROJ,
+     "assets/thumbnails",
+     THUMB_FILE.name,
+     return_URL=True,
+ )

# Add thumbnail
collection.add_asset(
39 changes: 23 additions & 16 deletions scripts/create_stacs/99_NUTS_stacs.py
@@ -32,7 +32,11 @@
from dotenv import load_dotenv
from pystac.stac_io import DefaultStacIO

- from coclicodata.etl.cloud_utils import load_google_credentials, dir_to_google_cloud, file_to_google_cloud
+ from coclicodata.etl.cloud_utils import (
+     load_google_credentials,
+     dir_to_google_cloud,
+     file_to_google_cloud,
+ )
from coclicodata.drive_config import p_drive
from coclicodata.coclico_stac.reshape_im import reshape_aspectratio_image

@@ -82,12 +86,12 @@
ds_fp = ds_path.joinpath("NUTS_RG_01M_2021_3035.parquet") # file directory

# # load metadata template
- metadata_fp = ds_path.joinpath('metadata',ds_fp.name).with_suffix('.json')
+ metadata_fp = ds_path.joinpath("metadata", ds_fp.name).with_suffix(".json")
with open(metadata_fp, "r") as f:
metadata = json.load(f)

# # extend keywords
- metadata['KEYWORDS'].extend(["Full-Track", "Background Layer"])
+ metadata["KEYWORDS"].extend(["Full-Track", "Background Layers"])

# # data output configurations
HREF_PREFIX = urljoin(
@@ -104,6 +108,7 @@
f"gs://{BUCKET_NAME}/{BUCKET_PROJ}/items/{COLLECTION_ID}.parquet"
)

+
# %%
# %%
def read_parquet_schema_df(uri: str) -> List: # pd.DataFrame:
@@ -363,7 +368,7 @@ def create_item(
)

# %% test if file is multi-indexed, if we need to write to the cloud and whether we need to split files
- dum = gpd.read_parquet(ds_fp) # read parquet file
+ dum = gpd.read_parquet(ds_fp)  # read parquet file
split = "N" # value to determine if we need to split the files
for file in os.listdir(ds_path):
if os.path.getsize(ds_path.joinpath(file)) / 10**6 < MAX_FILE_SIZE:
@@ -434,9 +439,9 @@ def create_item(
bucket_proj=BUCKET_PROJ,
dir_name=PROJ_NAME,
)

elif paths:
- print('Dataset already exists in the Google Bucket')
+ print("Dataset already exists in the Google Bucket")

# %% get descriptions
COLUMN_DESCRIPTIONS = read_parquet_schema_df(
@@ -481,9 +486,9 @@ def create_item(
),
)

- # Set thumbnail directory
- THUMB_DIR = pathlib.Path(__file__).parent.parent.joinpath('thumbnails')
- THUMB_FILE = THUMB_DIR.joinpath(COLLECTION_ID + '.png')
+ # Set thumbnail directory
+ THUMB_DIR = pathlib.Path(__file__).parent.parent.joinpath("thumbnails")
+ THUMB_FILE = THUMB_DIR.joinpath(COLLECTION_ID + ".png")

# Make sure image is reshaped to desired aspect ratio (default = 16/9)
cropped_im = reshape_aspectratio_image(str(THUMB_FILE))
@@ -492,13 +497,15 @@ def create_item(
cv2.imwrite(str(THUMB_FILE), cropped_im)

# Upload thumbnail to cloud
- THUMB_URL = file_to_google_cloud(str(THUMB_FILE),
- GCS_PROJECT,
- BUCKET_NAME,
- BUCKET_PROJ,
- 'assets/thumbnails',
- THUMB_FILE.name,
- return_URL = True)
+ THUMB_URL = file_to_google_cloud(
+     str(THUMB_FILE),
+     GCS_PROJECT,
+     BUCKET_NAME,
+     BUCKET_PROJ,
+     "assets/thumbnails",
+     THUMB_FILE.name,
+     return_URL=True,
+ )

# Add thumbnail
collection.add_asset(
