Update LAU NUTS
quick fixes
EtienneKras committed Oct 2, 2024
1 parent d653f8a commit 491a7f8
Showing 4 changed files with 48 additions and 34 deletions.
2 changes: 1 addition & 1 deletion current/LAU/collection.json
@@ -124,7 +124,7 @@
"Units",
"European Union",
"Full-Track",
"Background Layer"
"Background Layers"
],
"providers": [
{
2 changes: 1 addition & 1 deletion current/NUTS/collection.json
@@ -119,7 +119,7 @@
"Units",
"European Union",
"Full-Track",
"Background Layer"
"Background Layers"
],
"providers": [
{
39 changes: 23 additions & 16 deletions scripts/create_stacs/99_LAU_stacs.py
@@ -32,7 +32,11 @@
from dotenv import load_dotenv
from pystac.stac_io import DefaultStacIO

- from coclicodata.etl.cloud_utils import load_google_credentials, dir_to_google_cloud, file_to_google_cloud
+ from coclicodata.etl.cloud_utils import (
+     load_google_credentials,
+     dir_to_google_cloud,
+     file_to_google_cloud,
+ )
from coclicodata.drive_config import p_drive
from coclicodata.coclico_stac.reshape_im import reshape_aspectratio_image

@@ -82,12 +86,12 @@
ds_fp = ds_path.joinpath("LAU_RG_01M_2020_3035.parquet") # file directory

# # load metadata template
- metadata_fp = ds_path.joinpath('metadata',ds_fp.name).with_suffix('.json')
+ metadata_fp = ds_path.joinpath("metadata", ds_fp.name).with_suffix(".json")
with open(metadata_fp, "r") as f:
metadata = json.load(f)

# # extend keywords
- metadata['KEYWORDS'].extend(["Full-Track", "Background Layer"])
+ metadata["KEYWORDS"].extend(["Full-Track", "Background Layers"])

# # data output configurations
HREF_PREFIX = urljoin(
@@ -104,6 +108,7 @@
f"gs://{BUCKET_NAME}/{BUCKET_PROJ}/items/{COLLECTION_ID}.parquet"
)

+
# %%
# %%
def read_parquet_schema_df(uri: str) -> List: # pd.DataFrame:
@@ -363,7 +368,7 @@ def create_item(
)

# %% test if file is multi-indexed, if we need to write to the cloud and whether we need to split files
- dum = gpd.read_parquet(ds_fp) # read parquet file
+ dum = gpd.read_parquet(ds_fp)  # read parquet file
split = "N" # value to determine if we need to split the files
for file in os.listdir(ds_path):
if os.path.getsize(ds_path.joinpath(file)) / 10**6 < MAX_FILE_SIZE:
@@ -434,9 +439,9 @@ def create_item(
bucket_proj=BUCKET_PROJ,
dir_name=PROJ_NAME,
)

elif paths:
- print('Dataset already exists in the Google Bucket')
+ print("Dataset already exists in the Google Bucket")

# %% get descriptions
COLUMN_DESCRIPTIONS = read_parquet_schema_df(
@@ -481,9 +486,9 @@ def create_item(
),
)

- # Set thumbnail directory
- THUMB_DIR = pathlib.Path(__file__).parent.parent.joinpath('thumbnails')
- THUMB_FILE = THUMB_DIR.joinpath(COLLECTION_ID + '.png')
+ # Set thumbnail directory
+ THUMB_DIR = pathlib.Path(__file__).parent.parent.joinpath("thumbnails")
+ THUMB_FILE = THUMB_DIR.joinpath(COLLECTION_ID + ".png")

# Make sure image is reshaped to desired aspect ratio (default = 16/9)
cropped_im = reshape_aspectratio_image(str(THUMB_FILE))
@@ -492,13 +497,15 @@ def create_item(
cv2.imwrite(str(THUMB_FILE), cropped_im)

# Upload thumbnail to cloud
- THUMB_URL = file_to_google_cloud(str(THUMB_FILE),
- GCS_PROJECT,
- BUCKET_NAME,
- BUCKET_PROJ,
- 'assets/thumbnails',
- THUMB_FILE.name,
- return_URL = True)
+ THUMB_URL = file_to_google_cloud(
+     str(THUMB_FILE),
+     GCS_PROJECT,
+     BUCKET_NAME,
+     BUCKET_PROJ,
+     "assets/thumbnails",
+     THUMB_FILE.name,
+     return_URL=True,
+ )

# Add thumbnail
collection.add_asset(
39 changes: 23 additions & 16 deletions scripts/create_stacs/99_NUTS_stacs.py
@@ -32,7 +32,11 @@
from dotenv import load_dotenv
from pystac.stac_io import DefaultStacIO

- from coclicodata.etl.cloud_utils import load_google_credentials, dir_to_google_cloud, file_to_google_cloud
+ from coclicodata.etl.cloud_utils import (
+     load_google_credentials,
+     dir_to_google_cloud,
+     file_to_google_cloud,
+ )
from coclicodata.drive_config import p_drive
from coclicodata.coclico_stac.reshape_im import reshape_aspectratio_image

@@ -82,12 +86,12 @@
ds_fp = ds_path.joinpath("NUTS_RG_01M_2021_3035.parquet") # file directory

# # load metadata template
- metadata_fp = ds_path.joinpath('metadata',ds_fp.name).with_suffix('.json')
+ metadata_fp = ds_path.joinpath("metadata", ds_fp.name).with_suffix(".json")
with open(metadata_fp, "r") as f:
metadata = json.load(f)

# # extend keywords
- metadata['KEYWORDS'].extend(["Full-Track", "Background Layer"])
+ metadata["KEYWORDS"].extend(["Full-Track", "Background Layers"])

# # data output configurations
HREF_PREFIX = urljoin(
@@ -104,6 +108,7 @@
f"gs://{BUCKET_NAME}/{BUCKET_PROJ}/items/{COLLECTION_ID}.parquet"
)

+
# %%
# %%
def read_parquet_schema_df(uri: str) -> List: # pd.DataFrame:
@@ -363,7 +368,7 @@ def create_item(
)

# %% test if file is multi-indexed, if we need to write to the cloud and whether we need to split files
- dum = gpd.read_parquet(ds_fp) # read parquet file
+ dum = gpd.read_parquet(ds_fp)  # read parquet file
split = "N" # value to determine if we need to split the files
for file in os.listdir(ds_path):
if os.path.getsize(ds_path.joinpath(file)) / 10**6 < MAX_FILE_SIZE:
@@ -434,9 +439,9 @@ def create_item(
bucket_proj=BUCKET_PROJ,
dir_name=PROJ_NAME,
)

elif paths:
- print('Dataset already exists in the Google Bucket')
+ print("Dataset already exists in the Google Bucket")

# %% get descriptions
COLUMN_DESCRIPTIONS = read_parquet_schema_df(
@@ -481,9 +486,9 @@ def create_item(
),
)

- # Set thumbnail directory
- THUMB_DIR = pathlib.Path(__file__).parent.parent.joinpath('thumbnails')
- THUMB_FILE = THUMB_DIR.joinpath(COLLECTION_ID + '.png')
+ # Set thumbnail directory
+ THUMB_DIR = pathlib.Path(__file__).parent.parent.joinpath("thumbnails")
+ THUMB_FILE = THUMB_DIR.joinpath(COLLECTION_ID + ".png")

# Make sure image is reshaped to desired aspect ratio (default = 16/9)
cropped_im = reshape_aspectratio_image(str(THUMB_FILE))
@@ -492,13 +497,15 @@ def create_item(
cv2.imwrite(str(THUMB_FILE), cropped_im)

# Upload thumbnail to cloud
- THUMB_URL = file_to_google_cloud(str(THUMB_FILE),
- GCS_PROJECT,
- BUCKET_NAME,
- BUCKET_PROJ,
- 'assets/thumbnails',
- THUMB_FILE.name,
- return_URL = True)
+ THUMB_URL = file_to_google_cloud(
+     str(THUMB_FILE),
+     GCS_PROJECT,
+     BUCKET_NAME,
+     BUCKET_PROJ,
+     "assets/thumbnails",
+     THUMB_FILE.name,
+     return_URL=True,
+ )

# Add thumbnail
collection.add_asset(
