From d45ae3e5183f8cff7b27b1990aef9478fc275aa4 Mon Sep 17 00:00:00 2001
From: Juan Sensio
Date: Thu, 8 Feb 2024 12:05:04 +0100
Subject: [PATCH] add api key auth in cli

---
 eotdl/eotdl/auth/auth.py                      | 66 ++++++++-------
 eotdl/eotdl/datasets/download.py              | 10 +--
 eotdl/eotdl/datasets/ingest.py                | 10 +--
 eotdl/eotdl/datasets/usecases/__init__.py     |  0
 .../usecases/datasets/DownloadFile.py         | 30 -------
 .../usecases/datasets/DownloadFileURL.py      | 22 -----
 .../usecases/datasets/IngestDataset.py        | 30 -------
 .../usecases/datasets/IngestLargeDataset.py   | 43 ----------
 .../datasets/IngestLargeDatasetParallel.py    | 51 ------------
 .../datasets/usecases/datasets/IngestSTAC.py  | 77 ------------------
 .../datasets/usecases/datasets/__init__.py    |  9 ---
 eotdl/eotdl/files/ingest.py                   | 12 +--
 eotdl/eotdl/models/download.py                | 11 +--
 eotdl/eotdl/models/ingest.py                  |  2 +-
 eotdl/eotdl/repos/APIRepo.py                  |  7 ++
 eotdl/eotdl/repos/AuthAPIRepo.py              | 11 ++-
 eotdl/eotdl/repos/AuthRepo.py                 |  6 +-
 eotdl/eotdl/repos/DatasetsAPIRepo.py          | 81 +++----------------
 eotdl/eotdl/repos/FilesAPIRepo.py             | 51 ++++--------
 eotdl/eotdl/repos/ModelsAPIRepo.py            |  8 +-
 .../unit/datasets/_test_download_dataset.py   | 17 ----
 21 files changed, 103 insertions(+), 451 deletions(-)
 delete mode 100755 eotdl/eotdl/datasets/usecases/__init__.py
 delete mode 100755 eotdl/eotdl/datasets/usecases/datasets/DownloadFile.py
 delete mode 100755 eotdl/eotdl/datasets/usecases/datasets/DownloadFileURL.py
 delete mode 100755 eotdl/eotdl/datasets/usecases/datasets/IngestDataset.py
 delete mode 100755 eotdl/eotdl/datasets/usecases/datasets/IngestLargeDataset.py
 delete mode 100755 eotdl/eotdl/datasets/usecases/datasets/IngestLargeDatasetParallel.py
 delete mode 100755 eotdl/eotdl/datasets/usecases/datasets/IngestSTAC.py
 delete mode 100755 eotdl/eotdl/datasets/usecases/datasets/__init__.py

diff --git a/eotdl/eotdl/auth/auth.py b/eotdl/eotdl/auth/auth.py
index 45492934..7ab14a32 100755
--- a/eotdl/eotdl/auth/auth.py
+++ b/eotdl/eotdl/auth/auth.py
@@ -1,4 +1,5 @@
 import time
+import os

 from ..repos import AuthRepo, AuthAPIRepo
 from .errors import LoginError, AuthTimeOut
@@ -10,34 +11,43 @@ def auth(max_t=30, interval=2):
     if user:
         return user
     repo, api_repo = AuthRepo(), AuthAPIRepo()
-    response = api_repo.login()
-    if response.status_code != 200:
-        raise LoginError()
-    data = response.json()
-    print("On your computer or mobile device navigate to: ", data["login_url"])
-    authenticated = False
-    t0 = time.time()
-    while not authenticated and time.time() - t0 < max_t:
-        response = api_repo.token(data["code"])
-        token_data = response.json()
-        if response.status_code == 200:
-            print("Authenticated!")
-            print("- Id Token: {}...".format(token_data["id_token"][:10]))
-            # get user credentials
-            credentials = api_repo.retrieve_credentials(token_data["id_token"])[0]
-            if credentials:
-                token_data.update(credentials)
-            # save token data in file
-            creds_path = repo.save_creds(token_data)
-            print("Saved credentials to: ", creds_path)
-            current_user = repo.decode_token(token_data)
-            authenticated = True
-            current_user["id_token"] = token_data["id_token"]
-            return current_user
-        else:
-            time.sleep(interval)
-    if not authenticated:
-        raise AuthTimeOut()
+    api_key = os.environ.get("EOTDL_API_KEY", None)
+    if api_key:
+        print("Using API Key")
+        user = {"api_key": api_key}
+        user_data, error = api_repo.retrieve_user_data(user)
+        if error:
+            raise LoginError()
+        user.update({"email": user_data["email"]})
+    else:
+        response = api_repo.login()
+        if response.status_code != 200:
+            raise LoginError()
+        data = response.json()
+        print("On your computer or mobile device navigate to: ", data["login_url"])
+        authenticated = False
+        t0 = time.time()
+        while not authenticated and time.time() - t0 < max_t:
+            response = api_repo.token(data["code"])
+            token_data = response.json()
+            if response.status_code == 200:
+                print("Authenticated!")
+                print("- Id Token: {}...".format(token_data["id_token"][:10]))
+                user = repo.decode_token(token_data)
+                authenticated = True
+                user = {"id_token": token_data["id_token"], "email": user["email"]}
+            else:
+                time.sleep(interval)
+        if not authenticated:
+            raise AuthTimeOut()
+    # get user credentials
+    credentials = api_repo.retrieve_credentials(user)[0]
+    if credentials:
+        user.update(credentials)
+    # save token data in file
+    creds_path = repo.save_creds(user)
+    print("Saved credentials to: ", creds_path)
+    return user


 # auth decorator
diff --git a/eotdl/eotdl/datasets/download.py b/eotdl/eotdl/datasets/download.py
index 4d476b4e..7111eba4 100755
--- a/eotdl/eotdl/datasets/download.py
+++ b/eotdl/eotdl/datasets/download.py
@@ -54,7 +54,7 @@ def download_dataset(
             dst_path = repo.download_file(
                 dataset["id"],
                 filename,
-                user["id_token"],
+                user,
                 download_path,
                 file_version,
                 progress=True,
@@ -70,7 +70,7 @@ def download_dataset(
         repo = DatasetsAPIRepo()
         gdf, error = repo.download_stac(
             dataset["id"],
-            user["id_token"],
+            user,
         )
         if error:
             raise Exception(error)
@@ -89,7 +89,7 @@ def download_dataset(
                     _, filename = href.split("/download/")
                     # will overwrite assets with same name :(
                     repo.download_file_url(
-                        href, filename, f"{download_path}/assets", user["id_token"]
+                        href, filename, f"{download_path}/assets", user
                     )
         else:
             if verbose:
@@ -101,6 +101,4 @@ def download_dataset(
 def download_file_url(url, path, progress=True, logger=print, user=None):
     repo = FilesAPIRepo()
     _, filename = url.split("/download/")
-    return repo.download_file_url(
-        url, filename, f"{path}/assets", user["id_token"], progress
-    )
+    return repo.download_file_url(url, filename, f"{path}/assets", user, progress)
diff --git a/eotdl/eotdl/datasets/ingest.py b/eotdl/eotdl/datasets/ingest.py
index a9ca201c..9b2a6749 100755
--- a/eotdl/eotdl/datasets/ingest.py
+++ b/eotdl/eotdl/datasets/ingest.py
@@ -28,7 +28,7 @@ def retrieve_dataset(metadata, user):
         raise Exception("Dataset already exists.")
     if error and error == "Dataset doesn't exist":
         # create dataset
-        data, error = repo.create_dataset(metadata.dict(), user["id_token"])
+        data, error = repo.create_dataset(metadata.dict(), user)
         # print(data, error)
         if error:
             raise Exception(error)
@@ -58,7 +58,7 @@ def retrieve_stac_dataset(dataset_name, user):
         raise Exception("Dataset already exists.")
     if error and error == "Dataset doesn't exist":
         # create dataset
-        data, error = repo.create_stac_dataset(dataset_name, user["id_token"])
+        data, error = repo.create_stac_dataset(dataset_name, user)
         # print(data, error)
         if error:
             raise Exception(error)
@@ -87,7 +87,7 @@ def ingest_stac(stac_catalog, logger=None, user=None):
                 data, error = files_repo.ingest_file(
                     v["href"],
                     dataset_id,
-                    user["id_token"],
+                    user,
                     calculate_checksum(v["href"]),  # is always absolute?
"datasets", version, @@ -101,9 +101,7 @@ def ingest_stac(stac_catalog, logger=None, user=None): break # ingest the STAC catalog into geodb logger("Ingesting STAC catalog...") - data, error = repo.ingest_stac( - json.loads(df.to_json()), dataset_id, user["id_token"] - ) + data, error = repo.ingest_stac(json.loads(df.to_json()), dataset_id, user) if error: # TODO: delete all assets that were uploaded raise Exception(error) diff --git a/eotdl/eotdl/datasets/usecases/__init__.py b/eotdl/eotdl/datasets/usecases/__init__.py deleted file mode 100755 index e69de29b..00000000 diff --git a/eotdl/eotdl/datasets/usecases/datasets/DownloadFile.py b/eotdl/eotdl/datasets/usecases/datasets/DownloadFile.py deleted file mode 100755 index b80a33b8..00000000 --- a/eotdl/eotdl/datasets/usecases/datasets/DownloadFile.py +++ /dev/null @@ -1,30 +0,0 @@ -from pydantic import BaseModel -from ....utils import calculate_checksum - - -class DownloadFile: - def __init__(self, repo, retrieve_dataset, logger): - self.repo = repo - self.retrieve_dataset = retrieve_dataset - self.logger = logger if logger else print - - class Inputs(BaseModel): - dataset: str - file: str - path: str = None - user: dict - checksum: str - - class Outputs(BaseModel): - dst_path: str - - def __call__(self, inputs: Inputs) -> Outputs: - dataset = self.retrieve_dataset(inputs.dataset) - dst_path = self.repo.download_file( - inputs.dataset, inputs.file, inputs.user["id_token"], inputs.path - ) - checksum = calculate_checksum(dst_path) - self.logger(f"Checksum: {checksum}") - if dataset["checksum"] != checksum: - self.logger("Checksums do not match") - return self.Outputs(dst_path=dst_path) diff --git a/eotdl/eotdl/datasets/usecases/datasets/DownloadFileURL.py b/eotdl/eotdl/datasets/usecases/datasets/DownloadFileURL.py deleted file mode 100755 index 9e279720..00000000 --- a/eotdl/eotdl/datasets/usecases/datasets/DownloadFileURL.py +++ /dev/null @@ -1,22 +0,0 @@ -from pydantic import BaseModel - - -class DownloadFileURL: - def __init__(self, repo, logger, progress=True): - self.repo = repo - self.logger = logger if logger else print - self.progress = progress - - class Inputs(BaseModel): - url: str - path: str = None - user: dict - - class Outputs(BaseModel): - dst_path: str - - def __call__(self, inputs: Inputs) -> Outputs: - dst_path = self.repo.download_file_url( - inputs.url, inputs.path, inputs.user["id_token"], progress=self.progress - ) - return self.Outputs(dst_path=dst_path) diff --git a/eotdl/eotdl/datasets/usecases/datasets/IngestDataset.py b/eotdl/eotdl/datasets/usecases/datasets/IngestDataset.py deleted file mode 100755 index ade057b6..00000000 --- a/eotdl/eotdl/datasets/usecases/datasets/IngestDataset.py +++ /dev/null @@ -1,30 +0,0 @@ -from pydantic import BaseModel - - -class IngestDataset: - def __init__(self, repo, logger): - self.repo = repo - self.logger = logger if logger else print - - class Inputs(BaseModel): - name: str - description: str - path: str = None - user: dict - - class Outputs(BaseModel): - dataset: dict - - def __call__(self, inputs: Inputs) -> Outputs: - # allow only zip files - if not inputs.path.endswith(".zip"): - raise Exception("Only zip files are allowed") - self.logger("Ingesting dataset...") - data, error = self.repo.ingest_dataset( - inputs.name, inputs.description, inputs.path, inputs.user["id_token"] - ) - # response = self.repo.ingest_large_dataset(inputs.name, inputs.description, inputs.path, inputs.user['id_token']) - if error: - raise Exception(error) - self.logger("Done") - return 
self.Outputs(dataset=data) diff --git a/eotdl/eotdl/datasets/usecases/datasets/IngestLargeDataset.py b/eotdl/eotdl/datasets/usecases/datasets/IngestLargeDataset.py deleted file mode 100755 index a66276e8..00000000 --- a/eotdl/eotdl/datasets/usecases/datasets/IngestLargeDataset.py +++ /dev/null @@ -1,43 +0,0 @@ -from pydantic import BaseModel -from ....utils import calculate_checksum - - -class IngestLargeDataset: - def __init__(self, repo, logger): - self.repo = repo - self.logger = logger if logger else print - - class Inputs(BaseModel): - name: str - path: str = None - user: dict - - class Outputs(BaseModel): - dataset: dict - - def __call__(self, inputs: Inputs) -> Outputs: - data, error = self.repo.retrieve_dataset(inputs.name) - if data: - raise Exception("Dataset already exists") - # allow only zip files - if not inputs.path.endswith(".zip"): - raise Exception("Only zip files are allowed") - self.logger("Computing checksum...") - checksum = calculate_checksum(inputs.path) - self.logger(checksum) - self.logger("Ingesting dataset...") - id_token = inputs.user["id_token"] - dataset_id, upload_id, parts = self.repo.prepare_large_upload( - inputs.name, id_token, checksum - ) - self.repo.ingest_large_dataset( - inputs.path, upload_id, dataset_id, id_token, parts - ) - self.logger("\nCompleting upload...") - data, error = self.repo.complete_upload( - inputs.name, id_token, upload_id, dataset_id, checksum - ) - if error: - raise Exception(error) - self.logger("Done") - return self.Outputs(dataset=data) diff --git a/eotdl/eotdl/datasets/usecases/datasets/IngestLargeDatasetParallel.py b/eotdl/eotdl/datasets/usecases/datasets/IngestLargeDatasetParallel.py deleted file mode 100755 index b5689c50..00000000 --- a/eotdl/eotdl/datasets/usecases/datasets/IngestLargeDatasetParallel.py +++ /dev/null @@ -1,51 +0,0 @@ -from pydantic import BaseModel -from eotdl.eotdl.datasets.utils import calculate_checksum - - -class IngestLargeDatasetParallel: - def __init__(self, repo, logger): - self.repo = repo - self.logger = logger - - class Inputs(BaseModel): - name: str - path: str = None - user: dict - threads: int = 0 - - class Outputs(BaseModel): - dataset: dict - - def __call__(self, inputs: Inputs) -> Outputs: - data, error = self.repo.retrieve_dataset(inputs.name) - if data: - raise Exception("Dataset already exists") - # allow only zip files - if not inputs.path.endswith(".zip"): - raise Exception("Only zip files are allowed") - self.logger("Computing checksum...") - checksum = calculate_checksum( - inputs.path - ) # should do this at chunk level, before and after - self.logger(checksum) - self.logger("Ingesting dataset...") - id_token = inputs.user["id_token"] - dataset_id, upload_id, parts = self.repo.prepare_large_upload( - inputs.name, id_token, checksum - ) - self.repo.ingest_large_dataset_parallel( - inputs.path, - upload_id, - dataset_id, - id_token, - parts, - inputs.threads, - ) - self.logger("\nCompleting upload...") - data, error = self.repo.complete_upload( - inputs.name, id_token, upload_id, dataset_id, checksum - ) - if error: - raise Exception(error) - self.logger("Done") - return self.Outputs(dataset=data) diff --git a/eotdl/eotdl/datasets/usecases/datasets/IngestSTAC.py b/eotdl/eotdl/datasets/usecases/datasets/IngestSTAC.py deleted file mode 100755 index a6833d31..00000000 --- a/eotdl/eotdl/datasets/usecases/datasets/IngestSTAC.py +++ /dev/null @@ -1,77 +0,0 @@ -from pydantic import BaseModel -import json -from pathlib import Path -from tqdm import tqdm - -from ....curation.stac import 
STACDataFrame - - -class IngestSTAC: - def __init__(self, repo, ingest_file, allowed_extensions, logger): - self.repo = repo - self.ingest_file = ingest_file - self.allowed_extensions = allowed_extensions - self.logger = logger if logger else print - - class Inputs(BaseModel): - stac_catalog: Path - user: dict - - class Outputs(BaseModel): - dataset: dict - - def __call__(self, inputs: Inputs) -> Outputs: - # retrieve the user's geodb credentials - # creds, error = self.repo.retrieve_credentials(inputs.user["id_token"]) - # self.validate_credentials(creds) - # load the STAC catalog as a STACsetFrame - self.logger("Loading STAC catalog...") - df = STACDataFrame.from_stac_file(inputs.stac_catalog) - catalog = df[df["type"] == "Catalog"] - assert len(catalog) == 1, "STAC catalog must have exactly one root catalog" - dataset_name = catalog.id.iloc[0] - # create dataset - data, error = self.repo.create_stac_dataset( - dataset_name, inputs.user["id_token"] - ) - if error: - data, error2 = self.repo.retrieve_dataset(dataset_name) - if error2: - raise Exception(error) - if data["uid"] != inputs.user["sub"]: - raise Exception("Dataset already exists.") - dataset_id = data["id"] - # TODO: put size to 0 or else will add up - else: - dataset_id = data["dataset_id"] - # TODO: check that we can ingest in geodb - # upload all assets to EOTDL - self.logger("Uploading assets...") - df2 = df.dropna(subset=["assets"]) - for row in tqdm(df2.iterrows(), total=len(df2)): - # for asset in df.assets.dropna().values[:10]: - try: - for k, v in row[1]["assets"].items(): - data = self.ingest_file( - v["href"], - dataset_id, - self.logger, - self.allowed_extensions + [".tif", ".tiff", ".jpg"], - verbose=False, - root=inputs.stac_catalog.parent.parent, # esto será siempre así en STAC? 
- ) - file_url = f"{self.repo.url}datasets/{data['dataset_id']}/download/{data['file_name']}" - df.loc[row[0], "assets"][k]["href"] = file_url - except Exception as e: - self.logger(f"Error uploading asset {row[0]}: {e}") - break - # ingest the STAC catalog into geodb - self.logger("Ingesting STAC catalog...") - data, error = self.repo.ingest_stac( - json.loads(df.to_json()), dataset_id, inputs.user["id_token"] - ) - if error: - # TODO: delete all assets that were uploaded - raise Exception(error) - self.logger("Done") - return self.Outputs(dataset=data) diff --git a/eotdl/eotdl/datasets/usecases/datasets/__init__.py b/eotdl/eotdl/datasets/usecases/datasets/__init__.py deleted file mode 100755 index b54836ef..00000000 --- a/eotdl/eotdl/datasets/usecases/datasets/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -from .DownloadDataset import DownloadDataset -from .DownloadFileURL import DownloadFileURL -from .IngestDataset import IngestDataset -from .IngestLargeDataset import IngestLargeDataset -from .RetrieveDataset import RetrieveDataset -from .RetrieveDatasets import RetrieveDatasets -from .IngestFile import IngestFile -from .IngestFolder import IngestFolder -from .IngestSTAC import IngestSTAC diff --git a/eotdl/eotdl/files/ingest.py b/eotdl/eotdl/files/ingest.py index 68226081..7a15f5da 100755 --- a/eotdl/eotdl/files/ingest.py +++ b/eotdl/eotdl/files/ingest.py @@ -88,7 +88,7 @@ def generate_files_lists( def create_new_version(repo, dataset_or_model_id, user): - data, error = repo.create_version(dataset_or_model_id, user["id_token"]) + data, error = repo.create_version(dataset_or_model_id, user) if error: raise Exception(error) return data["version"] @@ -117,7 +117,7 @@ def ingest_files(repo, dataset_or_model_id, folder, verbose, logger, user, endpo file["path"], dataset_or_model_id, file["checksum"], - user["id_token"], + user, endpoint, ) # print(upload_id, parts) @@ -125,11 +125,11 @@ def ingest_files(repo, dataset_or_model_id, folder, verbose, logger, user, endpo file["absolute_path"], file["size"], upload_id, - user["id_token"], + user, parts, endpoint, ) - files_repo.complete_upload(user["id_token"], upload_id, version, endpoint) + files_repo.complete_upload(user, upload_id, version, endpoint) # ingest new small files in batches if len(upload_files) > 0: logger("generating batches...") @@ -148,7 +148,7 @@ def ingest_files(repo, dataset_or_model_id, folder, verbose, logger, user, endpo memory_file, [f["checksum"] for f in batch], dataset_or_model_id, - user["id_token"], + user, endpoint, version, ) @@ -165,7 +165,7 @@ def ingest_files(repo, dataset_or_model_id, folder, verbose, logger, user, endpo batch, dataset_or_model_id, version, - user["id_token"], + user, endpoint, ) if error: diff --git a/eotdl/eotdl/models/download.py b/eotdl/eotdl/models/download.py index e245e2d4..81224f44 100755 --- a/eotdl/eotdl/models/download.py +++ b/eotdl/eotdl/models/download.py @@ -68,7 +68,7 @@ def download_model( dst_path = repo.download_file( model["id"], filename, - user["id_token"], + user, download_path, file_version, endpoint="models", @@ -108,12 +108,3 @@ def download_model( # else: # logger("To download assets, set assets=True or -a in the CLI.") # return Outputs(dst_path=path) - - -# @with_auth -# def download_file_url(url, path, progress=True, logger=None, user=None): -# api_repo = APIRepo() -# download = DownloadFileURL(api_repo, logger, progress) -# inputs = DownloadFileURL.Inputs(url=url, path=path, user=user) -# outputs = download(inputs) -# return outputs.dst_path diff --git 
a/eotdl/eotdl/models/ingest.py b/eotdl/eotdl/models/ingest.py index 98760592..aa921950 100755 --- a/eotdl/eotdl/models/ingest.py +++ b/eotdl/eotdl/models/ingest.py @@ -25,7 +25,7 @@ def retrieve_model(metadata, user): raise Exception("Model already exists.") if error and error == "Model doesn't exist": # create dataset - data, error = repo.create_model(metadata.dict(), user["id_token"]) + data, error = repo.create_model(metadata.dict(), user) # print(data, error) if error: raise Exception(error) diff --git a/eotdl/eotdl/repos/APIRepo.py b/eotdl/eotdl/repos/APIRepo.py index de8feab1..ac7d4e20 100755 --- a/eotdl/eotdl/repos/APIRepo.py +++ b/eotdl/eotdl/repos/APIRepo.py @@ -11,3 +11,10 @@ def format_response(self, response): if response.status_code == 200: return response.json(), None return None, response.json()["detail"] + + def generate_headers(self, data): + if "api_key" in data: + return {"X-API-Key": data["api_key"]} + if "id_token" in data: + return {"Authorization": "Bearer " + data["id_token"]} + raise Exception("Invalid headers") diff --git a/eotdl/eotdl/repos/AuthAPIRepo.py b/eotdl/eotdl/repos/AuthAPIRepo.py index c63a18be..68095d96 100755 --- a/eotdl/eotdl/repos/AuthAPIRepo.py +++ b/eotdl/eotdl/repos/AuthAPIRepo.py @@ -16,11 +16,18 @@ def logout_url(self): response = requests.get(self.url + "auth/logout") return response.json()["logout_url"] - def retrieve_credentials(self, id_token): + def retrieve_credentials(self, auth): response = requests.get( self.url + "auth/credentials", - headers={"Authorization": "Bearer " + id_token}, + headers=self.generate_headers(auth), ) if response.status_code == 200: return response.json(), None return None, response.json()["detail"] + + def retrieve_user_data(self, auth): + response = requests.get( + self.url + "auth/me", + headers=self.generate_headers(auth), + ) + return self.format_response(response) diff --git a/eotdl/eotdl/repos/AuthRepo.py b/eotdl/eotdl/repos/AuthRepo.py index 5005e982..407a045c 100755 --- a/eotdl/eotdl/repos/AuthRepo.py +++ b/eotdl/eotdl/repos/AuthRepo.py @@ -20,9 +20,9 @@ def load_creds(self): if os.path.exists(self.creds_path): with open(self.creds_path, "r") as f: creds = json.load(f) - user = self.decode_token(creds) - user["id_token"] = creds["id_token"] - return user + if not "id_token" in creds and not "api_key" in creds: + return None + return creds return None def decode_token(self, token_data): diff --git a/eotdl/eotdl/repos/DatasetsAPIRepo.py b/eotdl/eotdl/repos/DatasetsAPIRepo.py index 993a22a9..9aee49a8 100755 --- a/eotdl/eotdl/repos/DatasetsAPIRepo.py +++ b/eotdl/eotdl/repos/DatasetsAPIRepo.py @@ -20,11 +20,11 @@ def retrieve_datasets(self, name, limit): response = requests.get(url) return self.format_response(response) - def create_dataset(self, metadata, id_token): + def create_dataset(self, metadata, user): response = requests.post( self.url + "datasets", json=metadata, - headers={"Authorization": "Bearer " + id_token}, + headers=self.generate_headers(user), ) return self.format_response(response) @@ -32,94 +32,33 @@ def retrieve_dataset(self, name): response = requests.get(self.url + "datasets?name=" + name) return self.format_response(response) - def create_version(self, dataset_id, id_token): + def create_version(self, dataset_id, user): response = requests.post( self.url + "datasets/version/" + dataset_id, - headers={"Authorization": "Bearer " + id_token}, + headers=self.generate_headers(user), ) return self.format_response(response) - def create_stac_dataset(self, name, id_token): + def 
create_stac_dataset(self, name, user): response = requests.post( self.url + "datasets/stac", json={"name": name}, - headers={"Authorization": "Bearer " + id_token}, + headers=self.generate_headers(user), ) return self.format_response(response) - def ingest_stac(self, stac_json, dataset_id, id_token): + def ingest_stac(self, stac_json, dataset_id, user): response = requests.put( self.url + f"datasets/stac/{dataset_id}", json={"stac": stac_json}, - headers={"Authorization": "Bearer " + id_token}, + headers=self.generate_headers(user), ) return self.format_response(response) - def download_stac(self, dataset_id, id_token): + def download_stac(self, dataset_id, user): url = self.url + "datasets/" + dataset_id + "/download" - headers = {"Authorization": "Bearer " + id_token} + headers = self.generate_headers(user) response = requests.get(url, headers=headers) if response.status_code != 200: return None, response.json()["detail"] return gpd.GeoDataFrame.from_features(response.json()["features"]), None - - # def update_dataset(self, name, path, id_token, checksum): - # # check that dataset exists - # data, error = self.retrieve_dataset(name) - # if error: - # return None, error - # # first call to get upload id - # dataset_id = data["id"] - # url = self.url + f"datasets/chunk/{dataset_id}?checksum={checksum}" - # response = requests.get(url, headers={"Authorization": "Bearer " + id_token}) - # if response.status_code != 200: - # return None, response.json()["detail"] - # data = response.json() - # _, upload_id, parts = data["dataset_id"], data["upload_id"], data["parts"] - # # assert dataset_id is None - # content_path = os.path.abspath(path) - # content_size = os.stat(content_path).st_size - # url = self.url + "datasets/chunk" - # chunk_size = 1024 * 1024 * 100 # 100 MiB - # total_chunks = content_size // chunk_size - # headers = { - # "Authorization": "Bearer " + id_token, - # "Upload-Id": upload_id, - # "Dataset-Id": dataset_id, - # } - # # upload chunks sequentially - # pbar = tqdm( - # self.read_in_chunks(open(content_path, "rb"), chunk_size), - # total=total_chunks, - # ) - # index = 0 - # for chunk in pbar: - # offset = index + len(chunk) - # part = index // chunk_size + 1 - # index = offset - # if part not in parts: - # headers["Part-Number"] = str(part) - # file = {"file": chunk} - # r = requests.post(url, files=file, headers=headers) - # if r.status_code != 200: - # return None, r.json()["detail"] - # pbar.set_description( - # "{:.2f}/{:.2f} MB".format( - # offset / 1024 / 1024, content_size / 1024 / 1024 - # ) - # ) - # pbar.close() - # # complete upload - # url = self.url + "datasets/complete" - # r = requests.post( - # url, - # json={"checksum": checksum}, - # headers={ - # "Authorization": "Bearer " + id_token, - # "Upload-Id": upload_id, - # "Dataset-Id": dataset_id, - # }, - # ) - # if r.status_code != 200: - # return None, r.json()["detail"] - # return r.json(), None diff --git a/eotdl/eotdl/repos/FilesAPIRepo.py b/eotdl/eotdl/repos/FilesAPIRepo.py index e8b72342..0f1b02f2 100755 --- a/eotdl/eotdl/repos/FilesAPIRepo.py +++ b/eotdl/eotdl/repos/FilesAPIRepo.py @@ -15,7 +15,7 @@ def ingest_files_batch( batch, # ziped batch of files checksums, dataset_or_model_id, - id_token, + user, endpoint, version=None, ): @@ -26,7 +26,7 @@ def ingest_files_batch( url, files={"batch": ("batch.zip", batch)}, data={"checksums": checksums}, - headers={"Authorization": "Bearer " + id_token}, + headers=self.generate_headers(user), ) return self.format_response(reponse) @@ -35,7 +35,7 @@ def 
add_files_batch_to_version( batch, dataset_or_model_id, version, - id_token, + user, endpoint, ): reponse = requests.post( @@ -44,12 +44,12 @@ def add_files_batch_to_version( "filenames": [f["path"] for f in batch], "checksums": [f["checksum"] for f in batch], }, - headers={"Authorization": "Bearer " + id_token}, + headers=self.generate_headers(user), ) return self.format_response(reponse) def ingest_file( - self, file, dataset_or_model_id, id_token, checksum, endpoint, version=None + self, file, dataset_or_model_id, user, checksum, endpoint, version=None ): # TODO: ingest file URL url = self.url + f"{endpoint}/{dataset_or_model_id}" @@ -59,7 +59,7 @@ def ingest_file( url, files={"file": open(file, "rb")}, data={"checksum": checksum}, - headers={"Authorization": "Bearer " + id_token}, + headers=self.generate_headers(user), ) return self.format_response(reponse) @@ -74,7 +74,7 @@ def download_file( self, dataset_or_model_id, file_name, - id_token, + user, path, file_version, endpoint="datasets", @@ -83,10 +83,10 @@ def download_file( url = self.url + f"{endpoint}/{dataset_or_model_id}/download/{file_name}" if file_version is not None: url += "?version=" + str(file_version) - return self.download_file_url(url, file_name, path, id_token, progress=progress) + return self.download_file_url(url, file_name, path, user, progress=progress) - def download_file_url(self, url, filename, path, id_token, progress=False): - headers = {"Authorization": "Bearer " + id_token} + def download_file_url(self, url, filename, path, user, progress=False): + headers = self.generate_headers(user) path = f"{path}/{filename}" for i in range(1, len(path.split("/")) - 1): # print("/".join(path.split("/")[: i + 1])) @@ -114,23 +114,13 @@ def download_file_url(self, url, filename, path, id_token, progress=False): progress_bar.close() return path - # def ingest_file_url(self, file, dataset, id_token): - # reponse = requests.post( - # self.url + f"datasets/{dataset}/url", - # json={"url": file}, - # headers={"Authorization": "Bearer " + id_token}, - # ) - # if reponse.status_code != 200: - # return None, reponse.json()["detail"] - # return reponse.json(), None - def prepare_large_upload( - self, filename, dataset_or_model_id, checksum, id_token, endpoint + self, filename, dataset_or_model_id, checksum, user, endpoint ): response = requests.post( self.url + f"{endpoint}/{dataset_or_model_id}/uploadId", json={"filname": filename, "checksum": checksum}, - headers={"Authorization": "Bearer " + id_token}, + headers=self.generate_headers(user), ) if response.status_code != 200: raise Exception(response.json()["detail"]) @@ -158,7 +148,7 @@ def read_in_chunks(self, file_object, CHUNK_SIZE): yield data def ingest_large_file( - self, file_path, files_size, upload_id, id_token, parts, endpoint + self, file_path, files_size, upload_id, user, parts, endpoint ): print(endpoint) # content_path = os.path.abspath(file) @@ -181,7 +171,7 @@ def ingest_large_file( f"{self.url}{endpoint}/chunk/{upload_id}", files={"file": chunk}, data={"part_number": part, "checksum": checksum}, - headers={"Authorization": "Bearer " + id_token}, + headers=self.generate_headers(user), ) if response.status_code != 200: raise Exception(response.json()["detail"]) @@ -193,18 +183,9 @@ def ingest_large_file( pbar.close() return - def complete_upload(self, id_token, upload_id, version, endpoint): + def complete_upload(self, user, upload_id, version, endpoint): r = requests.post( f"{self.url}{endpoint}/complete/{upload_id}?version={version}", - 
headers={"Authorization": "Bearer " + id_token}, + headers=self.generate_headers(user), ) return self.format_response(r) - - # def delete_file(self, dataset_id, file_name, id_token): - # response = requests.delete( - # self.url + "datasets/" + dataset_id + "/file/" + file_name, - # headers={"Authorization": "Bearer " + id_token}, - # ) - # if response.status_code != 200: - # return None, response.json()["detail"] - # return response.json(), None diff --git a/eotdl/eotdl/repos/ModelsAPIRepo.py b/eotdl/eotdl/repos/ModelsAPIRepo.py index 4e46ba3b..a30743eb 100755 --- a/eotdl/eotdl/repos/ModelsAPIRepo.py +++ b/eotdl/eotdl/repos/ModelsAPIRepo.py @@ -19,11 +19,11 @@ def retrieve_models(self, name, limit): response = requests.get(url) return self.format_response(response) - def create_model(self, metadata, id_token): + def create_model(self, metadata, user): response = requests.post( self.url + "models", json=metadata, - headers={"Authorization": "Bearer " + id_token}, + headers=self.generate_headers(user), ) return self.format_response(response) @@ -31,9 +31,9 @@ def retrieve_model(self, name): response = requests.get(self.url + "models?name=" + name) return self.format_response(response) - def create_version(self, model_id, id_token): + def create_version(self, model_id, user): response = requests.post( self.url + "models/version/" + model_id, - headers={"Authorization": "Bearer " + id_token}, + headers=self.generate_headers(user), ) return self.format_response(response) diff --git a/eotdl/tests/unit/datasets/_test_download_dataset.py b/eotdl/tests/unit/datasets/_test_download_dataset.py index c1d8b71e..3b85eb14 100755 --- a/eotdl/tests/unit/datasets/_test_download_dataset.py +++ b/eotdl/tests/unit/datasets/_test_download_dataset.py @@ -17,20 +17,3 @@ def dataset(): @pytest.fixture def user(): return {"id_token": "test token"} - - -# @patch("eotdl.src.utils.calculate_checksum") # mock not working :( -# def test_download_one_file(mock_checksum, dataset, user): -# repo, retrieve_dataset, logger = mock.Mock(), mock.Mock(), mock.Mock() -# mock_checksum.return_value = "123" -# download = DownloadDataset(repo, retrieve_dataset, logger) -# inputs = DownloadDataset.Inputs( -# dataset=dataset["name"], file="test-file", user=user -# ) -# retrieve_dataset.return_value = dataset -# repo.download_file.return_value = "dst_path" -# outputs = download(inputs) -# assert outputs.dst_path == "dst_path" -# repo.download_file.assert_called_once_with( -# dataset["name"], dataset["id"], "test-file", user["id_token"], None -# )