From f0638931d5238fa14cc0875dbe184fd5b4fc8a7f Mon Sep 17 00:00:00 2001
From: Dipannita Dey
Date: Thu, 29 Aug 2024 21:02:27 -0500
Subject: [PATCH] created v2 license based on v1 license details

---
 scripts/migration/migrate.py | 110 +++++++++++++++++++++++++++++------
 1 file changed, 91 insertions(+), 19 deletions(-)

diff --git a/scripts/migration/migrate.py b/scripts/migration/migrate.py
index 820e8accf..2766e1cfc 100644
--- a/scripts/migration/migrate.py
+++ b/scripts/migration/migrate.py
@@ -130,19 +130,21 @@ def get_clowder_v1_dataset_collections(headers, user_v1, dataset_id):
     user_collections = response.json()
     for collection in user_collections:
         collection_id = collection["id"]
-        if collection['name'] == 'subchild':
+        if collection["name"] == "subchild":
             print("HERE")
         collection_dataset_endpoint = (
             f"{CLOWDER_V1}/api/collections/{collection_id}/datasets"
         )
         try:
-            dataset_response = requests.get(collection_dataset_endpoint, headers=headers)
+            dataset_response = requests.get(
+                collection_dataset_endpoint, headers=headers
+            )
             datasets = dataset_response.json()
             for ds in datasets:
                 if ds["id"] == dataset_id:
                     matching_collections.append(collection)
         except Exception as e:
-            print('Exception', e)
+            print("Exception", e)
     return matching_collections
 
 
@@ -271,10 +273,74 @@ def create_admin_user():
     return generate_user_api_key(admin_user, admin_user["password"])
 
 
+def add_dataset_license(v1_license, headers):
+    """Create appropriate license (standard/custom) based on v1 license details"""
+    license_id = "CC-BY"
+    # standard licenses
+    if v1_license["license_type"] == "license2":
+        if (
+            not v1_license["ccAllowCommercial"]
+            and not v1_license["ccAllowDerivative"]
+            and not v1_license["ccRequireShareAlike"]
+        ):
+            license_id = "CC BY-NC-ND"
+        elif (
+            v1_license["ccAllowCommercial"]
+            and not v1_license["ccAllowDerivative"]
+            and not v1_license["ccRequireShareAlike"]
+        ):
+            license_id = "CC BY-ND"
+        elif (
+            not v1_license["ccAllowCommercial"]
+            and v1_license["ccAllowDerivative"]
+            and not v1_license["ccRequireShareAlike"]
+        ):
+            license_id = "CC BY-NC"
+        elif (
+            not v1_license["ccAllowCommercial"]
+            and v1_license["ccAllowDerivative"]
+            and v1_license["ccRequireShareAlike"]
+        ):
+            license_id = "CC BY-NC-SA"
+        elif (
+            v1_license["ccAllowCommercial"]
+            and v1_license["ccAllowDerivative"]
+            and v1_license["ccRequireShareAlike"]
+        ):
+            license_id = "CC BY-SA"
+        elif (
+            v1_license["ccAllowCommercial"]
+            and v1_license["ccAllowDerivative"]
+            and not v1_license["ccRequireShareAlike"]
+        ):
+            license_id = "CC BY"
+    elif v1_license["license_type"] == "license3":
+        license_id = "CCO Public Domain Dedication"
+    else:
+        # custom license
+        license_body = {
+            "name": v1_license["license_text"],
+            "url": v1_license["license_url"],
+            "holders": v1_license["holders"],
+        }
+        if license_body["url"] == "":
+            license_body["url"] = "https://dbpedia.org/page/All_rights_reserved"
+        license_v2_endpoint = f"{CLOWDER_V2}/api/v2/licenses?"
+        response = requests.post(
+            license_v2_endpoint, headers=headers, json=license_body
+        )
+        print(response.json())
+        license_id = response.json()["id"]
+    return license_id
+
+
 def create_v2_dataset(dataset, headers):
     """Create a dataset in Clowder v2."""
     # TODO: GET correct license
-    dataset_in_v2_endpoint = f"{CLOWDER_V2}/api/v2/datasets?license_id=CC BY"
+    print(f"Creating dataset license in Clowder v2.")
+    v2_license_id = add_dataset_license(dataset["license"], headers)
+
+    dataset_in_v2_endpoint = f"{CLOWDER_V2}/api/v2/datasets?license_id={v2_license_id}"
     dataset_example = {
         "name": dataset["name"],
         "description": dataset["description"],
@@ -540,6 +606,7 @@ def register_migration_extractor():
             f"Failed to register migration extractor in Clowder v2. Status code: {response.status_code}"
         )
 
+
 def add_children(collection_hierarchy_json, remaining_collections):
     new_json = []
     new_remaining_collections = []
@@ -584,22 +651,26 @@ def build_collection_hierarchy(collection_id, headers):
         remaining_collections.append(col)
 
     while len(remaining_collections) > 0:
-        children, remaining_collections = add_children(
-            children, remaining_collections
-        )
+        children, remaining_collections = add_children(children, remaining_collections)
 
     print("Now we are done")
     return children
 
+
 def build_collection_metadata_for_v1_dataset(dataset_id, user_v1, headers):
-    dataset_collections = get_clowder_v1_dataset_collections(headers=headers, user_v1=user_v1, dataset_id=dataset_id)
+    dataset_collections = get_clowder_v1_dataset_collections(
+        headers=headers, user_v1=user_v1, dataset_id=dataset_id
+    )
     collection_data = []
    for collection in dataset_collections:
-        collection_children = build_collection_hierarchy(collection_id=collection['id'], headers=headers)
+        collection_children = build_collection_hierarchy(
+            collection_id=collection["id"], headers=headers
+        )
         for child in collection_children:
             collection_data.append(child)
     return collection_data
 
+
 def process_user_and_resources(user_v1, USER_MAP, DATASET_MAP):
     """Process user resources from Clowder v1 to Clowder v2."""
     user_v1_datasets = get_clowder_v1_user_datasets(user_id=user_v1["id"])
@@ -639,22 +710,24 @@ def process_user_and_resources(user_v1, USER_MAP, DATASET_MAP):
             add_file_metadata(file, file_v2_id, clowder_headers_v1, user_headers_v2)
         # posting the collection hierarchy as metadata
         # TODO need to actually post this
-        collection_metadata_dict = build_collection_metadata_for_v1_dataset(dataset_id=dataset['id'],user_v1=user_v1, headers=clowder_headers_v1)
+        collection_metadata_dict = build_collection_metadata_for_v1_dataset(
+            dataset_id=dataset["id"], user_v1=user_v1, headers=clowder_headers_v1
+        )
         migration_extractor_collection_metadata = {
-            "listener" : {
-            "name": "migration",
-            "version": "1",
-            "description": "Migration of metadata from Clowder v1 to Clowder v2",
+            "listener": {
+                "name": "migration",
+                "version": "1",
+                "description": "Migration of metadata from Clowder v1 to Clowder v2",
             },
             "context_url": "https://clowder.ncsa.illinois.edu/contexts/metadata.jsonld",
             "content": collection_metadata_dict,
             "contents": collection_metadata_dict,
         }
-        v2_metadata_endpoint = (
-            f"{CLOWDER_V2}/api/v2/datasets/{dataset_v2_id}/metadata"
-        )
+        v2_metadata_endpoint = f"{CLOWDER_V2}/api/v2/datasets/{dataset_v2_id}/metadata"
         response = requests.post(
-            v2_metadata_endpoint, json=migration_extractor_collection_metadata, headers=clowder_headers_v2
+            v2_metadata_endpoint,
+            json=migration_extractor_collection_metadata,
+            headers=clowder_headers_v2,
        )
        if response.status_code == 200:
             print("Successfully added collection info as metadata in v2.")
@@ -663,7 +736,6 @@ def process_user_and_resources(user_v1, USER_MAP, DATASET_MAP):
                 f"Failed to add collection info as metadata in Clowder v2. Status code: {response.status_code}"
             )
 
-
     return [USER_MAP, DATASET_MAP]
 
 
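Note on the new add_dataset_license() helper (not part of the patch): the if/elif chain enumerates the three v1 Creative Commons flags (ccAllowCommercial, ccAllowDerivative, ccRequireShareAlike) and maps each combination to a v2 license id, falls back to "CC-BY" for combinations it does not list, and only creates a custom license record for non-standard v1 licenses. A minimal table-driven sketch of that same mapping follows; it assumes the flags arrive as booleans, as the patch treats them, and the function name map_standard_license is illustrative only.

# Sketch of the standard-license mapping performed by add_dataset_license().
# License id strings are copied from the patch; the function name and the
# boolean-tuple table are illustrative, not part of the migration script.

CC_LICENSE_BY_FLAGS = {
    # (ccAllowCommercial, ccAllowDerivative, ccRequireShareAlike)
    (False, False, False): "CC BY-NC-ND",
    (True, False, False): "CC BY-ND",
    (False, True, False): "CC BY-NC",
    (False, True, True): "CC BY-NC-SA",
    (True, True, True): "CC BY-SA",
    (True, True, False): "CC BY",
}


def map_standard_license(v1_license, default="CC-BY"):
    """Return the v2 license id for a v1 'license2' (Creative Commons) license."""
    key = (
        bool(v1_license["ccAllowCommercial"]),
        bool(v1_license["ccAllowDerivative"]),
        bool(v1_license["ccRequireShareAlike"]),
    )
    # Combinations the patch does not enumerate (e.g. ShareAlike combined with
    # NoDerivatives) fall back to the same "CC-BY" default the patch uses.
    return CC_LICENSE_BY_FLAGS.get(key, default)


if __name__ == "__main__":
    # Non-commercial, derivatives allowed, share-alike required -> "CC BY-NC-SA"
    example_flags = {
        "ccAllowCommercial": False,
        "ccAllowDerivative": True,
        "ccRequireShareAlike": True,
    }
    print(map_standard_license(example_flags))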