Skip to content

Commit

Permalink
created v2 license based on v1 license details
Browse files Browse the repository at this point in the history
  • Loading branch information
ddey2 committed Aug 30, 2024
1 parent 1199588 commit f063893
Showing 1 changed file with 91 additions and 19 deletions.
110 changes: 91 additions & 19 deletions scripts/migration/migrate.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,19 +130,21 @@ def get_clowder_v1_dataset_collections(headers, user_v1, dataset_id):
user_collections = response.json()
for collection in user_collections:
collection_id = collection["id"]
if collection['name'] == 'subchild':
if collection["name"] == "subchild":
print("HERE")
collection_dataset_endpoint = (
f"{CLOWDER_V1}/api/collections/{collection_id}/datasets"
)
try:
dataset_response = requests.get(collection_dataset_endpoint, headers=headers)
dataset_response = requests.get(
collection_dataset_endpoint, headers=headers
)
datasets = dataset_response.json()
for ds in datasets:
if ds["id"] == dataset_id:
matching_collections.append(collection)
except Exception as e:
print('Exception', e)
print("Exception", e)
return matching_collections


Expand Down Expand Up @@ -271,10 +273,74 @@ def create_admin_user():
return generate_user_api_key(admin_user, admin_user["password"])


def add_dataset_license(v1_license, headers):
"""Create appropriate license (standard/custom) based on v1 license details"""
license_id = "CC-BY"
# standard licenses
if v1_license["license_type"] == "license2":
if (
not v1_license["ccAllowCommercial"]
and not v1_license["ccAllowDerivative"]
and not v1_license["ccRequireShareAlike"]
):
license_id = "CC BY-NC-ND"
elif (
v1_license["ccAllowCommercial"]
and not v1_license["ccAllowDerivative"]
and not v1_license["ccRequireShareAlike"]
):
license_id = "CC BY-ND"
elif (
not v1_license["ccAllowCommercial"]
and v1_license["ccAllowDerivative"]
and not v1_license["ccRequireShareAlike"]
):
license_id = "CC BY-NC"
elif (
not v1_license["ccAllowCommercial"]
and v1_license["ccAllowDerivative"]
and v1_license["ccRequireShareAlike"]
):
license_id = "CC BY-NC-SA"
elif (
v1_license["ccAllowCommercial"]
and v1_license["ccAllowDerivative"]
and v1_license["ccRequireShareAlike"]
):
license_id = "CC BY-SA"
elif (
v1_license["ccAllowCommercial"]
and v1_license["ccAllowDerivative"]
and not v1_license["ccRequireShareAlike"]
):
license_id = "CC BY"
elif v1_license["license_type"] == "license3":
license_id = "CCO Public Domain Dedication"
else:
# custom license
license_body = {
"name": v1_license["license_text"],
"url": v1_license["license_url"],
"holders": v1_license["holders"],
}
if license_body["url"] == "":
license_body["url"] = "https://dbpedia.org/page/All_rights_reserved"
license_v2_endpoint = f"{CLOWDER_V2}/api/v2/licenses?"
response = requests.post(
license_v2_endpoint, headers=headers, json=license_body
)
print(response.json())
license_id = response.json()["id"]
return license_id


def create_v2_dataset(dataset, headers):
"""Create a dataset in Clowder v2."""
# TODO: GET correct license
dataset_in_v2_endpoint = f"{CLOWDER_V2}/api/v2/datasets?license_id=CC BY"
print(f"Creating dataset license in Clowder v2.")
v2_license_id = add_dataset_license(dataset["license"], headers)

dataset_in_v2_endpoint = f"{CLOWDER_V2}/api/v2/datasets?license_id={v2_license_id}"
dataset_example = {
"name": dataset["name"],
"description": dataset["description"],
Expand Down Expand Up @@ -540,6 +606,7 @@ def register_migration_extractor():
f"Failed to register migration extractor in Clowder v2. Status code: {response.status_code}"
)


def add_children(collection_hierarchy_json, remaining_collections):
new_json = []
new_remaining_collections = []
Expand Down Expand Up @@ -584,22 +651,26 @@ def build_collection_hierarchy(collection_id, headers):
remaining_collections.append(col)

while len(remaining_collections) > 0:
children, remaining_collections = add_children(
children, remaining_collections
)
children, remaining_collections = add_children(children, remaining_collections)
print("Now we are done")
return children


def build_collection_metadata_for_v1_dataset(dataset_id, user_v1, headers):
dataset_collections = get_clowder_v1_dataset_collections(headers=headers, user_v1=user_v1, dataset_id=dataset_id)
dataset_collections = get_clowder_v1_dataset_collections(
headers=headers, user_v1=user_v1, dataset_id=dataset_id
)

collection_data = []
for collection in dataset_collections:
collection_children = build_collection_hierarchy(collection_id=collection['id'], headers=headers)
collection_children = build_collection_hierarchy(
collection_id=collection["id"], headers=headers
)
for child in collection_children:
collection_data.append(child)
return collection_data


def process_user_and_resources(user_v1, USER_MAP, DATASET_MAP):
"""Process user resources from Clowder v1 to Clowder v2."""
user_v1_datasets = get_clowder_v1_user_datasets(user_id=user_v1["id"])
Expand Down Expand Up @@ -639,22 +710,24 @@ def process_user_and_resources(user_v1, USER_MAP, DATASET_MAP):
add_file_metadata(file, file_v2_id, clowder_headers_v1, user_headers_v2)
# posting the collection hierarchy as metadata
# TODO need to actually post this
collection_metadata_dict = build_collection_metadata_for_v1_dataset(dataset_id=dataset['id'],user_v1=user_v1, headers=clowder_headers_v1)
collection_metadata_dict = build_collection_metadata_for_v1_dataset(
dataset_id=dataset["id"], user_v1=user_v1, headers=clowder_headers_v1
)
migration_extractor_collection_metadata = {
"listener" : {
"name": "migration",
"version": "1",
"description": "Migration of metadata from Clowder v1 to Clowder v2",
"listener": {
"name": "migration",
"version": "1",
"description": "Migration of metadata from Clowder v1 to Clowder v2",
},
"context_url": "https://clowder.ncsa.illinois.edu/contexts/metadata.jsonld",
"content": collection_metadata_dict,
"contents": collection_metadata_dict,
}
v2_metadata_endpoint = (
f"{CLOWDER_V2}/api/v2/datasets/{dataset_v2_id}/metadata"
)
v2_metadata_endpoint = f"{CLOWDER_V2}/api/v2/datasets/{dataset_v2_id}/metadata"
response = requests.post(
v2_metadata_endpoint, json=migration_extractor_collection_metadata, headers=clowder_headers_v2
v2_metadata_endpoint,
json=migration_extractor_collection_metadata,
headers=clowder_headers_v2,
)
if response.status_code == 200:
print("Successfully added collection info as metadata in v2.")
Expand All @@ -663,7 +736,6 @@ def process_user_and_resources(user_v1, USER_MAP, DATASET_MAP):
f"Failed to add collection info as metadata in Clowder v2. Status code: {response.status_code}"
)


return [USER_MAP, DATASET_MAP]


Expand Down

0 comments on commit f063893

Please sign in to comment.