Skip to content

Commit

Permalink
Add feature for frozen DOI bundles
Browse files Browse the repository at this point in the history
  • Loading branch information
danlamanna committed Feb 5, 2025
1 parent d4542a9 commit 40e2790
Show file tree
Hide file tree
Showing 7 changed files with 144 additions and 4 deletions.
2 changes: 1 addition & 1 deletion isic/core/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,4 +201,4 @@ class CollectionAdmin(StaffReadonlyAdmin):
# Admin registration for the Doi model, built on StaffReadonlyAdmin.
@admin.register(Doi)
class DoiAdmin(StaffReadonlyAdmin):
# Join the related collection when loading the changelist rows.
list_select_related = ["collection"]
# NOTE(review): list_display is assigned twice here; the later assignment
# (the one including "bundle") is the one that takes effect. The first line
# looks like the pre-change version of the same line — confirm and drop it.
list_display = ["id", "url", "collection"]
list_display = ["id", "url", "collection", "bundle"]
17 changes: 17 additions & 0 deletions isic/core/migrations/0005_doi_bundle.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Generated by Django 5.1.5 on 2025-02-05 17:43

from django.db import migrations, models


# Schema migration that adds the optional ``bundle`` file field to the ``doi`` model.
class Migration(migrations.Migration):
# Applied after the preceding ``core`` migration.
dependencies = [
("core", "0004_alter_collection_description"),
]

operations = [
# New column is nullable and may be left blank; uploaded files are
# placed under the "doi-bundles/" prefix.
migrations.AddField(
model_name="doi",
name="bundle",
field=models.FileField(blank=True, null=True, upload_to="doi-bundles/"),
),
]
2 changes: 2 additions & 0 deletions isic/core/models/doi.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,7 @@ class Doi(TimeStampedModel):

url = models.CharField(max_length=200)

bundle = models.FileField(upload_to="doi-bundles/", null=True, blank=True)

def __str__(self) -> str:
# A Doi is displayed as its identifier.
return self.id
67 changes: 66 additions & 1 deletion isic/core/services/collection/doi.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,31 @@
import csv
import logging
from pathlib import Path
import random
import tempfile
from typing import Any
from urllib import parse
import zipfile

from django.conf import settings
from django.contrib.auth.models import User
from django.core.exceptions import ValidationError
from django.db import transaction
from django.core.files import File
from django.db import connection, transaction
from django.template.loader import render_to_string
import requests
from requests.exceptions import HTTPError

from isic.core.models.collection import Collection
from isic.core.models.doi import Doi
from isic.core.services import image_metadata_csv
from isic.core.services.collection import (
collection_get_creators_in_attribution_order,
collection_lock,
collection_update,
)
from isic.core.tasks import create_doi_bundle_task
from isic.zip_download.api import get_attributions

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -145,6 +154,62 @@ def collection_create_doi(*, user: User, collection: Collection) -> Doi:
# retry to publish it. (May want a django-admin action for this if it ever happens.)
_datacite_update_doi(doi_dict, doi_id)

create_doi_bundle_task.delay_on_commit(doi_id)

logger.info("User %d created DOI %s for collection %d", user.id, doi.id, collection.id)

return doi


def collection_create_doi_bundle(*, doi: Doi) -> None:
    """
    Create a frozen bundle of the collection associated with the DOI.

    The bundle is a zip archive holding:

    * ``images/<isic_id>.jpg`` for every image in the collection,
    * ``metadata.csv`` built from ``image_metadata_csv``,
    * ``licenses/<license>.txt`` for each distinct copyright license in use,
    * ``attribution.txt`` with the attributions separated by blank lines.

    The finished archive is assigned to ``doi.bundle`` and the DOI is saved. All
    intermediate files are written to a temporary directory that is removed whether
    or not the build succeeds, so nothing is left behind in the working directory.

    This contains a lot of overlapping logic with the collection_download_metadata view
    and the zip_file_listing view.

    :param doi: the DOI whose collection is bundled; its ``bundle`` field is replaced.
    """
    collection = Collection.objects.select_related("doi").get(doi=doi)
    bundle_filename = f"ISIC-Collection-{doi.id.split('/')[1]}.zip"

    with tempfile.TemporaryDirectory() as workdir:
        bundle_path = Path(workdir) / bundle_filename
        metadata_path = Path(workdir) / "metadata.csv"

        with transaction.atomic():
            # Issued before any other query in this transaction so that the image,
            # metadata, license and attribution queries below all run under
            # REPEATABLE READ.
            with connection.cursor() as cursor:
                cursor.execute("SET TRANSACTION ISOLATION LEVEL REPEATABLE READ")

            images = collection.images.select_related("accession").all()

            with zipfile.ZipFile(bundle_path, "w") as bundle:
                for image in images.iterator():
                    with image.accession.blob.open("rb") as blob:
                        bundle.writestr(f"images/{image.isic_id}.jpg", blob.read())

                # newline="" lets the csv module control line endings itself.
                with metadata_path.open("w", newline="", encoding="utf-8") as metadata_file:
                    collection_metadata = image_metadata_csv(qs=images)
                    # The first item produced is used as the header row; the rest
                    # are the per-image dict rows.
                    writer = csv.DictWriter(metadata_file, fieldnames=next(collection_metadata))
                    writer.writeheader()
                    for row in collection_metadata:
                        assert isinstance(row, dict)  # noqa: S101
                        writer.writerow(row)

                # The file is closed (and therefore flushed) before being archived.
                bundle.write(metadata_path, "metadata.csv")

                for license_ in (
                    images.values_list("accession__copyright_license", flat=True)
                    .order_by()
                    .distinct()
                ):
                    bundle.writestr(
                        f"licenses/{license_}.txt",
                        render_to_string(f"zip_download/{license_}.txt"),
                    )

                attributions = get_attributions(
                    images.values_list("accession__cohort__attribution", flat=True)
                )
                bundle.writestr("attribution.txt", "\n\n".join(attributions))

            with bundle_path.open("rb") as bundle_file:
                # Pass an explicit relative name: the open file's own name is an
                # absolute temp path, which must not leak into the stored file name.
                doi.bundle = File(bundle_file, name=bundle_filename)
                doi.save()
10 changes: 10 additions & 0 deletions isic/core/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from urllib3.exceptions import ConnectionError, TimeoutError

from isic.core.models.collection import Collection
from isic.core.models.doi import Doi
from isic.core.models.image import Image
from isic.core.search import bulk_add_to_search_index
from isic.core.serializers import SearchQueryIn
Expand Down Expand Up @@ -127,6 +128,15 @@ def generate_staff_image_list_metadata_csv(user_id: int) -> None:
Path(f.name).unlink()


@shared_task(soft_time_limit=60 * 60 * 12, time_limit=(60 * 60 * 12) + 60)
def create_doi_bundle_task(doi_id: str) -> None:
    """
    Build and store the frozen bundle for the DOI identified by ``doi_id``.

    The task is given a 12 hour soft limit and a hard limit 60 seconds beyond that.
    """
    # Imported here rather than at module level: the service module itself imports
    # this task, so a top-level import would be circular.
    from isic.core.services.collection.doi import collection_create_doi_bundle

    collection_create_doi_bundle(doi=Doi.objects.get(id=doi_id))


@shared_task(soft_time_limit=10, time_limit=15)
def prune_expired_oauth_tokens():
clear_expired_oauth_tokens()
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,11 @@
</div>
{% endif %}
<div class="py-1" role="none">
{% if collection.doi.bundle %}
<a href="{{ collection.doi.bundle.url }}" class="hover:bg-gray-100 hover:text-gray-900 text-gray-700 block px-4 py-2 text-sm" role="menuitem" tabindex="-1" id="menu-item-0">Download Original Collection</a>
{% else %}
<a disabled class="cursor-default hover:bg-gray-100 hover:text-gray-400 text-gray-300 block px-4 py-2 text-sm" role="menuitem" tabindex="-1" id="menu-item-0">Download Original Collection</a>
{% endif %}
<a @click="downloadAsZip({collections: '{{ collection.id }}'})"
class="cursor-pointer hover:bg-gray-100 hover:text-gray-900 text-gray-700 block px-4 py-2 text-sm" role="menuitem"
tabindex="-1" id="menu-item-0">Download Collection
Expand Down
45 changes: 43 additions & 2 deletions isic/core/tests/test_doi.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,17 @@
from pathlib import Path
import tempfile
import zipfile

import pytest

from isic.core.forms.doi import CreateDoiForm
from isic.core.models.doi import Doi
from isic.core.models.image import Image
from isic.core.services.collection.doi import collection_build_doi, collection_create_doi
from isic.core.services.collection.doi import (
collection_build_doi,
collection_create_doi,
collection_create_doi_bundle,
)


@pytest.fixture()
Expand All @@ -28,7 +36,7 @@ def staff_user_request(staff_user, mocker):
return mocker.MagicMock(user=staff_user)


@pytest.mark.django_db()
@pytest.mark.django_db(transaction=True)
def test_collection_create_doi(
public_collection_with_public_images,
staff_user,
Expand All @@ -40,6 +48,7 @@ def test_collection_create_doi(
public_collection_with_public_images.refresh_from_db()
assert public_collection_with_public_images.locked
assert public_collection_with_public_images.doi
assert public_collection_with_public_images.doi.bundle
assert public_collection_with_public_images.doi.creator == staff_user
mock_datacite_create_doi.assert_called_once()
mock_datacite_update_doi.assert_called_once()
Expand Down Expand Up @@ -179,3 +188,35 @@ def test_doi_creators_collapse_repeated_creators(collection_with_repeated_creato
assert creators[1]["name"] == cohort_b.attribution

assert len(creators) == 2


@pytest.mark.django_db(transaction=True)
def test_doi_bundle_contains_expected_files(
    image_factory,
    collection_factory,
    staff_user,
    mock_datacite_create_doi,
    mock_datacite_update_doi,
):
    """The DOI bundle holds every image, the metadata CSV, licenses and attribution."""
    collection = collection_factory(public=True)
    images = [image_factory(public=True) for _ in range(3)]
    collection.images.set(images)

    doi = collection_create_doi(user=staff_user, collection=collection)

    collection_create_doi_bundle(doi=doi)

    with tempfile.TemporaryDirectory() as temp_dir, zipfile.ZipFile(doi.bundle) as zf:
        zf.extractall(temp_dir)
        extracted = Path(temp_dir)

        for image in images:
            image_path = f"images/{image.isic_id}.jpg"
            assert (extracted / image_path).exists()

        assert (extracted / "metadata.csv").exists()

        # Collect the license of *each* image, not just the first one, so that
        # every license present in the collection is checked.
        licenses = {image.accession.copyright_license for image in images}
        for license_ in licenses:
            assert (extracted / f"licenses/{license_}.txt").exists()

        assert (extracted / "attribution.txt").exists()

0 comments on commit 40e2790

Please sign in to comment.