From a1ee3ee4e450ad6147641522ff199b11c724b23c Mon Sep 17 00:00:00 2001 From: Amit Upreti Date: Mon, 10 Apr 2023 14:38:21 -0400 Subject: [PATCH 01/12] DataSource:model to replace dataaccess Here we are creating DataSource model which will work exactly like dataaccess but will let us manage all project related access through this model. This will make it easier in future to manage access as everything will be centralized through here. Similarly as discussed here https://github.com/MIT-LCP/physionet-build/issues/1927, we can use this model to work with `AccessGrant` --- .../project/modelcomponents/access.py | 63 +++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/physionet-django/project/modelcomponents/access.py b/physionet-django/project/modelcomponents/access.py index 0ab3df2549..50a9b78ab6 100644 --- a/physionet-django/project/modelcomponents/access.py +++ b/physionet-django/project/modelcomponents/access.py @@ -4,9 +4,11 @@ from django.contrib.auth.hashers import check_password, make_password from django.contrib.contenttypes.fields import GenericForeignKey from django.contrib.contenttypes.models import ContentType +from django.core.exceptions import ValidationError from django.db import models from django.utils import timezone from django.utils.crypto import get_random_string +from django.utils.translation import gettext_lazy as _ from project.modelcomponents.fields import SafeHTMLField from project.validators import validate_version @@ -167,6 +169,67 @@ class Meta: default_permissions = () +class DataSource(models.Model): + """ + Controls all access to project data. 
+ """ + class DataLocation(models.TextChoices): + DIRECT = 'DI', 'Direct' + GOOGLE_BIGQUERY = 'GBQ', 'Google BigQuery' + GOOGLE_CLOUD_STORAGE = 'GCS', 'Google Cloud Storage' + AWS_OPEN_DATA = 'AOD', 'AWS Open Data' + AWS_S3 = 'AS3', 'AWS S3' + + class AccessMechanism(models.TextChoices): + GOOGLE_GROUP_EMAIL = 'google-group-email', 'Google Group Email' + S3 = 's3', 'S3' + RESEARCH_ENVIRONMENT = 'research-environment', 'Research Environment' + + project = models.ForeignKey('project.PublishedProject', + related_name='data_sources', db_index=True, on_delete=models.CASCADE) + files_available = models.BooleanField(default=False) + data_location = models.CharField(max_length=3, choices=DataLocation.choices) + access_mechanism = models.CharField(max_length=20, choices=AccessMechanism.choices, null=True, blank=True) + email = models.CharField(max_length=320, null=True, blank=True) + uri = models.CharField(max_length=320, null=True, blank=True) + + class Meta: + default_permissions = () + unique_together = ('project', 'data_location') + + def clean(self): + super().clean() + + if self.data_location == self.DataLocation.GOOGLE_BIGQUERY: + if self.access_mechanism != self.AccessMechanism.GOOGLE_GROUP_EMAIL: + raise ValidationError('Google BigQuery data sources must use the Google Group Email access mechanism.') + if not self.email: + raise ValidationError('Google BigQuery data sources must have an email address.') + elif self.data_location == self.DataLocation.GOOGLE_CLOUD_STORAGE: + if self.access_mechanism != self.AccessMechanism.GOOGLE_GROUP_EMAIL: + raise ValidationError('Google Cloud Storage data sources must use the Google Group Email access ' + 'mechanism.') + if not self.uri: + raise ValidationError('Google Cloud Storage data sources must have an uri address.') + elif self.data_location == self.DataLocation.AWS_OPEN_DATA: + if self.access_mechanism != self.AccessMechanism.S3: + raise ValidationError('AWS Open Data data sources must use the S3 access mechanism.') + if 
not self.uri: + raise ValidationError('AWS Open Data data sources must have a URI.') + elif self.data_location == self.DataLocation.AWS_S3: + if self.access_mechanism != self.AccessMechanism.S3: + raise ValidationError('AWS S3 data sources must use the S3 access mechanism.') + if not self.uri: + raise ValidationError('AWS S3 data sources must have a URI.') + elif self.data_location == self.DataLocation.DIRECT: + if self.email: + raise ValidationError('Direct data sources must not have an email address.') + if self.uri: + raise ValidationError('Direct data sources must not have a URI.') + else: + raise ValidationError('Invalid data location.') + + class AnonymousAccess(models.Model): """ Makes it possible to grant anonymous access (without user auth) From 58f65d1121edafd4a4695e5f4f117df188916b1a Mon Sep 17 00:00:00 2001 From: Amit Upreti Date: Mon, 10 Apr 2023 14:39:14 -0400 Subject: [PATCH 02/12] interface to create data_source for project --- physionet-django/console/forms.py | 21 ++++++++++ .../console/manage_published_project.html | 41 +++++++++++++++++++ physionet-django/console/views.py | 17 ++++++++ 3 files changed, 79 insertions(+) diff --git a/physionet-django/console/forms.py b/physionet-django/console/forms.py index 74f86d7923..548c916b23 100644 --- a/physionet-django/console/forms.py +++ b/physionet-django/console/forms.py @@ -21,6 +21,7 @@ Contact, CopyeditLog, DataAccess, + DataSource, DUA, EditLog, License, @@ -689,6 +690,26 @@ def save(self): return data_access +class DataSourceForm(forms.ModelForm): + class Meta: + model = DataSource + fields = ('data_location', 'access_mechanism', 'files_available', 'email', 'uri' ) + + def __init__(self, project, *args, **kwargs): + super().__init__(*args, **kwargs) + self.project = project + + if not settings.ENABLE_CLOUD_RESEARCH_ENVIRONMENTS: + self.fields['access_mechanism'].choices = [ + choice for choice in self.fields['access_mechanism'].choices if choice[0] != 'research-environment'] + + def save(self): + 
data_source = super(DataSourceForm, self).save(commit=False) + data_source.project = self.project + data_source.save() + return data_source + + class PublishedProjectContactForm(forms.ModelForm): class Meta: model = Contact diff --git a/physionet-django/console/templates/console/manage_published_project.html b/physionet-django/console/templates/console/manage_published_project.html index 7b3902ea2e..cf48ed61e8 100644 --- a/physionet-django/console/templates/console/manage_published_project.html +++ b/physionet-django/console/templates/console/manage_published_project.html @@ -359,6 +359,46 @@
Storage location
{% endif %} +
  • +
    Data Source
    +

    Add and remove Data Source options.

    +{#
    #} +{#

    Note: The remove button will remove the option for requesting cloud access that appears in the files section of a project. It will not (1) delete/deactivate the bucket or (2) remove access for users who are already using the bucket.

    #} +{#
    #} +
    + {% csrf_token %} + {% include "project/content_inline_form_snippet.html" with form=data_source_form %} + +
    + {% if data_sources %} + + + + + + + + + + + + {% for item in data_sources %} + + + + + + + + {% csrf_token %} + + + + {% endfor %} + +
    LocationAccess MechanismFiles AvailableEmailUriRemove
    {{item.data_location}}{{item.access_mechanism}}{{item.files_available}}{{item.email}}{{item.uri}}
    + {% endif %} +
  • Google Cloud
    {% if not has_credentials %} @@ -388,6 +428,7 @@
    Google Cloud
    + {% endblock %} diff --git a/physionet-django/console/views.py b/physionet-django/console/views.py index 07fc7e6919..ede8109e95 100644 --- a/physionet-django/console/views.py +++ b/physionet-django/console/views.py @@ -43,6 +43,7 @@ ActiveProject, ArchivedProject, DataAccess, + DataSource, DUA, DataAccessRequest, DUASignature, @@ -829,6 +830,7 @@ def manage_published_project(request, project_slug, version): deprecate_form = None if project.deprecated_files else forms.DeprecateFilesForm() has_credentials = bool(settings.GOOGLE_APPLICATION_CREDENTIALS) data_access_form = forms.DataAccessForm(project=project) + data_source_form = forms.DataSourceForm(project=project) contact_form = forms.PublishedProjectContactForm(project=project, instance=project.contact) legacy_author_form = forms.CreateLegacyAuthorForm(project=project) @@ -895,6 +897,18 @@ def manage_published_project(request, project_slug, version): if data_access_form.is_valid(): data_access_form.save() messages.success(request, "Stored method to access the files") + elif 'data_location' in request.POST: + data_source_form = forms.DataSourceForm(project=project, data=request.POST) + if data_source_form.is_valid(): + data_source_form.save() + messages.success(request, "Stored method to access the files") + elif 'data_source_removal' in request.POST and request.POST['data_source_removal'].isdigit(): + try: + data_source = DataSource.objects.get(project=project, id=request.POST['data_source_removal']) + data_source.delete() + # Deletes the object if it exists for that specific project. 
+ except DataSource.DoesNotExist: + pass elif 'data_access_removal' in request.POST and request.POST['data_access_removal'].isdigit(): try: data_access = DataAccess.objects.get(project=project, id=request.POST['data_access_removal']) @@ -921,6 +935,7 @@ def manage_published_project(request, project_slug, version): legacy_author_form = forms.CreateLegacyAuthorForm(project=project) data_access = DataAccess.objects.filter(project=project) + data_sources = DataSource.objects.filter(project=project) authors, author_emails, storage_info, edit_logs, copyedit_logs, latest_version = project.info_card() tasks = list(get_associated_tasks(project)) @@ -946,7 +961,9 @@ def manage_published_project(request, project_slug, version): 'deprecate_form': deprecate_form, 'has_credentials': has_credentials, 'data_access_form': data_access_form, + 'data_source_form': data_source_form, 'data_access': data_access, + 'data_sources': data_sources, 'rw_tasks': rw_tasks, 'ro_tasks': ro_tasks, 'anonymous_url': anonymous_url, From 86dcdcf72e58097537500f05e5ee45fe79f29c14 Mon Sep 17 00:00:00 2001 From: Amit Upreti Date: Mon, 10 Apr 2023 16:44:10 -0400 Subject: [PATCH 03/12] indent for readability --- .../templates/project/published_project.html | 400 +++++++++--------- 1 file changed, 201 insertions(+), 199 deletions(-) diff --git a/physionet-django/project/templates/project/published_project.html b/physionet-django/project/templates/project/published_project.html index 7975abf812..85a9998ef9 100644 --- a/physionet-django/project/templates/project/published_project.html +++ b/physionet-django/project/templates/project/published_project.html @@ -36,26 +36,26 @@

    {{ project.title }}

    - {% for author in authors %} - {{ author|show_author_info|safe }} - {% if forloop.counter < authors|length %}, {% endif %} - {% endfor %} + {% for author in authors %} + {{ author|show_author_info|safe }} + {% if forloop.counter < authors|length %}, {% endif %} + {% endfor %}

    Published: {{ project.publish_datetime|date }}. Version: {{ project.version }}{% if not project.is_latest_version %} - <View latest version>{% endif %} + <View latest version>{% endif %}

    {% if not project.is_latest_version %} - + {% endif %}
    @@ -63,23 +63,23 @@

    {{ project.title }}

    {% if news %} {% endif %} @@ -93,18 +93,18 @@

    {{ project.title }}

    {% if project.is_legacy %} {{ project.full_description|safe }} -
    +
    {% else %} {# 0: Database #} {% if project.resource_type.id == 0 %} {% include "project/database_content.html" %} - {# 1: Software #} + {# 1: Software #} {% elif project.resource_type.id == 1 %} {% include "project/software_content.html" %} - {# 2: Challenge #} + {# 2: Challenge #} {% elif project.resource_type.id == 2 %} {% include "project/challenge_content.html" %} - {# 3: Model #} + {# 3: Model #} {% elif project.resource_type.id == 3 %} {% include "project/model_content.html" %} {% endif %} @@ -116,131 +116,131 @@

    {{ project.title }}

    {# Contents Button #} {% if not project.is_legacy %} -
    - - {% endif %} {% if parent_projects %} -
    -
    Parent Projects
    -
    - {{ project.title }} was derived from: -
      - {% for pp in parent_projects %} -
    • {{ pp }}
    • - {% endfor %} -
    - Please cite them when using this project. +
    +
    Parent Projects
    +
    + {{ project.title }} was derived from: +
      + {% for pp in parent_projects %} +
    • {{ pp }}
    • + {% endfor %} +
    + Please cite them when using this project. +
    -
    {% endif %}
    Share
    + href="mailto:?subject={{ project.title|urlencode }}&body={{ request.build_absolute_uri }}" + role="button" title="Share with email"> + href="http://www.facebook.com/sharer.php?u={{ request.build_absolute_uri }}" role="button" + title="Share on Facebook"> + href="https://www.linkedin.com/shareArticle?url={{ request.build_absolute_uri }}" + role="button" title="Share on LinkedIn"> + href="https://www.reddit.com/submit?url={{ request.build_absolute_uri }}&title={{ project.title|urlencode }}" + role="button" title="Share on Reddit"> + href="https://twitter.com/intent/tweet?text={{ project.title|urlencode }}. {{ request.build_absolute_uri }}" + role="button" title="Share on Twitter">
    @@ -309,7 +309,7 @@
    Discovery

    Project Website:
    {{ project.project_home_page }} + class="fas fa-external-link-alt"> {{ project.project_home_page }}

    {% endif %}
    @@ -335,7 +335,7 @@
    Versions
      {% for project in all_project_versions %}
    • {{ project.version }} + href="{% url 'published_project' project.slug project.version %}">{{ project.version }} - {{ project.publish_datetime|date }}
    • {% endfor %} @@ -355,7 +355,7 @@

      Files

      The files for this version of the project ({{ project.version }}) are no longer available. The latest version of this project is {{ latest_version.version }} + target="_blank">{{ latest_version.version }} {% endif %}
    {% elif project.embargo_active %} @@ -365,81 +365,83 @@

    Files

    {% else %} {% if is_authorized %} {% if project.allow_file_downloads %} - {# refactored code goes here #} -

    Total uncompressed size: {{ main_size }}.

    - {# ZIP START #} -
    Access the files
    - {% if project.access_policy == AccessPolicy.CONTRIBUTOR_REVIEW %} -

    You have been granted access for a specific - project. If you require access for an additional project, please submit a new request. -

    - {% endif %} + {# refactored code goes here #} +

    Total uncompressed size: {{ main_size }}.

    + {# ZIP START #} +
    Access the files
    + {% if project.access_policy == AccessPolicy.CONTRIBUTOR_REVIEW %} +

    You have been granted access for a specific + project. If you require access for an additional project, please submit a new request. +

    + {% endif %} -
      - {% if project.access_policy %} - {% if project.compressed_storage_size %} -
    • Download - the ZIP file ({{ compressed_size }}) -
    • - {% endif %} - {% else %} - {% if project.gcp %} - {% if project.gcp.sent_zip %} - - + -
    • Download the ZIP file - ({{ compressed_size }}) -
    • +
    • Download the ZIP file + ({{ compressed_size }}) +
    • + {% elif project.compressed_storage_size %} +
    • Download the ZIP file + ({{ compressed_size }}) +
    • + {% endif %} + {% if project.gcp.sent_files %} +
    • Access the files using the Google Cloud Storage Browser here. + Login with a Google account is required. +
    • + Access the data using the Google Cloud command line tools (please refer to the gsutil + documentation for guidance): +
      gsutil -m -u YOUR_PROJECT_ID cp -r gs://{{ project.gcp.bucket_name }} DESTINATION
      +
    • + {% endif %} {% elif project.compressed_storage_size %} -
    • Download the ZIP file - ({{ compressed_size }}) +
    • Download the ZIP file ({{ compressed_size }})
    • {% endif %} - {% if project.gcp.sent_files %} -
    • Access the files using the Google Cloud Storage Browser here. - Login with a Google account is required. -
    • - Access the data using the Google Cloud command line tools (please refer to the gsutil - documentation for guidance): -
      gsutil -m -u YOUR_PROJECT_ID cp -r gs://{{ project.gcp.bucket_name }} DESTINATION
      -
    • - {% endif %} - {% elif project.compressed_storage_size %} -
    • Download the ZIP file ({{ compressed_size }}) + {% endif %} + {% include "project/published_project_data_access.html" %} + {% if is_wget_supported %} +
    • + Download the files using your terminal: +
      wget -r -N -c -np{% if project.access_policy %} --user {{ user }}
      +                  --ask-password{% endif %}
      +                  {{ bulk_url_prefix }}{% url 'serve_published_project_file' project.slug project.version '' %}
    • {% endif %} - {% endif %} - {% include "project/published_project_data_access.html" %} - {% if is_wget_supported %} -
    • - Download the files using your terminal: -
      wget -r -N -c -np{% if project.access_policy %} --user {{ user }} --ask-password{% endif %} {{ bulk_url_prefix }}{% url 'serve_published_project_file' project.slug project.version '' %}
      -
    • - {% endif %} -
    - {# ZIP END #} + + {# ZIP END #} - {% if is_lightwave_supported and project.access_policy == AccessPolicy.OPEN %} - {% if project.has_wfdb %} -

    Visualize waveforms

    + {% if is_lightwave_supported and project.access_policy == AccessPolicy.OPEN %} + {% if project.has_wfdb %} +

    Visualize waveforms

    + {% endif %} {% endif %} - {% endif %} -
    - {% include "project/files_panel.html" %} -
    - {% else %} - {% include "project/published_project_denied_downloads.html" %} - {% endif %} +
    + {% include "project/files_panel.html" %} +
    + {% else %} + {% include "project/published_project_denied_downloads.html" %} + {% endif %} {% else %} {% include "project/published_project_unauthorized.html" %} @@ -458,7 +460,7 @@
    Access the files
    href="{% url 'data_access_request_status' project.slug project.version %}">please reapply.
    {% endif %} - + From 78c47a364d53c635969397c98c26129763358368 Mon Sep 17 00:00:00 2001 From: Amit Upreti Date: Mon, 10 Apr 2023 17:17:15 -0400 Subject: [PATCH 04/12] refactor logic for Research Env Context: The goal here is to refactor to let access controlled through DataAccess without changing the existing logic. And the existing logic is if the project has research environment enabled, the user should not be able to view/download files(they can only create research environment and play with the data there) In this commit, we divided the access into 2 section(first for RE and the rest). The outcome should be same as before which is if RE is enabled show the `published_project_denied_downloads.html` else show normal file access stuff --- .../templates/project/published_project.html | 133 +++++++++--------- 1 file changed, 67 insertions(+), 66 deletions(-) diff --git a/physionet-django/project/templates/project/published_project.html b/physionet-django/project/templates/project/published_project.html index 85a9998ef9..f2f240c202 100644 --- a/physionet-django/project/templates/project/published_project.html +++ b/physionet-django/project/templates/project/published_project.html @@ -364,87 +364,88 @@

    Files

    {% else %} {% if is_authorized %} - {% if project.allow_file_downloads %} - {# refactored code goes here #} -

    Total uncompressed size: {{ main_size }}.

    - {# ZIP START #} -
    Access the files
    - {% if project.access_policy == AccessPolicy.CONTRIBUTOR_REVIEW %} -

    You have been granted access for a specific - project. If you require access for an additional project, please submit a new request. -

    - {% endif %} + {% if can_create_research_environment %} + {% include "project/published_project_denied_downloads.html" %} + {% else %} + {% if project.allow_file_downloads %} + {# refactored code goes here #} +

    Total uncompressed size: {{ main_size }}.

    + {# ZIP START #} +
    Access the files
    + {% if project.access_policy == AccessPolicy.CONTRIBUTOR_REVIEW %} +

    You have been granted access for a specific + project. If you require access for an additional project, please submit a new request. +

    + {% endif %} -
      - {% if project.access_policy %} - {% if project.compressed_storage_size %} -
    • Download - the ZIP file ({{ compressed_size }}) -
    • - {% endif %} - {% else %} - {% if project.gcp %} - {% if project.gcp.sent_zip %} - - + -
    • Download the ZIP file - ({{ compressed_size }}) -
    • +
    • Download the ZIP file + ({{ compressed_size }}) +
    • + {% elif project.compressed_storage_size %} +
    • Download the ZIP file + ({{ compressed_size }}) +
    • + {% endif %} + {% if project.gcp.sent_files %} +
    • Access the files using the Google Cloud Storage Browser here. + Login with a Google account is required. +
    • + Access the data using the Google Cloud command line tools (please refer to the gsutil + documentation for guidance): +
      gsutil -m -u YOUR_PROJECT_ID cp -r gs://{{ project.gcp.bucket_name }} DESTINATION
      +
    • + {% endif %} {% elif project.compressed_storage_size %} -
    • Download the ZIP file - ({{ compressed_size }}) -
    • - {% endif %} - {% if project.gcp.sent_files %} -
    • Access the files using the Google Cloud Storage Browser here. - Login with a Google account is required. -
    • - Access the data using the Google Cloud command line tools (please refer to the gsutil - documentation for guidance): -
      gsutil -m -u YOUR_PROJECT_ID cp -r gs://{{ project.gcp.bucket_name }} DESTINATION
      +
    • Download the ZIP file ({{ compressed_size }})
    • {% endif %} - {% elif project.compressed_storage_size %} -
    • Download the ZIP file ({{ compressed_size }}) + {% endif %} + {% include "project/published_project_data_access.html" %} + {% if is_wget_supported %} +
    • + Download the files using your terminal: +
      wget -r -N -c -np{% if project.access_policy %} --user {{ user }}
      +                    --ask-password{% endif %}
      +                    {{ bulk_url_prefix }}{% url 'serve_published_project_file' project.slug project.version '' %}
    • {% endif %} - {% endif %} - {% include "project/published_project_data_access.html" %} - {% if is_wget_supported %} -
    • - Download the files using your terminal: -
      wget -r -N -c -np{% if project.access_policy %} --user {{ user }}
      -                  --ask-password{% endif %}
      -                  {{ bulk_url_prefix }}{% url 'serve_published_project_file' project.slug project.version '' %}
      -
    • - {% endif %} -
    - {# ZIP END #} + + {# ZIP END #} - {% if is_lightwave_supported and project.access_policy == AccessPolicy.OPEN %} - {% if project.has_wfdb %} -

    Visualize waveforms

    + {% if is_lightwave_supported and project.access_policy == AccessPolicy.OPEN %} + {% if project.has_wfdb %} +

    Visualize waveforms

    + {% endif %} {% endif %} + +
    + {% include "project/files_panel.html" %} +
    + {% endif %} -
    - {% include "project/files_panel.html" %} -
    - {% else %} - {% include "project/published_project_denied_downloads.html" %} {% endif %} - - {% else %} - {% include "project/published_project_unauthorized.html" %} {% endif %} {% endif %}
    From 10202d88f63c8e6d1162b0ef74624e8b2d4e4fbc Mon Sep 17 00:00:00 2001 From: Amit Upreti Date: Mon, 10 Apr 2023 18:07:53 -0400 Subject: [PATCH 05/12] Direct Download refactor: step 2 replaces allow_file_downloads with can_view_files(this controls if a user has download access to the project by using the DataSource). Here the idea is that all the direct download stuff will be inside ``` {% if can_view_files %} ``` and the other access methods like gcp, aws will be outside this block. So the logic will be 1. If the project is RE project, show the RE warning message 2. Else 2.1 If the project has other access outside RE and direct download show that. 2.2 If the project has direct download access, then show direct download stuff To make it easier to understand the changes, i am breaking down the commits to very smaller chunks Note that 2.1 and 2.2 dont have to be either or(as currently a project can be directly accessible and also be accessible via gcs,aws etc) --- .../project/templates/project/published_project.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/physionet-django/project/templates/project/published_project.html b/physionet-django/project/templates/project/published_project.html index f2f240c202..81c7614cd1 100644 --- a/physionet-django/project/templates/project/published_project.html +++ b/physionet-django/project/templates/project/published_project.html @@ -367,7 +367,7 @@

    Files

    {% if can_create_research_environment %} {% include "project/published_project_denied_downloads.html" %} {% else %} - {% if project.allow_file_downloads %} + {% if can_view_files %} {# refactored code goes here #}

    Total uncompressed size: {{ main_size }}.

    {# ZIP START #} From 5837f74f810b146723b69a4e5567910ad2940df5 Mon Sep 17 00:00:00 2001 From: Amit Upreti Date: Mon, 10 Apr 2023 18:09:55 -0400 Subject: [PATCH 06/12] Direct Download refactor: step 3 The p and h5 text should appear for both direct download and the other gcp,aws access mechanism. So bringing them outside the can_view_files if block --- .../project/templates/project/published_project.html | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/physionet-django/project/templates/project/published_project.html b/physionet-django/project/templates/project/published_project.html index 81c7614cd1..a626c9a1e9 100644 --- a/physionet-django/project/templates/project/published_project.html +++ b/physionet-django/project/templates/project/published_project.html @@ -367,11 +367,10 @@

    Files

    {% if can_create_research_environment %} {% include "project/published_project_denied_downloads.html" %} {% else %} - {% if can_view_files %} - {# refactored code goes here #}

    Total uncompressed size: {{ main_size }}.

    - {# ZIP START #}
    Access the files
    + {% if can_view_files %} + {% if project.access_policy == AccessPolicy.CONTRIBUTOR_REVIEW %}

    You have been granted access for a specific From d312b8ccdadae875d5fbd1c974d414c2e3619bd4 Mon Sep 17 00:00:00 2001 From: Amit Upreti Date: Mon, 10 Apr 2023 18:17:41 -0400 Subject: [PATCH 07/12] Direct Download refactor: step 4 Move the other access method(gcp) outside of can_view_files if block. The logic before this commit was, if a project does not have an access policy but has project.gcp, show the gcp stuff. In this commit, we have the same logic. --- .../templates/project/published_project.html | 65 ++++++++++--------- 1 file changed, 33 insertions(+), 32 deletions(-) diff --git a/physionet-django/project/templates/project/published_project.html b/physionet-django/project/templates/project/published_project.html index a626c9a1e9..866ad7440c 100644 --- a/physionet-django/project/templates/project/published_project.html +++ b/physionet-django/project/templates/project/published_project.html @@ -367,8 +367,38 @@

    Files

    {% if can_create_research_environment %} {% include "project/published_project_denied_downloads.html" %} {% else %} -

    Total uncompressed size: {{ main_size }}.

    -
    Access the files
    +

    Total uncompressed size: {{ main_size }}.

    +
    Access the files
    + {% if project.gcp and not project.access_policy %} + {% if project.gcp.sent_zip %} + + +
  • Download the ZIP file + ({{ compressed_size }}) +
  • + {% elif project.compressed_storage_size %} +
  • Download the ZIP file + ({{ compressed_size }}) +
  • + {% endif %} + {% if project.gcp.sent_files %} +
  • Access the files using the Google Cloud Storage Browser here. + Login with a Google account is required. +
  • + Access the data using the Google Cloud command line tools (please refer to the gsutil + documentation for guidance): +
    gsutil -m -u YOUR_PROJECT_ID cp -r gs://{{ project.gcp.bucket_name }} DESTINATION
    +
  • + {% endif %} + {% elif project.compressed_storage_size %} +
  • Download the ZIP file ({{ compressed_size }}) +
  • + {% endif %} {% if can_view_files %} {% if project.access_policy == AccessPolicy.CONTRIBUTOR_REVIEW %} @@ -387,36 +417,7 @@
    Access the files
    {% endif %} {% else %} - {% if project.gcp %} - {% if project.gcp.sent_zip %} - - -
  • Download the ZIP file - ({{ compressed_size }}) -
  • - {% elif project.compressed_storage_size %} -
  • Download the ZIP file - ({{ compressed_size }}) -
  • - {% endif %} - {% if project.gcp.sent_files %} -
  • Access the files using the Google Cloud Storage Browser here. - Login with a Google account is required. -
  • - Access the data using the Google Cloud command line tools (please refer to the gsutil - documentation for guidance): -
    gsutil -m -u YOUR_PROJECT_ID cp -r gs://{{ project.gcp.bucket_name }} DESTINATION
    -
  • - {% endif %} - {% elif project.compressed_storage_size %} -
  • Download the ZIP file ({{ compressed_size }}) -
  • - {% endif %} + {% endif %} {% include "project/published_project_data_access.html" %} {% if is_wget_supported %} From 6b686cb5f4024bda1a7c2292999925983894f56a Mon Sep 17 00:00:00 2001 From: Amit Upreti Date: Mon, 10 Apr 2023 18:18:20 -0400 Subject: [PATCH 08/12] Direct Download refactor: step 5 clean unnecessary/empty else statement --- .../project/templates/project/published_project.html | 2 -- 1 file changed, 2 deletions(-) diff --git a/physionet-django/project/templates/project/published_project.html b/physionet-django/project/templates/project/published_project.html index 866ad7440c..299db5ad7c 100644 --- a/physionet-django/project/templates/project/published_project.html +++ b/physionet-django/project/templates/project/published_project.html @@ -416,8 +416,6 @@
    Access the files
    the ZIP file ({{ compressed_size }}) {% endif %} - {% else %} - {% endif %} {% include "project/published_project_data_access.html" %} {% if is_wget_supported %} From 7a39834c640e0a40470e4d04c8186049de4b9cfc Mon Sep 17 00:00:00 2001 From: Amit Upreti Date: Mon, 10 Apr 2023 18:21:40 -0400 Subject: [PATCH 09/12] Direct Download refactor: step 6 move data_access stuff outside the if can_view_files block. Current logic was if a user can download the project files, check the data_access and show the access method details for the ones that exist. So it should be safe to bring this block outside the if can_view_files block. Note that, currently for gcp, aws, we are still using the old data_access methods. We will refactor those in future commit(the current series of commit is to refactor the direct download to use data access while maintaining the same logic) --- .../project/templates/project/published_project.html | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/physionet-django/project/templates/project/published_project.html b/physionet-django/project/templates/project/published_project.html index 299db5ad7c..7aa076e690 100644 --- a/physionet-django/project/templates/project/published_project.html +++ b/physionet-django/project/templates/project/published_project.html @@ -399,6 +399,7 @@
    Access the files
  • Download the ZIP file ({{ compressed_size }})
  • {% endif %} + {% include "project/published_project_data_access.html" %} {% if can_view_files %} {% if project.access_policy == AccessPolicy.CONTRIBUTOR_REVIEW %} @@ -417,7 +418,7 @@
    Access the files
    {% endif %} {% endif %} - {% include "project/published_project_data_access.html" %} + {% if is_wget_supported %}
  • Download the files using your terminal: From 28e83549e9c532e7b44b448719612764d1db2ad3 Mon Sep 17 00:00:00 2001 From: Amit Upreti Date: Mon, 10 Apr 2023 19:24:12 -0400 Subject: [PATCH 10/12] auto create data source on project publish MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a project is published, we should always create a datasource object based on the project storage type because with the DataSource  all access will go though it, otherwise the files wont be accessible. This commit creates the DataSource type automatically when project is published. The type of DataSource created depends on the settings. For Physionet, the default data storage is direct, and for HDN its GCP(defined by STORAGE_TYPE) and access mechanism is Research Environment. For auto creation of DataSource, i added 2 new environment variable to control location and access_mechanism of DataSource. Note: the DataSourceCreator doesn't belong on the access.py, i had to keep it here for a short time until we refactor the authorization in a separate app to avoid circular import --- .env.example | 13 +++++- physionet-django/physionet/settings/base.py | 5 +++ .../project/modelcomponents/access.py | 41 +++++++++++++++++++ .../project/modelcomponents/activeproject.py | 2 + 4 files changed, 60 insertions(+), 1 deletion(-) diff --git a/.env.example b/.env.example index 1a2f1706fd..bbe52513fc 100644 --- a/.env.example +++ b/.env.example @@ -187,7 +187,18 @@ MIN_WORDS_RESEARCH_SUMMARY_CREDENTIALING = 20 # CITISOAPService API # This is the WebServices username and password to access the CITI SOAP Service to obtain users training report details # The account can be created at https://webservices.citiprogram.org/login/CreateAccount.aspx -# The SOAP Service Access can be tested at https://webservices.citiprogram.org/Client/CITISOAPClient_Simple.aspx +# The SOAP Service Access can be tested at 
https://webservices.citiprogram.org/Client/CITISOAPClient_Simple.aspx CITI_USERNAME= CITI_PASSWORD= CITI_SOAP_URL="https://webservices.citiprogram.org/SOAP/CITISOAPService.asmx" + + +# Data Source configurations +# DEFAULT_PROJECT_DATA_LOCATION controls how data will be stored when a project is published (Direct (DI), Google BigQuery (GBQ), Google Cloud Storage (GCS), AWS Open Data (AOD), AWS S3 (AS3)) +# OPTIONS are: DI, GBQ, GCS, AOD, AS3 +DEFAULT_PROJECT_DATA_LOCATION = 'DI' + +# DEFAULT_PROJECT_ACCESS_MECHANISM controls how users can use the data (Google Group Email (google-group-email), S3 (s3), Research Environment (research-environment)) +# OPTIONS are: google-group-email, s3, research-environment or "" (empty string) for no access mechanism(only direct access) +# as of now, Research Environment is only available for GCS data location +DEFAULT_PROJECT_ACCESS_MECHANISM = '' diff --git a/physionet-django/physionet/settings/base.py b/physionet-django/physionet/settings/base.py index 471f329dba..294cae4e17 100644 --- a/physionet-django/physionet/settings/base.py +++ b/physionet-django/physionet/settings/base.py @@ -17,6 +17,8 @@ from decouple import config, UndefinedValueError +from project.models import DataSource + # Build paths inside the project like this: os.path.join(BASE_DIR, ...) BASE_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) @@ -617,3 +619,6 @@ class StorageTypes: # minimum number of word needed for research_summary field for Credentialing Model. 
MIN_WORDS_RESEARCH_SUMMARY_CREDENTIALING = config('MIN_WORDS_RESEARCH_SUMMARY_CREDENTIALING', cast=int, default=20) + +DEFAULT_PROJECT_DATA_LOCATION = config('DEFAULT_PROJECT_DATA_LOCATION', default=DataSource.DataLocation.DIRECT) +DEFAULT_PROJECT_ACCESS_MECHANISM = config('DEFAULT_PROJECT_ACCESS_MECHANISM', default=None) diff --git a/physionet-django/project/modelcomponents/access.py b/physionet-django/project/modelcomponents/access.py index 50a9b78ab6..707efb092c 100644 --- a/physionet-django/project/modelcomponents/access.py +++ b/physionet-django/project/modelcomponents/access.py @@ -1,6 +1,7 @@ from datetime import timedelta from enum import IntEnum +from django.conf import settings from django.contrib.auth.hashers import check_password, make_password from django.contrib.contenttypes.fields import GenericForeignKey from django.contrib.contenttypes.models import ContentType @@ -13,6 +14,7 @@ from project.validators import validate_version from project.managers.access import DataAccessRequestQuerySet, DataAccessRequestManager +from physionet.settings.base import StorageTypes class AccessPolicy(IntEnum): @@ -337,3 +339,42 @@ class Meta: def __str__(self): return self.name + + +class DataSourceCreator: + def __init__(self, **kwargs): + self.data_location = kwargs.get('data_location', None) + self.files_available = kwargs.get('files_available', None) + self.email = kwargs.get('email', None) + self.uri = kwargs.get('uri', None) + self.access_mechanism = kwargs.get('access_mechanism', None) + + def create(self, project): + DataSource.objects.create( + project=project, + files_available=self.files_available, + data_location=self.data_location, + access_mechanism=self.access_mechanism, + email=self.email, + uri=self.uri, + ) + + @staticmethod + def create_default(project): + if (settings.DEFAULT_PROJECT_DATA_LOCATION == DataSource.DataLocation.DIRECT + and settings.STORAGE_TYPE == StorageTypes.LOCAL): + DataSource.objects.create( + project=project, + 
files_available=True, + data_location=DataSource.DataLocation.DIRECT, + ) + elif (settings.DEFAULT_PROJECT_ACCESS_MECHANISM == DataSource.AccessMechanism.RESEARCH_ENVIRONMENT + and settings.DEFAULT_PROJECT_DATA_LOCATION == DataSource.DataLocation.GOOGLE_CLOUD_STORAGE + and settings.STORAGE_TYPE == StorageTypes.GCP): + DataSource.objects.create( + project=project, + files_available=False, + data_location=DataSource.DataLocation.GOOGLE_CLOUD_STORAGE, + uri=f'gs://{project.project_file_root()}/', + access_mechanism=DataSource.AccessMechanism.RESEARCH_ENVIRONMENT, + ) diff --git a/physionet-django/project/modelcomponents/activeproject.py b/physionet-django/project/modelcomponents/activeproject.py index 2ece76d93c..469a13c099 100644 --- a/physionet-django/project/modelcomponents/activeproject.py +++ b/physionet-django/project/modelcomponents/activeproject.py @@ -29,6 +29,7 @@ from project.modelcomponents.submission import CopyeditLog, EditLog, SubmissionInfo from project.modelcomponents.unpublishedproject import UnpublishedProject from project.projectfiles import ProjectFiles +from project.models import DataSourceCreator from project.validators import validate_subdir LOGGER = logging.getLogger(__name__) @@ -614,6 +615,7 @@ def publish(self, slug=None, make_zip=True, title=None): raise ProjectFiles().publish_complete(self, published_project) + DataSourceCreator().create_default(published_project) return published_project From d987295a588513a9fa8d6e410a0cd27e1cab7dd7 Mon Sep 17 00:00:00 2001 From: Amit Upreti Date: Wed, 19 Apr 2023 10:41:32 -0400 Subject: [PATCH 11/12] add migration files --- .../project/migrations/0068_datasource.py | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 physionet-django/project/migrations/0068_datasource.py diff --git a/physionet-django/project/migrations/0068_datasource.py b/physionet-django/project/migrations/0068_datasource.py new file mode 100644 index 0000000000..320a64f353 --- /dev/null +++ 
b/physionet-django/project/migrations/0068_datasource.py @@ -0,0 +1,30 @@ +# Generated by Django 4.1.7 on 2023-04-19 14:03 + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ('project', '0067_alter_activeproject_core_project_and_more'), + ] + + operations = [ + migrations.CreateModel( + name='DataSource', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('files_available', models.BooleanField(default=False)), + ('data_location', models.CharField(choices=[('DI', 'Direct'), ('GBQ', 'Google BigQuery'), ('GCS', 'Google Cloud Storage'), ('AOD', 'AWS Open Data'), ('AS3', 'AWS S3')], default='DI', max_length=3)), + ('access_mechanism', models.CharField(blank=True, choices=[('google-group-email', 'Google Group Email'), ('s3', 'S3'), ('research-environment', 'Research Environment')], max_length=20, null=True)), + ('email', models.CharField(blank=True, max_length=320, null=True)), + ('uri', models.CharField(blank=True, max_length=320, null=True)), + ('project', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='data_sources', to='project.publishedproject')), + ], + options={ + 'default_permissions': (), + 'unique_together': {('project', 'data_location')}, + }, + ), + ] From 3a3752acbeaccd1710fb1247847806e0d6fdaec0 Mon Sep 17 00:00:00 2001 From: Amit Upreti Date: Mon, 24 Apr 2023 10:50:12 -0400 Subject: [PATCH 12/12] bug fix App models cannot be imported in settings: during initialization the settings module runs before the apps are loaded, so the import raises an error. 
--- physionet-django/physionet/settings/base.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/physionet-django/physionet/settings/base.py b/physionet-django/physionet/settings/base.py index 294cae4e17..823c4be04d 100644 --- a/physionet-django/physionet/settings/base.py +++ b/physionet-django/physionet/settings/base.py @@ -17,7 +17,6 @@ from decouple import config, UndefinedValueError -from project.models import DataSource # Build paths inside the project like this: os.path.join(BASE_DIR, ...) BASE_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) @@ -620,5 +619,5 @@ class StorageTypes: # minimum number of word needed for research_summary field for Credentialing Model. MIN_WORDS_RESEARCH_SUMMARY_CREDENTIALING = config('MIN_WORDS_RESEARCH_SUMMARY_CREDENTIALING', cast=int, default=20) -DEFAULT_PROJECT_DATA_LOCATION = config('DEFAULT_PROJECT_DATA_LOCATION', default=DataSource.DataLocation.DIRECT) +DEFAULT_PROJECT_DATA_LOCATION = config('DEFAULT_PROJECT_DATA_LOCATION', default='DI') DEFAULT_PROJECT_ACCESS_MECHANISM = config('DEFAULT_PROJECT_ACCESS_MECHANISM', default=None)