diff --git a/.env.example b/.env.example
index 1a2f1706fd..bbe52513fc 100644
--- a/.env.example
+++ b/.env.example
@@ -187,7 +187,18 @@ MIN_WORDS_RESEARCH_SUMMARY_CREDENTIALING = 20
 # CITISOAPService API
 # This is the WebServices username and password to access the CITI SOAP Service to obtain users training report details
 # The account can be created at https://webservices.citiprogram.org/login/CreateAccount.aspx
-# The SOAP Service Access can be tested at https://webservices.citiprogram.org/Client/CITISOAPClient_Simple.aspx
+# The SOAP Service Access can be tested at https://webservices.citiprogram.org/Client/CITISOAPClient_Simple.aspx
 CITI_USERNAME=
 CITI_PASSWORD=
 CITI_SOAP_URL="https://webservices.citiprogram.org/SOAP/CITISOAPService.asmx"
+
+
+# Data Source configurations
+# DEFAULT_PROJECT_DATA_LOCATION controls how data will be stored when a project is published (Direct (DI), Google BigQuery (GBQ), Google Cloud Storage (GCS), AWS Open Data (AOD), AWS S3 (AS3))
+# OPTIONS are: DI, GBQ, GCS, AOD, AS3
+DEFAULT_PROJECT_DATA_LOCATION = 'DI'
+
+# DEFAULT_PROJECT_ACCESS_MECHANISM controls how users can use the data (Google Group Email (google-group-email), S3 (s3), Research Environment (research-environment))
+# OPTIONS are: google-group-email, s3, research-environment or "" (empty string) for no access mechanism(only direct access)
+# as of now, Research Environment is only available for GCS data location
+DEFAULT_PROJECT_ACCESS_MECHANISM = ''
diff --git a/physionet-django/console/forms.py b/physionet-django/console/forms.py
index 74f86d7923..548c916b23 100644
--- a/physionet-django/console/forms.py
+++ b/physionet-django/console/forms.py
@@ -21,6 +21,7 @@
     Contact,
     CopyeditLog,
     DataAccess,
+    DataSource,
     DUA,
     EditLog,
     License,
@@ -689,6 +690,54 @@ def save(self):
         return data_access
 
 
+class DataSourceForm(forms.ModelForm):
+    """
+    Form for adding a DataSource to a published project.
+
+    The project is supplied by the view and is not one of the form fields.
+    """
+    class Meta:
+        model = DataSource
+        fields = ('data_location', 'access_mechanism', 'files_available', 'email', 'uri')
+
+    def __init__(self, project, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.project = project
+
+        # Only offer the research environment mechanism when the feature is enabled.
+        if not settings.ENABLE_CLOUD_RESEARCH_ENVIRONMENTS:
+            self.fields['access_mechanism'].choices = [
+                choice for choice in self.fields['access_mechanism'].choices if choice[0] != 'research-environment']
+
+    def clean(self):
+        """
+        Reject a data location that the project already has a data source for.
+
+        'project' is not a form field, so the model's (project, data_location)
+        unique_together constraint is skipped by the ModelForm uniqueness
+        validation and a duplicate would otherwise fail with an IntegrityError
+        when saving.
+        """
+        cleaned_data = super().clean()
+        data_location = cleaned_data.get('data_location')
+        if data_location:
+            duplicates = DataSource.objects.filter(project=self.project, data_location=data_location)
+            if self.instance.pk is not None:
+                duplicates = duplicates.exclude(pk=self.instance.pk)
+            if duplicates.exists():
+                raise forms.ValidationError('This project already has a data source for this data location.')
+        return cleaned_data
+
+    def save(self):
+        """
+        Attach the project to the new DataSource, save it and return it.
+        """
+        data_source = super().save(commit=False)
+        data_source.project = self.project
+        data_source.save()
+        return data_source
+
+
 class PublishedProjectContactForm(forms.ModelForm):
     class Meta:
         model = Contact
diff --git a/physionet-django/console/templates/console/manage_published_project.html b/physionet-django/console/templates/console/manage_published_project.html
index 7b3902ea2e..cf48ed61e8 100644
--- a/physionet-django/console/templates/console/manage_published_project.html
+++ b/physionet-django/console/templates/console/manage_published_project.html
@@ -359,6 +359,46 @@
Storage location
{% endif %} +
  • +
    Data Source
    +

    Add and remove Data Source options.

    +{#
    #} +{#

    Note: The remove button will remove the option for requesting cloud access that appears in the files section of a project. It will not (1) delete/deactivate the bucket or (2) remove access for users who are already using the bucket.

    #} +{#
    #} +
    + {% csrf_token %} + {% include "project/content_inline_form_snippet.html" with form=data_source_form %} + +
    + {% if data_sources %} + + + + + + + + + + + + {% for item in data_sources %} + + + + + + + + {% csrf_token %} + + + + {% endfor %} + +
    LocationAccess MechanismFiles AvailableEmailUriRemove
    {{item.data_location}}{{item.access_mechanism}}{{item.files_available}}{{item.email}}{{item.uri}}
    + {% endif %} +
  • Google Cloud
    {% if not has_credentials %} @@ -388,6 +428,7 @@
    Google Cloud
    + {% endblock %} diff --git a/physionet-django/console/views.py b/physionet-django/console/views.py index 07fc7e6919..ede8109e95 100644 --- a/physionet-django/console/views.py +++ b/physionet-django/console/views.py @@ -43,6 +43,7 @@ ActiveProject, ArchivedProject, DataAccess, + DataSource, DUA, DataAccessRequest, DUASignature, @@ -829,6 +830,7 @@ def manage_published_project(request, project_slug, version): deprecate_form = None if project.deprecated_files else forms.DeprecateFilesForm() has_credentials = bool(settings.GOOGLE_APPLICATION_CREDENTIALS) data_access_form = forms.DataAccessForm(project=project) + data_source_form = forms.DataSourceForm(project=project) contact_form = forms.PublishedProjectContactForm(project=project, instance=project.contact) legacy_author_form = forms.CreateLegacyAuthorForm(project=project) @@ -895,6 +897,18 @@ def manage_published_project(request, project_slug, version): if data_access_form.is_valid(): data_access_form.save() messages.success(request, "Stored method to access the files") + elif 'data_location' in request.POST: + data_source_form = forms.DataSourceForm(project=project, data=request.POST) + if data_source_form.is_valid(): + data_source_form.save() + messages.success(request, "Stored method to access the files") + elif 'data_source_removal' in request.POST and request.POST['data_source_removal'].isdigit(): + try: + data_source = DataSource.objects.get(project=project, id=request.POST['data_source_removal']) + data_source.delete() + # Deletes the object if it exists for that specific project. 
+ except DataSource.DoesNotExist: + pass elif 'data_access_removal' in request.POST and request.POST['data_access_removal'].isdigit(): try: data_access = DataAccess.objects.get(project=project, id=request.POST['data_access_removal']) @@ -921,6 +935,7 @@ def manage_published_project(request, project_slug, version): legacy_author_form = forms.CreateLegacyAuthorForm(project=project) data_access = DataAccess.objects.filter(project=project) + data_sources = DataSource.objects.filter(project=project) authors, author_emails, storage_info, edit_logs, copyedit_logs, latest_version = project.info_card() tasks = list(get_associated_tasks(project)) @@ -946,7 +961,9 @@ def manage_published_project(request, project_slug, version): 'deprecate_form': deprecate_form, 'has_credentials': has_credentials, 'data_access_form': data_access_form, + 'data_source_form': data_source_form, 'data_access': data_access, + 'data_sources': data_sources, 'rw_tasks': rw_tasks, 'ro_tasks': ro_tasks, 'anonymous_url': anonymous_url, diff --git a/physionet-django/physionet/settings/base.py b/physionet-django/physionet/settings/base.py index 471f329dba..823c4be04d 100644 --- a/physionet-django/physionet/settings/base.py +++ b/physionet-django/physionet/settings/base.py @@ -17,6 +17,7 @@ from decouple import config, UndefinedValueError + # Build paths inside the project like this: os.path.join(BASE_DIR, ...) BASE_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) @@ -617,3 +618,6 @@ class StorageTypes: # minimum number of word needed for research_summary field for Credentialing Model. 
MIN_WORDS_RESEARCH_SUMMARY_CREDENTIALING = config('MIN_WORDS_RESEARCH_SUMMARY_CREDENTIALING', cast=int, default=20)
+
+DEFAULT_PROJECT_DATA_LOCATION = config('DEFAULT_PROJECT_DATA_LOCATION', default='DI')
+DEFAULT_PROJECT_ACCESS_MECHANISM = config('DEFAULT_PROJECT_ACCESS_MECHANISM', default=None)
diff --git a/physionet-django/project/migrations/0068_datasource.py b/physionet-django/project/migrations/0068_datasource.py
new file mode 100644
index 0000000000..320a64f353
--- /dev/null
+++ b/physionet-django/project/migrations/0068_datasource.py
@@ -0,0 +1,31 @@
+# Generated by Django 4.1.7 on 2023-04-19 14:03
+
+from django.db import migrations, models
+import django.db.models.deletion
+
+
+class Migration(migrations.Migration):
+    """Create the DataSource model (unique per project and data location)."""
+
+    dependencies = [
+        ('project', '0067_alter_activeproject_core_project_and_more'),
+    ]
+
+    operations = [
+        migrations.CreateModel(
+            name='DataSource',
+            fields=[
+                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+                ('files_available', models.BooleanField(default=False)),
+                ('data_location', models.CharField(choices=[('DI', 'Direct'), ('GBQ', 'Google BigQuery'), ('GCS', 'Google Cloud Storage'), ('AOD', 'AWS Open Data'), ('AS3', 'AWS S3')], max_length=3)),
+                ('access_mechanism', models.CharField(blank=True, choices=[('google-group-email', 'Google Group Email'), ('s3', 'S3'), ('research-environment', 'Research Environment')], max_length=20, null=True)),
+                ('email', models.CharField(blank=True, max_length=320, null=True)),
+                ('uri', models.CharField(blank=True, max_length=320, null=True)),
+                ('project', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='data_sources', to='project.publishedproject')),
+            ],
+            options={
+                'default_permissions': (),
+                'unique_together': {('project', 'data_location')},
+            },
+        ),
+    ]
diff --git a/physionet-django/project/modelcomponents/access.py b/physionet-django/project/modelcomponents/access.py
index 0ab3df2549..707efb092c
100644
--- a/physionet-django/project/modelcomponents/access.py
+++ b/physionet-django/project/modelcomponents/access.py
@@ -1,16 +1,20 @@
 from datetime import timedelta
 from enum import IntEnum
 
+from django.conf import settings
 from django.contrib.auth.hashers import check_password, make_password
 from django.contrib.contenttypes.fields import GenericForeignKey
 from django.contrib.contenttypes.models import ContentType
+from django.core.exceptions import ValidationError
 from django.db import models
 from django.utils import timezone
 from django.utils.crypto import get_random_string
+from django.utils.translation import gettext_lazy as _
 
 from project.modelcomponents.fields import SafeHTMLField
 from project.validators import validate_version
 from project.managers.access import DataAccessRequestQuerySet, DataAccessRequestManager
+from physionet.settings.base import StorageTypes
 
 
 class AccessPolicy(IntEnum):
@@ -167,6 +171,80 @@ class Meta:
         default_permissions = ()
 
 
+class DataSource(models.Model):
+    """
+    Controls all access to project data.
+
+    Each instance records one data location for a published project,
+    together with the access mechanism, email and uri used to reach the
+    data there. A project has at most one data source per data location.
+    """
+    class DataLocation(models.TextChoices):
+        DIRECT = 'DI', 'Direct'
+        GOOGLE_BIGQUERY = 'GBQ', 'Google BigQuery'
+        GOOGLE_CLOUD_STORAGE = 'GCS', 'Google Cloud Storage'
+        AWS_OPEN_DATA = 'AOD', 'AWS Open Data'
+        AWS_S3 = 'AS3', 'AWS S3'
+
+    class AccessMechanism(models.TextChoices):
+        GOOGLE_GROUP_EMAIL = 'google-group-email', 'Google Group Email'
+        S3 = 's3', 'S3'
+        RESEARCH_ENVIRONMENT = 'research-environment', 'Research Environment'
+
+    project = models.ForeignKey('project.PublishedProject',
+                                related_name='data_sources', db_index=True, on_delete=models.CASCADE)
+    files_available = models.BooleanField(default=False)
+    data_location = models.CharField(max_length=3, choices=DataLocation.choices)
+    access_mechanism = models.CharField(max_length=20, choices=AccessMechanism.choices, null=True, blank=True)
+    email = models.CharField(max_length=320, null=True, blank=True)
+    uri = models.CharField(max_length=320, null=True, blank=True)
+
+    class Meta:
+        default_permissions = ()
+        unique_together = ('project', 'data_location')
+
+    def clean(self):
+        """
+        Check that the access details are consistent with the data location.
+
+        Raises ValidationError when the access mechanism, email or uri do not
+        fit the selected data location.
+        """
+        super().clean()
+
+        if self.data_location == self.DataLocation.GOOGLE_BIGQUERY:
+            if self.access_mechanism != self.AccessMechanism.GOOGLE_GROUP_EMAIL:
+                raise ValidationError('Google BigQuery data sources must use the Google Group Email access mechanism.')
+            if not self.email:
+                raise ValidationError('Google BigQuery data sources must have an email address.')
+        elif self.data_location == self.DataLocation.GOOGLE_CLOUD_STORAGE:
+            # NOTE(review): DataSourceCreator.create_default() stores Google Cloud
+            # Storage sources with the Research Environment mechanism, which the
+            # check below rejects - confirm which combination is intended.
+            if self.access_mechanism != self.AccessMechanism.GOOGLE_GROUP_EMAIL:
+                raise ValidationError('Google Cloud Storage data sources must use the Google Group Email access '
+                                      'mechanism.')
+            if not self.uri:
+                raise ValidationError('Google Cloud Storage data sources must have an uri address.')
+        elif self.data_location == self.DataLocation.AWS_OPEN_DATA:
+            if self.access_mechanism != self.AccessMechanism.S3:
+                raise ValidationError('AWS Open Data data sources must use the S3 access mechanism.')
+            if not self.uri:
+                raise ValidationError('AWS Open Data data sources must have a URI.')
+        elif self.data_location == self.DataLocation.AWS_S3:
+            if self.access_mechanism != self.AccessMechanism.S3:
+                raise ValidationError('AWS S3 data sources must use the S3 access mechanism.')
+            if not self.uri:
+                raise ValidationError('AWS S3 data sources must have a URI.')
+        elif self.data_location == self.DataLocation.DIRECT:
+            if self.email:
+                raise ValidationError('Direct data sources must not have an email address.')
+            if self.uri:
+                raise ValidationError('Direct data sources must not have a URI.')
+        else:
+            raise ValidationError('Invalid data location.')
+
+
 class AnonymousAccess(models.Model):
     """
     Makes it possible to grant anonymous access (without user auth)
@@ -274,3 +352,63 @@ class Meta:
 
     def __str__(self):
         return self.name
+
+
+class DataSourceCreator:
+    """
+    Creates the DataSource objects of a published project.
+
+    The keyword arguments (data_location, files_available, email, uri,
+    access_mechanism) are the field values used by create().
+    """
+    def __init__(self, **kwargs):
+        self.data_location = kwargs.get('data_location', None)
+        # DataSource.files_available is not nullable, so default to False.
+        self.files_available = kwargs.get('files_available', False)
+        self.email = kwargs.get('email', None)
+        self.uri = kwargs.get('uri', None)
+        self.access_mechanism = kwargs.get('access_mechanism', None)
+
+    def create(self, project):
+        """
+        Create and return a DataSource for the project from the stored values.
+        """
+        return DataSource.objects.create(
+            project=project,
+            files_available=self.files_available,
+            data_location=self.data_location,
+            access_mechanism=self.access_mechanism,
+            email=self.email,
+            uri=self.uri,
+        )
+
+    @staticmethod
+    def create_default(project):
+        """
+        Create the default DataSource for a newly published project.
+
+        The default depends on the DEFAULT_PROJECT_DATA_LOCATION,
+        DEFAULT_PROJECT_ACCESS_MECHANISM and STORAGE_TYPE settings. Returns
+        the new DataSource, or None when the settings match no default.
+        """
+        if (settings.DEFAULT_PROJECT_DATA_LOCATION == DataSource.DataLocation.DIRECT
+                and settings.STORAGE_TYPE == StorageTypes.LOCAL):
+            return DataSource.objects.create(
+                project=project,
+                files_available=True,
+                data_location=DataSource.DataLocation.DIRECT,
+            )
+
+        # The research environment is an access mechanism, not a data location.
+        if (settings.DEFAULT_PROJECT_ACCESS_MECHANISM == DataSource.AccessMechanism.RESEARCH_ENVIRONMENT
+                and settings.DEFAULT_PROJECT_DATA_LOCATION == DataSource.DataLocation.GOOGLE_CLOUD_STORAGE
+                and settings.STORAGE_TYPE == StorageTypes.GCP):
+            return DataSource.objects.create(
+                project=project,
+                files_available=False,
+                data_location=DataSource.DataLocation.GOOGLE_CLOUD_STORAGE,
+                uri=f'gs://{project.project_file_root()}/',
+                access_mechanism=DataSource.AccessMechanism.RESEARCH_ENVIRONMENT,
+            )
+
+        return None
diff --git a/physionet-django/project/modelcomponents/activeproject.py b/physionet-django/project/modelcomponents/activeproject.py
index 2ece76d93c..469a13c099 100644
--- a/physionet-django/project/modelcomponents/activeproject.py
+++ b/physionet-django/project/modelcomponents/activeproject.py
@@ -29,6 +29,7 @@
 from project.modelcomponents.submission import CopyeditLog, EditLog, SubmissionInfo
 from project.modelcomponents.unpublishedproject import UnpublishedProject
 from project.projectfiles import ProjectFiles
+from project.models import DataSourceCreator
 from project.validators import validate_subdir
 
 LOGGER = logging.getLogger(__name__)
@@ -614,6 +615,7 @@ def publish(self, slug=None, make_zip=True, title=None):
             raise
 
         ProjectFiles().publish_complete(self, published_project)
+        DataSourceCreator().create_default(published_project)
 
         return published_project
 
diff --git a/physionet-django/project/templates/project/published_project.html b/physionet-django/project/templates/project/published_project.html
index 7975abf812..7aa076e690 100644
--- a/physionet-django/project/templates/project/published_project.html
+++ b/physionet-django/project/templates/project/published_project.html
@@ -36,26 +36,26 @@

    {{ project.title }}

    - {% for author in authors %} - {{ author|show_author_info|safe }} - {% if forloop.counter < authors|length %}, {% endif %} - {% endfor %} + {% for author in authors %} + {{ author|show_author_info|safe }} + {% if forloop.counter < authors|length %}, {% endif %} + {% endfor %}

    Published: {{ project.publish_datetime|date }}. Version: {{ project.version }}{% if not project.is_latest_version %} - <View latest version>{% endif %} + <View latest version>{% endif %}

    {% if not project.is_latest_version %} - + {% endif %}
    @@ -63,23 +63,23 @@

    {{ project.title }}

    {% if news %} {% endif %} @@ -93,18 +93,18 @@

    {{ project.title }}

    {% if project.is_legacy %} {{ project.full_description|safe }} -
    +
    {% else %} {# 0: Database #} {% if project.resource_type.id == 0 %} {% include "project/database_content.html" %} - {# 1: Software #} + {# 1: Software #} {% elif project.resource_type.id == 1 %} {% include "project/software_content.html" %} - {# 2: Challenge #} + {# 2: Challenge #} {% elif project.resource_type.id == 2 %} {% include "project/challenge_content.html" %} - {# 3: Model #} + {# 3: Model #} {% elif project.resource_type.id == 3 %} {% include "project/model_content.html" %} {% endif %} @@ -116,131 +116,131 @@

    {{ project.title }}

    {# Contents Button #} {% if not project.is_legacy %} -
    - - {% endif %} {% if parent_projects %} -
    -
    Parent Projects
    -
    - {{ project.title }} was derived from: -
      - {% for pp in parent_projects %} -
    • {{ pp }}
    • - {% endfor %} -
    - Please cite them when using this project. +
    +
    Parent Projects
    +
    + {{ project.title }} was derived from: +
      + {% for pp in parent_projects %} +
    • {{ pp }}
    • + {% endfor %} +
    + Please cite them when using this project. +
    -
    {% endif %}
    Share
    + href="mailto:?subject={{ project.title|urlencode }}&body={{ request.build_absolute_uri }}" + role="button" title="Share with email"> + href="http://www.facebook.com/sharer.php?u={{ request.build_absolute_uri }}" role="button" + title="Share on Facebook"> + href="https://www.linkedin.com/shareArticle?url={{ request.build_absolute_uri }}" + role="button" title="Share on LinkedIn"> + href="https://www.reddit.com/submit?url={{ request.build_absolute_uri }}&title={{ project.title|urlencode }}" + role="button" title="Share on Reddit"> + href="https://twitter.com/intent/tweet?text={{ project.title|urlencode }}. {{ request.build_absolute_uri }}" + role="button" title="Share on Twitter">
    @@ -309,7 +309,7 @@
    Discovery

    Project Website:
    {{ project.project_home_page }} + class="fas fa-external-link-alt"> {{ project.project_home_page }}

    {% endif %}
    @@ -335,7 +335,7 @@
    Versions
      {% for project in all_project_versions %}
    • {{ project.version }} + href="{% url 'published_project' project.slug project.version %}">{{ project.version }} - {{ project.publish_datetime|date }}
    • {% endfor %} @@ -355,7 +355,7 @@

      Files

      The files for this version of the project ({{ project.version }}) are no longer available. The latest version of this project is {{ latest_version.version }} + target="_blank">{{ latest_version.version }} {% endif %}
    {% elif project.embargo_active %} @@ -364,85 +364,87 @@

    Files

    {% else %} {% if is_authorized %} - {% if project.allow_file_downloads %} - {# refactored code goes here #} -

    Total uncompressed size: {{ main_size }}.

    - {# ZIP START #} -
    Access the files
    - {% if project.access_policy == AccessPolicy.CONTRIBUTOR_REVIEW %} -

    You have been granted access for a specific - project. If you require access for an additional project, please submit a new request. -

    - {% endif %} - - - {# ZIP END #} + {% if project.access_policy == AccessPolicy.CONTRIBUTOR_REVIEW %} +

    You have been granted access for a specific + project. If you require access for an additional project, please submit a new request. +

    + {% endif %} - {% if is_lightwave_supported and project.access_policy == AccessPolicy.OPEN %} - {% if project.has_wfdb %} -

    Visualize waveforms

    - {% endif %} - {% endif %} + + {# ZIP END #} + + {% if is_lightwave_supported and project.access_policy == AccessPolicy.OPEN %} + {% if project.has_wfdb %} +

    Visualize waveforms

    + {% endif %} + {% endif %} + +
    + {% include "project/files_panel.html" %} +
    + + {% endif %} + + {% endif %} {% endif %} {% endif %}
    @@ -458,7 +460,7 @@
    Access the files
    href="{% url 'data_access_request_status' project.slug project.version %}">please reapply. {% endif %} - +