diff --git a/.env.example b/.env.example
index 1a2f1706fd..bbe52513fc 100644
--- a/.env.example
+++ b/.env.example
@@ -187,7 +187,18 @@ MIN_WORDS_RESEARCH_SUMMARY_CREDENTIALING = 20
# CITISOAPService API
# This is the WebServices username and password to access the CITI SOAP Service to obtain users training report details
# The account can be created at https://webservices.citiprogram.org/login/CreateAccount.aspx
-# The SOAP Service Access can be tested at https://webservices.citiprogram.org/Client/CITISOAPClient_Simple.aspx
+# The SOAP Service Access can be tested at https://webservices.citiprogram.org/Client/CITISOAPClient_Simple.aspx
CITI_USERNAME=
CITI_PASSWORD=
CITI_SOAP_URL="https://webservices.citiprogram.org/SOAP/CITISOAPService.asmx"
+
+
+# Data Source configurations
+# DEFAULT_PROJECT_DATA_LOCATION controls how data will be stored when a project is published (Direct (DI), Google BigQuery (GBQ), Google Cloud Storage (GCS), AWS Open Data (AOD), AWS S3 (AS3))
+# OPTIONS are: DI, GBQ, GCS, AOD, AS3
+DEFAULT_PROJECT_DATA_LOCATION = 'DI'
+
+# DEFAULT_PROJECT_ACCESS_MECHANISM controls how users can access the data (Google Group Email (google-group-email), S3 (s3), Research Environment (research-environment))
+# OPTIONS are: google-group-email, s3, research-environment, or "" (empty string) for no access mechanism (direct access only)
+# As of now, Research Environment is only available for the GCS data location
+DEFAULT_PROJECT_ACCESS_MECHANISM = ''
diff --git a/physionet-django/console/forms.py b/physionet-django/console/forms.py
index 74f86d7923..548c916b23 100644
--- a/physionet-django/console/forms.py
+++ b/physionet-django/console/forms.py
@@ -21,6 +21,7 @@
Contact,
CopyeditLog,
DataAccess,
+ DataSource,
DUA,
EditLog,
License,
@@ -689,6 +690,26 @@ def save(self):
return data_access
+class DataSourceForm(forms.ModelForm):
+    """
+    Console form used to attach a DataSource to a published project.
+
+    The project is passed in by the caller and assigned when the form is
+    saved; it is not one of the editable fields.
+    """
+    class Meta:
+        model = DataSource
+        fields = ('data_location', 'access_mechanism', 'files_available', 'email', 'uri')
+
+    def __init__(self, project, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.project = project
+
+        # Nothing to filter when research environments are enabled.
+        if settings.ENABLE_CLOUD_RESEARCH_ENVIRONMENTS:
+            return
+
+        # Otherwise hide the research environment option from the dropdown.
+        mechanism_field = self.fields['access_mechanism']
+        mechanism_field.choices = [
+            option for option in mechanism_field.choices if option[0] != 'research-environment']
+
+    def save(self):
+        """
+        Save the data source for the project given at construction and
+        return the saved instance.
+        """
+        instance = super().save(commit=False)
+        instance.project = self.project
+        instance.save()
+        return instance
+
+
class PublishedProjectContactForm(forms.ModelForm):
class Meta:
model = Contact
diff --git a/physionet-django/console/templates/console/manage_published_project.html b/physionet-django/console/templates/console/manage_published_project.html
index 7b3902ea2e..cf48ed61e8 100644
--- a/physionet-django/console/templates/console/manage_published_project.html
+++ b/physionet-django/console/templates/console/manage_published_project.html
@@ -359,6 +359,46 @@
Storage location
{% endif %}
+
+
Data Source
+
Add and remove Data Source options.
+{#
#}
+{#
Note: The remove button will remove the option for requesting cloud access that appears in the files section of a project. It will not (1) delete/deactivate the bucket or (2) remove access for users who are already using the bucket.
#}
+{#
#}
+
+ {% if data_sources %}
+
+
+
Location
+
Access Mechanism
+
Files Available
+
Email
+
Uri
+
Remove
+
+
+
+ {% for item in data_sources %}
+
+
{{item.data_location}}
+
{{item.access_mechanism}}
+
{{item.files_available}}
+
{{item.email}}
+
{{item.uri}}
+
+
+ {% endfor %}
+
+
+ {% endif %}
+
Google Cloud
{% if not has_credentials %}
@@ -388,6 +428,7 @@
Google Cloud
+
{% endblock %}
diff --git a/physionet-django/console/views.py b/physionet-django/console/views.py
index 07fc7e6919..ede8109e95 100644
--- a/physionet-django/console/views.py
+++ b/physionet-django/console/views.py
@@ -43,6 +43,7 @@
ActiveProject,
ArchivedProject,
DataAccess,
+ DataSource,
DUA,
DataAccessRequest,
DUASignature,
@@ -829,6 +830,7 @@ def manage_published_project(request, project_slug, version):
deprecate_form = None if project.deprecated_files else forms.DeprecateFilesForm()
has_credentials = bool(settings.GOOGLE_APPLICATION_CREDENTIALS)
data_access_form = forms.DataAccessForm(project=project)
+ data_source_form = forms.DataSourceForm(project=project)
contact_form = forms.PublishedProjectContactForm(project=project,
instance=project.contact)
legacy_author_form = forms.CreateLegacyAuthorForm(project=project)
@@ -895,6 +897,18 @@ def manage_published_project(request, project_slug, version):
if data_access_form.is_valid():
data_access_form.save()
messages.success(request, "Stored method to access the files")
+ elif 'data_location' in request.POST:
+ data_source_form = forms.DataSourceForm(project=project, data=request.POST)
+ if data_source_form.is_valid():
+ data_source_form.save()
+ messages.success(request, "Stored method to access the files")
+ elif 'data_source_removal' in request.POST and request.POST['data_source_removal'].isdigit():
+ try:
+ data_source = DataSource.objects.get(project=project, id=request.POST['data_source_removal'])
+ data_source.delete()
+ # Deletes the object if it exists for that specific project.
+ except DataSource.DoesNotExist:
+ pass
elif 'data_access_removal' in request.POST and request.POST['data_access_removal'].isdigit():
try:
data_access = DataAccess.objects.get(project=project, id=request.POST['data_access_removal'])
@@ -921,6 +935,7 @@ def manage_published_project(request, project_slug, version):
legacy_author_form = forms.CreateLegacyAuthorForm(project=project)
data_access = DataAccess.objects.filter(project=project)
+ data_sources = DataSource.objects.filter(project=project)
authors, author_emails, storage_info, edit_logs, copyedit_logs, latest_version = project.info_card()
tasks = list(get_associated_tasks(project))
@@ -946,7 +961,9 @@ def manage_published_project(request, project_slug, version):
'deprecate_form': deprecate_form,
'has_credentials': has_credentials,
'data_access_form': data_access_form,
+ 'data_source_form': data_source_form,
'data_access': data_access,
+ 'data_sources': data_sources,
'rw_tasks': rw_tasks,
'ro_tasks': ro_tasks,
'anonymous_url': anonymous_url,
diff --git a/physionet-django/physionet/settings/base.py b/physionet-django/physionet/settings/base.py
index 471f329dba..823c4be04d 100644
--- a/physionet-django/physionet/settings/base.py
+++ b/physionet-django/physionet/settings/base.py
@@ -17,6 +17,7 @@
from decouple import config, UndefinedValueError
+
# Build paths inside the project like this: os.path.join(BASE_DIR, ...)
BASE_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
@@ -617,3 +618,6 @@ class StorageTypes:
# minimum number of word needed for research_summary field for Credentialing Model.
MIN_WORDS_RESEARCH_SUMMARY_CREDENTIALING = config('MIN_WORDS_RESEARCH_SUMMARY_CREDENTIALING', cast=int, default=20)
+
+# Defaults read by DataSourceCreator.create_default() when a project is published.
+# Both values come from the environment / .env file via decouple's config().
+DEFAULT_PROJECT_DATA_LOCATION = config('DEFAULT_PROJECT_DATA_LOCATION', default='DI')
+DEFAULT_PROJECT_ACCESS_MECHANISM = config('DEFAULT_PROJECT_ACCESS_MECHANISM', default=None)
diff --git a/physionet-django/project/migrations/0068_datasource.py b/physionet-django/project/migrations/0068_datasource.py
new file mode 100644
index 0000000000..320a64f353
--- /dev/null
+++ b/physionet-django/project/migrations/0068_datasource.py
@@ -0,0 +1,30 @@
+# Generated by Django 4.1.7 on 2023-04-19 14:03
+
+from django.db import migrations, models
+import django.db.models.deletion
+
+
+class Migration(migrations.Migration):
+    # Creates the DataSource table: one row per (project, data_location).
+
+    dependencies = [
+        ('project', '0067_alter_activeproject_core_project_and_more'),
+    ]
+
+    operations = [
+        migrations.CreateModel(
+            name='DataSource',
+            fields=[
+                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+                ('files_available', models.BooleanField(default=False)),
+                # No default here: the DataSource model declares data_location
+                # without one, and the migration state must match the model so
+                # that makemigrations does not generate a follow-up AlterField.
+                ('data_location', models.CharField(
+                    choices=[
+                        ('DI', 'Direct'),
+                        ('GBQ', 'Google BigQuery'),
+                        ('GCS', 'Google Cloud Storage'),
+                        ('AOD', 'AWS Open Data'),
+                        ('AS3', 'AWS S3'),
+                    ],
+                    max_length=3,
+                )),
+                ('access_mechanism', models.CharField(
+                    blank=True,
+                    choices=[
+                        ('google-group-email', 'Google Group Email'),
+                        ('s3', 'S3'),
+                        ('research-environment', 'Research Environment'),
+                    ],
+                    max_length=20,
+                    null=True,
+                )),
+                ('email', models.CharField(blank=True, max_length=320, null=True)),
+                ('uri', models.CharField(blank=True, max_length=320, null=True)),
+                ('project', models.ForeignKey(
+                    on_delete=django.db.models.deletion.CASCADE,
+                    related_name='data_sources',
+                    to='project.publishedproject',
+                )),
+            ],
+            options={
+                'default_permissions': (),
+                'unique_together': {('project', 'data_location')},
+            },
+        ),
+    ]
diff --git a/physionet-django/project/modelcomponents/access.py b/physionet-django/project/modelcomponents/access.py
index 0ab3df2549..707efb092c 100644
--- a/physionet-django/project/modelcomponents/access.py
+++ b/physionet-django/project/modelcomponents/access.py
@@ -1,16 +1,20 @@
from datetime import timedelta
from enum import IntEnum
+from django.conf import settings
from django.contrib.auth.hashers import check_password, make_password
from django.contrib.contenttypes.fields import GenericForeignKey
from django.contrib.contenttypes.models import ContentType
+from django.core.exceptions import ValidationError
from django.db import models
from django.utils import timezone
from django.utils.crypto import get_random_string
+from django.utils.translation import gettext_lazy as _
from project.modelcomponents.fields import SafeHTMLField
from project.validators import validate_version
from project.managers.access import DataAccessRequestQuerySet, DataAccessRequestManager
+from physionet.settings.base import StorageTypes
class AccessPolicy(IntEnum):
@@ -167,6 +171,67 @@ class Meta:
default_permissions = ()
+class DataSource(models.Model):
+    """
+    Describes one location where the data of a published project is stored
+    and the mechanism through which users access it.
+
+    A project can have at most one data source per data location
+    (see Meta.unique_together).
+    """
+    class DataLocation(models.TextChoices):
+        DIRECT = 'DI', 'Direct'
+        GOOGLE_BIGQUERY = 'GBQ', 'Google BigQuery'
+        GOOGLE_CLOUD_STORAGE = 'GCS', 'Google Cloud Storage'
+        AWS_OPEN_DATA = 'AOD', 'AWS Open Data'
+        AWS_S3 = 'AS3', 'AWS S3'
+
+    class AccessMechanism(models.TextChoices):
+        GOOGLE_GROUP_EMAIL = 'google-group-email', 'Google Group Email'
+        S3 = 's3', 'S3'
+        RESEARCH_ENVIRONMENT = 'research-environment', 'Research Environment'
+
+    project = models.ForeignKey('project.PublishedProject',
+                                related_name='data_sources', db_index=True, on_delete=models.CASCADE)
+    files_available = models.BooleanField(default=False)
+    data_location = models.CharField(max_length=3, choices=DataLocation.choices)
+    access_mechanism = models.CharField(max_length=20, choices=AccessMechanism.choices, null=True, blank=True)
+    email = models.CharField(max_length=320, null=True, blank=True)
+    uri = models.CharField(max_length=320, null=True, blank=True)
+
+    class Meta:
+        default_permissions = ()
+        unique_together = ('project', 'data_location')
+
+    def clean(self):
+        """
+        Check that the access mechanism, email and URI are consistent with
+        the selected data location.
+
+        Raises:
+            ValidationError: if the combination of fields is not allowed for
+                the data location, or the data location itself is unknown.
+        """
+        super().clean()
+
+        if self.data_location == self.DataLocation.GOOGLE_BIGQUERY:
+            if self.access_mechanism != self.AccessMechanism.GOOGLE_GROUP_EMAIL:
+                raise ValidationError('Google BigQuery data sources must use the Google Group Email access mechanism.')
+            if not self.email:
+                raise ValidationError('Google BigQuery data sources must have an email address.')
+        elif self.data_location == self.DataLocation.GOOGLE_CLOUD_STORAGE:
+            # Research environments are backed by GCS (the default data source
+            # created on publish uses exactly this pairing), so both
+            # mechanisms have to be accepted for this location.
+            if self.access_mechanism not in (self.AccessMechanism.GOOGLE_GROUP_EMAIL,
+                                             self.AccessMechanism.RESEARCH_ENVIRONMENT):
+                raise ValidationError('Google Cloud Storage data sources must use the Google Group Email or '
+                                      'Research Environment access mechanism.')
+            if not self.uri:
+                raise ValidationError('Google Cloud Storage data sources must have an uri address.')
+        elif self.data_location == self.DataLocation.AWS_OPEN_DATA:
+            if self.access_mechanism != self.AccessMechanism.S3:
+                raise ValidationError('AWS Open Data data sources must use the S3 access mechanism.')
+            if not self.uri:
+                raise ValidationError('AWS Open Data data sources must have a URI.')
+        elif self.data_location == self.DataLocation.AWS_S3:
+            if self.access_mechanism != self.AccessMechanism.S3:
+                raise ValidationError('AWS S3 data sources must use the S3 access mechanism.')
+            if not self.uri:
+                raise ValidationError('AWS S3 data sources must have a URI.')
+        elif self.data_location == self.DataLocation.DIRECT:
+            # Direct access is served by the site itself: no group email or
+            # external URI applies.
+            if self.email:
+                raise ValidationError('Direct data sources must not have an email address.')
+            if self.uri:
+                raise ValidationError('Direct data sources must not have a URI.')
+        else:
+            raise ValidationError('Invalid data location.')
+
+
class AnonymousAccess(models.Model):
"""
Makes it possible to grant anonymous access (without user auth)
@@ -274,3 +339,42 @@ class Meta:
def __str__(self):
return self.name
+
+
+class DataSourceCreator:
+    """
+    Helper for creating DataSource rows for a published project.
+
+    Optional keyword arguments (each defaults to None): data_location,
+    files_available, email, uri, access_mechanism. They are stored on the
+    instance and passed unchanged to DataSource.objects.create() by create().
+    """
+    def __init__(self, **kwargs):
+        self.data_location = kwargs.get('data_location')
+        self.files_available = kwargs.get('files_available')
+        self.email = kwargs.get('email')
+        self.uri = kwargs.get('uri')
+        self.access_mechanism = kwargs.get('access_mechanism')
+
+    def create(self, project):
+        """Create a DataSource for `project` from the values given to the constructor."""
+        DataSource.objects.create(
+            project=project,
+            files_available=self.files_available,
+            data_location=self.data_location,
+            access_mechanism=self.access_mechanism,
+            email=self.email,
+            uri=self.uri,
+        )
+
+    @staticmethod
+    def create_default(project):
+        """
+        Create the default DataSource for `project` according to the
+        DEFAULT_PROJECT_DATA_LOCATION, DEFAULT_PROJECT_ACCESS_MECHANISM and
+        STORAGE_TYPE settings.
+
+        Two configurations are handled: direct access on local storage, and a
+        research environment on Google Cloud Storage. Any other combination
+        creates nothing.
+        """
+        if (settings.DEFAULT_PROJECT_DATA_LOCATION == DataSource.DataLocation.DIRECT
+                and settings.STORAGE_TYPE == StorageTypes.LOCAL):
+            DataSource.objects.create(
+                project=project,
+                files_available=True,
+                data_location=DataSource.DataLocation.DIRECT,
+            )
+        # RESEARCH_ENVIRONMENT is a member of AccessMechanism, not DataLocation;
+        # looking it up on DataLocation raises AttributeError.
+        elif (settings.DEFAULT_PROJECT_ACCESS_MECHANISM == DataSource.AccessMechanism.RESEARCH_ENVIRONMENT
+                and settings.DEFAULT_PROJECT_DATA_LOCATION == DataSource.DataLocation.GOOGLE_CLOUD_STORAGE
+                and settings.STORAGE_TYPE == StorageTypes.GCP):
+            DataSource.objects.create(
+                project=project,
+                files_available=False,
+                data_location=DataSource.DataLocation.GOOGLE_CLOUD_STORAGE,
+                uri=f'gs://{project.project_file_root()}/',
+                access_mechanism=DataSource.AccessMechanism.RESEARCH_ENVIRONMENT,
+            )
diff --git a/physionet-django/project/modelcomponents/activeproject.py b/physionet-django/project/modelcomponents/activeproject.py
index 2ece76d93c..469a13c099 100644
--- a/physionet-django/project/modelcomponents/activeproject.py
+++ b/physionet-django/project/modelcomponents/activeproject.py
@@ -29,6 +29,7 @@
from project.modelcomponents.submission import CopyeditLog, EditLog, SubmissionInfo
from project.modelcomponents.unpublishedproject import UnpublishedProject
from project.projectfiles import ProjectFiles
+from project.models import DataSourceCreator
from project.validators import validate_subdir
LOGGER = logging.getLogger(__name__)
@@ -614,6 +615,7 @@ def publish(self, slug=None, make_zip=True, title=None):
raise
ProjectFiles().publish_complete(self, published_project)
+ DataSourceCreator().create_default(published_project)
return published_project
diff --git a/physionet-django/project/templates/project/published_project.html b/physionet-django/project/templates/project/published_project.html
index 7975abf812..7aa076e690 100644
--- a/physionet-django/project/templates/project/published_project.html
+++ b/physionet-django/project/templates/project/published_project.html
@@ -36,26 +36,26 @@
{{ project.title }}
- {% for author in authors %}
- {{ author|show_author_info|safe }}
- {% if forloop.counter < authors|length %}, {% endif %}
- {% endfor %}
+ {% for author in authors %}
+ {{ author|show_author_info|safe }}
+ {% if forloop.counter < authors|length %}, {% endif %}
+ {% endfor %}
The files for this version of the project ({{ project.version }}) are no longer available. The
latest version of this project is
{{ latest_version.version }}
+ target="_blank">{{ latest_version.version }}
{% endif %}