Skip to content

Commit

Permalink
auto create data source on project publish
Browse files Browse the repository at this point in the history
When a project is published, we should always create a DataSource
object based on the project storage type, because with the
DataSource model all access will go through it; otherwise the files won't
be accessible.

This commit creates the DataSource type automatically
when project is published.

The type of DataSource created depends on the settings.
For PhysioNet, the default data storage is direct,
and for HDN it's GCP (defined by STORAGE_TYPE).

For auto-creation of the DataSource, I added a new environment variable
to control the type of DataSource. On second thought, I don't think we need
the new environment variable, as we only have two types of storage
when a project is published, and the new env variable doesn't serve any new purpose
(I will wait for feedback from reviewers).
  • Loading branch information
superryeti committed Apr 10, 2023
1 parent b09b1b9 commit 4f8686c
Show file tree
Hide file tree
Showing 4 changed files with 25 additions and 1 deletion.
6 changes: 6 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -172,3 +172,9 @@ DEFAULT_NUMBER_OF_APPLICATIONS_TO_REMIND = 5

# minimum number of words needed for research_summary field for Credentialing Model.
MIN_WORDS_RESEARCH_SUMMARY_CREDENTIALING = 20


# Data Source configurations
# Data Source determines how the data is stored (directly on the server, GCS, BigQuery, AWS S3) and accessed (direct, Google, AWS, research environment)
# OPTIONS: direct, google, aws, research_environment
DEFAULT_PROJECT_ACCESS_MECHANISM = 'direct'
2 changes: 2 additions & 0 deletions physionet-django/physionet/settings/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -612,3 +612,5 @@ class StorageTypes:

# minimum number of words needed for research_summary field for Credentialing Model.
MIN_WORDS_RESEARCH_SUMMARY_CREDENTIALING = config('MIN_WORDS_RESEARCH_SUMMARY_CREDENTIALING', cast=int, default=20)

# Access mechanism consulted when a project is published to decide whether a
# DataSource is auto-created (the project-file backends compare it against
# 'direct' and 'research_environment').
# Documented options: direct, google, aws, research_environment
DEFAULT_PROJECT_ACCESS_MECHANISM = config('DEFAULT_PROJECT_ACCESS_MECHANISM', default='direct')
9 changes: 9 additions & 0 deletions physionet-django/project/projectfiles/gcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from django.shortcuts import redirect
from google.cloud.exceptions import Conflict, NotFound
from physionet.gcs import GCSObject, GCSObjectException, create_bucket, delete_bucket
from project.models import DataSource
from project.projectfiles.base import BaseProjectFiles
from project.utility import DirectoryInfo, FileInfo, readable_size

Expand Down Expand Up @@ -117,6 +118,14 @@ def publish_initial(self, active_project, published_project):

def publish_complete(self, active_project, published_project):
    """Finish publishing for the GCS project-files backend.

    Removes the active project's file root via ``self.rm_dir``. Then, only
    when ``settings.DEFAULT_PROJECT_ACCESS_MECHANISM`` equals
    ``'research_environment'``, creates a ``DataSource`` row for the
    published project whose URI is ``gs://<project_file_root>/``.
    """
    self.rm_dir(active_project.file_root())

    # Any other configured mechanism gets no DataSource from this backend.
    if settings.DEFAULT_PROJECT_ACCESS_MECHANISM != 'research_environment':
        return

    data_source_fields = {
        'project': published_project,
        'files_available': True,
        'data_location': DataSource.DataLocation.GOOGLE_CLOUD_STORAGE,
        'access_mechanism': DataSource.AccessMechanism.RESEARCH_ENVIRONMENT,
        'uri': f'gs://{published_project.project_file_root()}/',
    }
    DataSource.objects.create(**data_source_fields)

def publish_rollback(self, active_project, published_project):
delete_bucket(published_project.project_file_root())
Expand Down
9 changes: 8 additions & 1 deletion physionet-django/project/projectfiles/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from django.conf import settings
from physionet.utility import serve_file, sorted_tree_files, zip_dir
from project.models import DataSource
from project.projectfiles.base import BaseProjectFiles
from project.utility import (
clear_directory,
Expand Down Expand Up @@ -120,7 +121,13 @@ def publish_rollback(self, active_project, published_project):
os.rename(published_project.file_root(), active_project.file_root())

def publish_complete(self, active_project, published_project):
    """Finish publishing for the local project-files backend.

    When ``settings.DEFAULT_PROJECT_ACCESS_MECHANISM`` equals ``'direct'``,
    creates a ``DataSource`` row for the published project with its files
    marked available and a ``DIRECT`` data location. Otherwise does nothing.
    """
    # Only the 'direct' mechanism is handled by this backend.
    if settings.DEFAULT_PROJECT_ACCESS_MECHANISM != 'direct':
        return

    DataSource.objects.create(
        project=published_project,
        files_available=True,
        data_location=DataSource.DataLocation.DIRECT,
    )


def get_project_file_root(self, slug, version, access_policy, klass):
if access_policy:
Expand Down

0 comments on commit 4f8686c

Please sign in to comment.