Skip to content

Commit

Permalink
Merge pull request #29 from Laugslander/feature/set-folder-structure-…
Browse files Browse the repository at this point in the history
…for-download-and-upload

Allow for configuring the folder structure (flat / hierarchical) for download and upload
  • Loading branch information
Laugslander authored Jul 5, 2023
2 parents 9b5895e + bbafb11 commit 0a4ad09
Show file tree
Hide file tree
Showing 4 changed files with 121 additions and 38 deletions.
78 changes: 60 additions & 18 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,8 @@ with smart_auth(domain) as auth:
## Folder structure

### Flat (default)

```
local_folder
Expand All @@ -98,29 +100,58 @@ local_folder
│ │ ...
└─── institute1
│ │ series_1.dcm
│ │ series_2.dcm
└─── patient1---study1
│ │ series_1.dcm
│ │ series_2.dcm
│ │ ...
└─── patient1---study2
│ │ series_1.dcm
│ │ series_2.dcm
│ │ ...
└─── patient2---study1
│ series_1.dcm
│ series_2.dcm
│ ...
```

### Hierarchical

```
local_folder
└─── files
│ │ file_1.txt
│ │ file_2.txt
│ │ ...
│ │
│ └─── subfolder1
│ │ series_3.dcm
│ │ series_4.dcm
│ ...
│ │ file_3.txt
│ │ file_4.txt
│ │ ...
└─── institute2
│ series_1.dcm
│ series_2.dcm
│ ...
└─── institute1
└─── subfolder1
│ │ series_3.dcm
│ │ series_4.dcm
│ │ ...
└─── patient1
│ │
│ └─── study1
│ │ │ series_1.dcm
│ │ │ series_2.dcm
│ │ │ ...
│ │
│ └─── study2
│ │ series_1.dcm
│ │ series_2.dcm
│ │ ...
└─── subfolder2
│ series_5.dcm
│ series_6.dcm
│ ...
└─── patient2
│ │
│ └─── study1
│ │ series_1.dcm
│ │ series_2.dcm
│ │ ...
```

Where `local_folder` is the LOCAL_FOLDER specified via the `upload` or `download` command.
Expand All @@ -139,7 +170,18 @@ Other folders/files under the `local_folder` directory will be ignored.

### Download institute-level files

Institutes will be downloaded to the LOCAL_FOLDER argument and will create sub folders based on the institute name. All studies of each institute will be downloaded as subdirectories under each institute folder.
Institutes will be downloaded into the folder given by the LOCAL_FOLDER argument, and a subfolder named after each institute will be created there.

#### Flat

In `flat` file structure mode (default), all studies of each institute will be downloaded as flat subdirectories under each institute folder.
The folder name will be a combination of the patient and study identifiers, joined by `---` (for example `patient1---study1`).

#### Hierarchical

In `hierarchical` file structure mode, all studies of each institute will be downloaded in a nested folder structure.
In the institute folder, the first level of directories will represent the patients.
Each patient folder will contain a nested folder for each study that belongs to this patient.

## Developers

Expand Down
24 changes: 18 additions & 6 deletions src/igtcloud/client/tools/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,14 @@ def cli():
@click.option('--debug', flag_value=True, help='Enable debug logging')
@click.option('--concurrent-studies', type=int, default=None, help='Maximum number of concurrent studies download')
@click.option('--concurrent-files', type=int, default=None, help='Maximum number of concurrent files download per study')
@click.option(
'--folder-structure',
default='flat',
type=click.Choice(['flat', 'hierarchical'], case_sensitive=False),
help='Folder structure of the data to be downloaded.'
)
def download(target_folder, project, institute, environment, domain, user, ext, start, end, category,
include_modified_date, project_files, debug, concurrent_studies, concurrent_files):
include_modified_date, project_files, debug, concurrent_studies, concurrent_files, folder_structure):
"""Download data from Philips Interventional Cloud.
\b
Expand Down Expand Up @@ -91,10 +97,10 @@ def download(target_folder, project, institute, environment, domain, user, ext,

return

download_institutes(project, institute, target_folder, categories=category, files_filter=filter_by_ext(ext),
studies_filter=filter_by_study_date(start, end),
download_institutes(project, institute, target_folder, categories=category,
studies_filter=filter_by_study_date(start, end), files_filter=filter_by_ext(ext),
include_modified_date=include_modified_date, max_workers_studies=concurrent_studies,
max_workers_files=concurrent_files)
max_workers_files=concurrent_files, folder_structure=folder_structure)


@click.command(short_help="List data from Philips Interventional Cloud in CSV file")
Expand Down Expand Up @@ -158,8 +164,14 @@ def _get_domain(domain, environment):
@click.option('--debug', flag_value=True, help='Enable debug logging')
@click.option('--concurrent-studies', type=int, default=None, help='Maximum number of concurrent studies upload')
@click.option('--concurrent-files', type=int, default=None, help='Maximum number of concurrent files upload per study')
@click.option(
'--folder-structure',
default='flat',
type=click.Choice(['flat', 'hierarchical'], case_sensitive=False),
help='Folder structure of the data to be uploaded.'
)
def upload(local_folder, project, institute, environment, domain, user, submit, debug, concurrent_studies,
concurrent_files):
concurrent_files, folder_structure):
"""Upload data to Philips Interventional Cloud.
\b
Expand All @@ -180,7 +192,7 @@ def upload(local_folder, project, institute, environment, domain, user, submit,
with smart_auth(domain, username=user) as auth:
set_auth(auth)
logger.info(f"Using url: {auth.domain}")
upload_project(local_folder, project, institute, submit, concurrent_studies, concurrent_files)
upload_project(local_folder, project, institute, submit, concurrent_studies, concurrent_files, folder_structure)


@click.command(short_help="Login to Philips Interventional Cloud")
Expand Down
22 changes: 16 additions & 6 deletions src/igtcloud/client/tools/download_institute.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
def download_institutes(project_name: str, institute_name: str, destination: str, categories: List[str] = None,
studies_filter: Callable[[RootStudy], bool] = None,
files_filter: Callable[[File, RootStudy], bool] = None, include_modified_date: bool = False,
max_workers_studies: int = None, max_workers_files: int = None):
max_workers_studies: int = None, max_workers_files: int = None, folder_structure: str = None):
project, institutes = find_project_and_institutes(project_name, institute_name)
if not project:
logger.error(f"Project not found: {project_name}")
Expand All @@ -28,6 +28,13 @@ def download_institutes(project_name: str, institute_name: str, destination: str
logger.error(f"No institutes found")
return

categories = [category for category in categories or [] if category in ['files', 'dicom', 'annotations']]
if not categories:
categories = ['files']

if folder_structure not in ['flat', 'hierarchical']:
folder_structure = 'flat'

for institute in tqdm(institutes, desc="Institutes", unit="Institute"):
logger.info(f"Institute name: {institute.name}, project type: {project.project_type_name}, "
f"destination: {destination}")
Expand All @@ -38,15 +45,11 @@ def download_institutes(project_name: str, institute_name: str, destination: str
if callable(studies_filter):
studies = list(filter(studies_filter, studies))

categories = [category for category in categories or [] if category in ['files', 'dicom', 'annotations']]
if not categories:
categories = ['files']

with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers_studies or 4) as executor:
with tqdm(total=len(studies), desc="Studies", unit='study') as pbar:
fs = {executor.submit(_download_study,
study,
os.path.join(destination, institute.name, study.study_id_human_readable),
_create_study_destination_path(destination, institute, study, folder_structure),
categories,
files_filter,
include_modified_date,
Expand All @@ -61,6 +64,13 @@ def download_institutes(project_name: str, institute_name: str, destination: str
logger.exception(f"Exception during download of {study.study_id_human_readable}")


def _create_study_destination_path(destination: str, institute, study, folder_structure) -> str:
    """Build the local directory path into which a study is downloaded.

    The path is ``<destination>/<institute.name>/<study folder>``, where the
    study folder is derived from ``study.study_id_human_readable``:

    * ``folder_structure == 'flat'``: every ``/`` in the identifier is
      replaced by ``---``, so the study ends up in a single folder directly
      under the institute folder.
    * any other value: the identifier is used as-is, so each ``/`` in it
      yields a nested directory level.

    :param destination: root folder of the download.
    :param institute: object exposing a ``name`` attribute.
    :param study: object exposing a ``study_id_human_readable`` attribute.
    :param folder_structure: ``'flat'`` selects the flattened layout; every
        other value selects the nested layout.
    :return: the joined path, with the study part normalised by
        ``os.path.normpath``.
    """
    relative_study_path = study.study_id_human_readable
    if folder_structure == 'flat':
        # Collapse the slash-separated identifier into one folder name.
        relative_study_path = '---'.join(relative_study_path.split('/'))

    institute_root = os.path.join(destination, institute.name)
    return os.path.join(institute_root, os.path.normpath(relative_study_path))


def _download_study(study, study_destination, categories, files_filter, include_modified_date, max_workers_files: int):
study_json_file = os.path.join(study_destination, 'study.json')
os.makedirs(os.path.dirname(study_json_file), exist_ok=True)
Expand Down
35 changes: 27 additions & 8 deletions src/igtcloud/client/tools/upload_project.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@


def upload_project(local_folder: str, project_name: str, institute_name: str = None, submit: bool = False,
max_workers_studies: int = None, max_workers_files: int = None):
max_workers_studies: int = None, max_workers_files: int = None, folder_structure: str = None):
project, institutes = find_project_and_institutes(project_name, institute_name)

if not project and not institutes:
Expand All @@ -46,31 +46,50 @@ def upload_project(local_folder: str, project_name: str, institute_name: str = N

return

if folder_structure not in ['flat', 'hierarchical']:
folder_structure = 'flat'

with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers_studies or 4) as executor:
for institute in institutes:
logger.info(f"Uploading to institute: {institute.name}")

institute_dir = os.path.join(local_folder, institute.name)
existing_studies = institute.studies

local_study_folders = (os.path.join(institute_dir, d) for d in os.listdir(institute_dir) if
os.path.isdir(os.path.join(institute_dir, d)))
local_studies = {}

for d in os.listdir(institute_dir):
if os.path.isdir(os.path.join(institute_dir, d)):
if folder_structure == 'flat':
study_dir = os.path.join(institute_dir, d)
patient_name = os.path.basename(study_dir)

local_studies[study_dir] = patient_name
else:
patient_dir = os.path.join(institute_dir, d)
patient_name = os.path.basename(patient_dir)

fs = [executor.submit(upload_study, institute.study_type, study_folder, institute.id, existing_studies,
_password, max_workers_files) for study_folder in local_study_folders]
for study_dir in os.listdir(patient_dir):
study_dir = os.path.join(patient_dir, study_dir)

local_studies[study_dir] = patient_name

fs = [executor.submit(upload_study, institute.study_type, study_folder, local_studies[study_folder],
institute.id, existing_studies, _password, max_workers_files) for study_folder in local_studies]

for f in tqdm(concurrent.futures.as_completed(fs), total=len(fs), desc="Studies", unit='study'):
study, files_uploaded, files_skipped = f.result()
logger.info(f"Study: {study.study_id_human_readable} files_uploaded: {len(files_uploaded)}, "
f"files_skipped: {len(files_skipped)}")


def upload_study(study_type: str, study_folder: str, institute_id: str, studies: CollectionWrapper[RootStudy],
_submit_password: str = None, max_workers_files: int = None) -> Tuple[RootStudy, List[str], List[str]]:
def upload_study(study_type: str, study_folder: str, patient_name: str, institute_id: str,
studies: CollectionWrapper[RootStudy], _submit_password: str = None,
max_workers_files: int = None) -> Tuple[RootStudy, List[str], List[str]]:
local_study = None
study_cls = entities_service.study_type_classes.get(study_type)
study_json_file = os.path.join(study_folder, 'study.json')
patient_name = os.path.basename(study_folder)

if os.path.exists(study_json_file):
try:
with open(study_json_file, 'r') as f:
Expand Down

0 comments on commit 0a4ad09

Please sign in to comment.