diff --git a/README.md b/README.md index 72d6d05..01323ee 100644 --- a/README.md +++ b/README.md @@ -84,6 +84,8 @@ with smart_auth(domain) as auth: ## Folder structure +### Flat (default) + ``` local_folder │ @@ -98,29 +100,58 @@ local_folder | │ ... │ └─── institute1 -│ │ series_1.dcm -│ │ series_2.dcm + │ + └─── patient1---study1 + │ │ series_1.dcm + │ │ series_2.dcm + │ │ ... + │ + └─── patient1---study2 + │ │ series_1.dcm + │ │ series_2.dcm + │ │ ... + │ + └─── patient2---study1 + │ series_1.dcm + │ series_2.dcm + │ ... +``` + +### Hierarchical + +``` +local_folder +│ +└─── files +│ │ file_1.txt +│ │ file_2.txt │ │ ... │ │ │ └─── subfolder1 -│ │ series_3.dcm -│ │ series_4.dcm -│ │ ... +│ │ file_3.txt +│ │ file_4.txt +| │ ... │ -└─── institute2 - │ series_1.dcm - │ series_2.dcm - │ ... +└─── institute1 │ - └─── subfolder1 - │ │ series_3.dcm - │ │ series_4.dcm - │ │ ... + └─── patient1 + │ │ + │ └─── study1 + │ │ │ series_1.dcm + │ │ │ series_2.dcm + │ │ │ ... + │ │ + │ └─── study2 + │ │ series_1.dcm + │ │ series_2.dcm + │ │ ... │ - └─── subfolder2 - │ series_5.dcm - │ series_6.dcm - │ ... + └─── patient2 + │ │ + │ └─── study1 + │ │ series_1.dcm + │ │ series_2.dcm + │ │ ... ``` Where `local_folder` is the specified LOCAL_FOLDER via the `upload` or `download` command. @@ -139,7 +170,18 @@ Other folders/files under the `local_folder` directory will be ignored. ### Download institute-level files -Institutes will be downloaded to the LOCAL_FOLDER argument and will create sub folders based on the institute name. All studies of each institute will be downloaded as subdirectories under each institute folder. +Institutes will be downloaded to the LOCAL_FOLDER argument and will create sub folders based on the institute name. + +#### Flat + +In `flat` file structure mode (default), all studies of each institute will be downloaded as flat subdirectories under each institute folder. +The folder name will be a combination of the patient and study identifiers. 
+ +#### Hierarchical + +In `hierarchical` file structure mode, all studies of each institute will be downloaded in a nested folder structure. +In the institute folder, the first level of directories will correspond to the patients. +Each patient folder will contain a nested folder for each study that belongs to this patient. ## Developers diff --git a/src/igtcloud/client/tools/cli.py b/src/igtcloud/client/tools/cli.py index 69f0d48..38b9298 100644 --- a/src/igtcloud/client/tools/cli.py +++ b/src/igtcloud/client/tools/cli.py @@ -54,8 +54,14 @@ def cli(): @click.option('--debug', flag_value=True, help='Enable debug logging') @click.option('--concurrent-studies', type=int, default=None, help='Maximum number of concurrent studies download') @click.option('--concurrent-files', type=int, default=None, help='Maximum number of concurrent files download per study') +@click.option( + '--folder-structure', + default='flat', + type=click.Choice(['flat', 'hierarchical'], case_sensitive=False), + help='Folder structure of the data to be downloaded.' +) def download(target_folder, project, institute, environment, domain, user, ext, start, end, category, - include_modified_date, project_files, debug, concurrent_studies, concurrent_files): + include_modified_date, project_files, debug, concurrent_studies, concurrent_files, folder_structure): """Download data from Philips Interventional Cloud. 
\b @@ -91,10 +97,10 @@ def download(target_folder, project, institute, environment, domain, user, ext, return - download_institutes(project, institute, target_folder, categories=category, files_filter=filter_by_ext(ext), - studies_filter=filter_by_study_date(start, end), + download_institutes(project, institute, target_folder, categories=category, + studies_filter=filter_by_study_date(start, end), files_filter=filter_by_ext(ext), include_modified_date=include_modified_date, max_workers_studies=concurrent_studies, - max_workers_files=concurrent_files) + max_workers_files=concurrent_files, folder_structure=folder_structure) @click.command(short_help="List data from Philips Interventional Cloud in CSV file") @@ -158,8 +164,14 @@ def _get_domain(domain, environment): @click.option('--debug', flag_value=True, help='Enable debug logging') @click.option('--concurrent-studies', type=int, default=None, help='Maximum number of concurrent studies upload') @click.option('--concurrent-files', type=int, default=None, help='Maximum number of concurrent files upload per study') +@click.option( + '--folder-structure', + default='flat', + type=click.Choice(['flat', 'hierarchical'], case_sensitive=False), + help='Folder structure of the data to be uploaded.' +) def upload(local_folder, project, institute, environment, domain, user, submit, debug, concurrent_studies, - concurrent_files): + concurrent_files, folder_structure): """Upload data to Philips Interventional Cloud. 
\b @@ -180,7 +192,7 @@ def upload(local_folder, project, institute, environment, domain, user, submit, with smart_auth(domain, username=user) as auth: set_auth(auth) logger.info(f"Using url: {auth.domain}") - upload_project(local_folder, project, institute, submit, concurrent_studies, concurrent_files) + upload_project(local_folder, project, institute, submit, concurrent_studies, concurrent_files, folder_structure) @click.command(short_help="Login to Philips Interventional Cloud") diff --git a/src/igtcloud/client/tools/download_institute.py b/src/igtcloud/client/tools/download_institute.py index 943c26f..dfc6925 100644 --- a/src/igtcloud/client/tools/download_institute.py +++ b/src/igtcloud/client/tools/download_institute.py @@ -18,7 +18,7 @@ def download_institutes(project_name: str, institute_name: str, destination: str, categories: List[str] = None, studies_filter: Callable[[RootStudy], bool] = None, files_filter: Callable[[File, RootStudy], bool] = None, include_modified_date: bool = False, - max_workers_studies: int = None, max_workers_files: int = None): + max_workers_studies: int = None, max_workers_files: int = None, folder_structure: str = None): project, institutes = find_project_and_institutes(project_name, institute_name) if not project: logger.error(f"Project not found: {project_name}") @@ -28,6 +28,13 @@ def download_institutes(project_name: str, institute_name: str, destination: str logger.error(f"No institutes found") return + categories = [category for category in categories or [] if category in ['files', 'dicom', 'annotations']] + if not categories: + categories = ['files'] + + if folder_structure not in ['flat', 'hierarchical']: + folder_structure = 'flat' + for institute in tqdm(institutes, desc="Institutes", unit="Institute"): logger.info(f"Institute name: {institute.name}, project type: {project.project_type_name}, " f"destination: {destination}") @@ -38,15 +45,11 @@ def download_institutes(project_name: str, institute_name: str, destination: 
str if callable(studies_filter): studies = list(filter(studies_filter, studies)) - categories = [category for category in categories or [] if category in ['files', 'dicom', 'annotations']] - if not categories: - categories = ['files'] - with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers_studies or 4) as executor: with tqdm(total=len(studies), desc="Studies", unit='study') as pbar: fs = {executor.submit(_download_study, study, - os.path.join(destination, institute.name, study.study_id_human_readable), + _create_study_destination_path(destination, institute, study, folder_structure), categories, files_filter, include_modified_date, @@ -61,6 +64,13 @@ def download_institutes(project_name: str, institute_name: str, destination: str logger.exception(f"Exception during download of {study.study_id_human_readable}") +def _create_study_destination_path(destination: str, institute, study, folder_structure) -> str: + study_destination = study.study_id_human_readable + study_destination = study_destination.replace('/', '---') if folder_structure == 'flat' else study_destination + + return os.path.join(destination, institute.name, os.path.normpath(study_destination)) + + def _download_study(study, study_destination, categories, files_filter, include_modified_date, max_workers_files: int): study_json_file = os.path.join(study_destination, 'study.json') os.makedirs(os.path.dirname(study_json_file), exist_ok=True) diff --git a/src/igtcloud/client/tools/upload_project.py b/src/igtcloud/client/tools/upload_project.py index bb4c31e..73134bd 100644 --- a/src/igtcloud/client/tools/upload_project.py +++ b/src/igtcloud/client/tools/upload_project.py @@ -21,7 +21,7 @@ def upload_project(local_folder: str, project_name: str, institute_name: str = None, submit: bool = False, - max_workers_studies: int = None, max_workers_files: int = None): + max_workers_studies: int = None, max_workers_files: int = None, folder_structure: str = None): project, institutes = 
find_project_and_institutes(project_name, institute_name) if not project and not institutes: @@ -46,6 +46,9 @@ def upload_project(local_folder: str, project_name: str, institute_name: str = N return + if folder_structure not in ['flat', 'hierarchical']: + folder_structure = 'flat' + with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers_studies or 4) as executor: for institute in institutes: logger.info(f"Uploading to institute: {institute.name}") @@ -53,11 +56,26 @@ def upload_project(local_folder: str, project_name: str, institute_name: str = N institute_dir = os.path.join(local_folder, institute.name) existing_studies = institute.studies - local_study_folders = (os.path.join(institute_dir, d) for d in os.listdir(institute_dir) if - os.path.isdir(os.path.join(institute_dir, d))) + local_studies = {} + + for d in os.listdir(institute_dir): + if os.path.isdir(os.path.join(institute_dir, d)): + if folder_structure == 'flat': + study_dir = os.path.join(institute_dir, d) + patient_name = os.path.basename(study_dir) + + local_studies[study_dir] = patient_name + else: + patient_dir = os.path.join(institute_dir, d) + patient_name = os.path.basename(patient_dir) - fs = [executor.submit(upload_study, institute.study_type, study_folder, institute.id, existing_studies, - _password, max_workers_files) for study_folder in local_study_folders] + for study_dir in os.listdir(patient_dir): + study_dir = os.path.join(patient_dir, study_dir) + + local_studies[study_dir] = patient_name + + fs = [executor.submit(upload_study, institute.study_type, study_folder, local_studies[study_folder], + institute.id, existing_studies, _password, max_workers_files) for study_folder in local_studies] for f in tqdm(concurrent.futures.as_completed(fs), total=len(fs), desc="Studies", unit='study'): study, files_uploaded, files_skipped = f.result() @@ -65,12 +83,13 @@ def upload_project(local_folder: str, project_name: str, institute_name: str = N f"files_skipped: {len(files_skipped)}") 
-def upload_study(study_type: str, study_folder: str, institute_id: str, studies: CollectionWrapper[RootStudy], - _submit_password: str = None, max_workers_files: int = None) -> Tuple[RootStudy, List[str], List[str]]: +def upload_study(study_type: str, study_folder: str, patient_name: str, institute_id: str, + studies: CollectionWrapper[RootStudy], _submit_password: str = None, + max_workers_files: int = None) -> Tuple[RootStudy, List[str], List[str]]: local_study = None study_cls = entities_service.study_type_classes.get(study_type) study_json_file = os.path.join(study_folder, 'study.json') - patient_name = os.path.basename(study_folder) + if os.path.exists(study_json_file): try: with open(study_json_file, 'r') as f: