From edff50fc007d230c5082498301ada43bfa6aa23f Mon Sep 17 00:00:00 2001
From: aranega
Date: Sat, 12 Oct 2024 13:14:14 -0600
Subject: [PATCH] Remove duplicated code for dev and prod

---
 .../visualizer/settings/development.py | 102 ------------------
 1 file changed, 102 deletions(-)

diff --git a/applications/visualizer/backend/visualizer/settings/development.py b/applications/visualizer/backend/visualizer/settings/development.py
index 2f4ab53..87e5caf 100644
--- a/applications/visualizer/backend/visualizer/settings/development.py
+++ b/applications/visualizer/backend/visualizer/settings/development.py
@@ -61,105 +61,3 @@ def get_metadata_files(self, dataset_id):
 
 RAW_DB_DATA_DOWNLOADER = DbDataDownloader
 METADATA_DOWNLOADER = DbDataDownloader
-
-
-GCS_BUCKET = "celegans"
-GCS_BUCKET_URL = f"https://storage.googleapis.com/{GCS_BUCKET}"
-DB_RAW_DATA_FOLDER = "db-raw-data"
-
-
-class DbDataDownloader:
-    def __init__(self):
-        self.session = Session(multiplexed=True)
-
-    def get_summary(self):
-        summary_content = self.session.get(
-            f"{GCS_BUCKET_URL}/{DB_RAW_DATA_FOLDER}/summary.txt", allow_redirects=True
-        )
-        if summary_content.status_code != 200:
-            raise Exception(
-                f"Error while pulling 'summary.txt' from the bucket: {summary_content}"
-            )
-        assert summary_content.text, "The summary.txt looks empty"
-        return summary_content.text
-
-    def pull_files(self):
-        summary = self.get_summary()
-        files = {}
-        print("Pulling DB data files from the bucket (multiplexed)...")
-        for bucket_file_path in summary.split():
-            destination = BASE_DIR / bucket_file_path
-            print(f" . pulling gs://{GCS_BUCKET}/{bucket_file_path} to {destination}")
-            files[destination] = self.session.get(
-                f"{GCS_BUCKET_URL}/{bucket_file_path}", allow_redirects=True
-            )
-        self.session.gather()
-        print("Writing the files...")
-        for file_path, result in files.items():
-            file_path.parent.mkdir(parents=True, exist_ok=True)
-            file_path.write_text(result.text)
-
-        # We pull the segmentation metadata and the EM viewer metadata
-        self._pull_metadata()
-
-        return BASE_DIR / DB_RAW_DATA_FOLDER
-
-    def _pull_metadata(self):
-        db_data_folder = BASE_DIR / DB_RAW_DATA_FOLDER
-        datasets = json.loads((db_data_folder / "datasets.json").read_text())
-        files = {}
-        print(
-            "Pulling EM viewer and segmentation config data files from the bucket (multiplexed)..."
-        )
-        for dataset in datasets:
-            dataset_id = dataset["id"]
-            em_metadata = db_data_folder / dataset_id / "em_metadata.json"
-            segmentation_metadata = (
-                db_data_folder / dataset_id / "segmentation_metadata.json"
-            )
-            files[segmentation_metadata] = self._pull_segmentation_metadata(dataset_id)
-            files[em_metadata] = self._pull_em_metadata(dataset_id)
-
-        for file_path, result in files.items():
-            if result.status_code != 200 or not result.text:
-                print(f" [ ] no {file_path.name} data for {file_path.parent.name}")
-                continue
-            print(
-                f" [x] configuration found for {file_path.parent.name}, writing in {file_path}"
-            )
-            file_path.parent.mkdir(parents=True, exist_ok=True)
-            file_path.write_text(result.text)
-
-    def _pull_segmentation_metadata(self, dataset_id):
-        url = f"{GCS_BUCKET_URL}/{dataset_id}/segmentations/metadata.json"
-        print(f" . pulling gs://{url}")
-        return self.session.get(url)
-
-    def _pull_em_metadata(self, dataset_id):
-        url = f"{GCS_BUCKET_URL}/{dataset_id}/em/metadata.json"
-        print(f" . pulling gs://{url}")
-        return self.session.get(url)
-
-    @classmethod
-    def get_segmentation_metadata(cls, dataset_id):
-        file = BASE_DIR / DB_RAW_DATA_FOLDER / dataset_id / "segmentation_metadata.json"
-        if not file.exists():
-            return {}
-        return json.loads(file.read_text())
-
-    @classmethod
-    def get_em_metadata(cls, dataset_id):
-        file = BASE_DIR / DB_RAW_DATA_FOLDER / dataset_id / "em_metadata.json"
-        if not file.exists():
-            return {}
-        return json.loads(file.read_text())
-
-    def get_metadata_files(self, dataset_id):
-        return (
-            self.get_em_metadata(dataset_id),
-            self.get_segmentation_metadata(dataset_id),
-        )
-
-
-RAW_DB_DATA_DOWNLOADER = DbDataDownloader
-METADATA_DOWNLOADER = DbDataDownloader
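
-- 
Note on the pattern in the deleted block (this trailer is not applied by
git am): the retained copy of DbDataDownloader fires all of its GET
requests on one multiplexed HTTP session and then resolves them with a
single gather() call. Below is a minimal sketch of that pattern. It
assumes the Session used here is the one from the niquests library,
whose Session(multiplexed=True) and gather() signatures match this code;
PATHS is a hypothetical placeholder for the file list, and the bucket
URL is the one defined in the deleted block.

    from niquests import Session

    BASE_URL = "https://storage.googleapis.com/celegans"
    PATHS = ["db-raw-data/summary.txt"]  # hypothetical file list

    session = Session(multiplexed=True)

    # With multiplexed=True the responses come back lazy: none of these
    # get() calls block, they just queue exchanges on the shared connection.
    responses = {path: session.get(f"{BASE_URL}/{path}") for path in PATHS}

    # Block once until every in-flight exchange has been resolved.
    session.gather()

    for path, response in responses.items():
        print(path, response.status_code)

The upside of this design over issuing the requests one by one is that
all transfers share a single connection and overlap in time, while the
writing loop only starts once every response body is available.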