Skip to content

Commit

Permalink
scripts
Browse files Browse the repository at this point in the history
  • Loading branch information
mzouink committed Aug 1, 2024
1 parent 1969db7 commit c146595
Show file tree
Hide file tree
Showing 16 changed files with 913 additions and 0 deletions.
9 changes: 9 additions & 0 deletions dacapo.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# compute_context:
# config:
# billing: cellmap
# num_cpus: 20
# num_gpus: 1
# queue: gpu_tesla
# type: Bsub
runs_base_dir: /groups/cellmap/cellmap/zouinkhim/c-elegen/dacapo_files
type: files
Binary file added plots/output.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
15 changes: 15 additions & 0 deletions scratch/check_crops.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@


# %%
# Quick sanity check: open each upscaled EMPIAR zarr container and print its
# group/array hierarchy so the available datasets can be inspected by eye.
import zarr

# Hard-coded locations of the upscaled EMPIAR datasets on the shared filesystem.
paths = [
    "/nrs/cellmap/data/empiar_c-elegans/empiar_c-elegans_upscaled.zarr",
    "/nrs/cellmap/data/empiar_fly-brain/empiar_fly-brain_upscaled.zarr",
    "/nrs/cellmap/data/empiar_glycolytic-muscle/empiar_glycolytic-muscle_upscaled.zarr",
    "/nrs/cellmap/data/empiar_hela-cell/empiar_hela-cell_upscaled.zarr",
    "/nrs/cellmap/data/empiar_lucchi-pp/empiar_lucchi-pp_upscaled.zarr",
    "/nrs/cellmap/data/empiar_salivary-gland/empiar_salivary-gland_upscaled.zarr",
]

for container_path in paths:
    container = zarr.open(container_path)
    print(container_path, container.tree())

# %%
50 changes: 50 additions & 0 deletions scratch/fix_config_yaml.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import yaml
from yaml.loader import SafeLoader
from yaml.dumper import SafeDumper

def tuple_constructor(loader, node):
    """YAML constructor: build a Python ``tuple`` from a sequence node."""
    items = loader.construct_sequence(node)
    return tuple(items)

def tuple_representer(dumper, data):
    """YAML representer: emit a tuple as a ``python/tuple``-tagged sequence."""
    tag = 'tag:yaml.org,2002:python/tuple'
    return dumper.represent_sequence(tag, data)

def replace_mito(data, to_change, new_value):
    """Recursively replace every string equal to ``to_change`` with ``new_value``.

    Walks nested dicts, lists and tuples, preserving the container type at
    each level. Only string *values* that exactly equal ``to_change`` are
    replaced; dict keys and all other values are returned unchanged.

    Args:
        data: Arbitrarily nested structure (dicts/lists/tuples/scalars).
        to_change: Exact string value to search for (e.g. ``"['mito']"``).
        new_value: Replacement value (e.g. ``"mito"``).

    Returns:
        A new structure with all matching strings replaced.
    """
    if isinstance(data, dict):
        return {key: replace_mito(value, to_change, new_value)
                for key, value in data.items()}
    if isinstance(data, list):
        return [replace_mito(item, to_change, new_value) for item in data]
    if isinstance(data, tuple):
        return tuple(replace_mito(item, to_change, new_value) for item in data)
    if isinstance(data, str) and data == to_change:
        return new_value
    return data

def read_and_modify_yaml(input_file, output_file, to_change, new_value, overwrite=False):
    """Load a YAML file, replace matching string values, and write the result.

    Every string in the loaded document that exactly equals ``to_change`` is
    replaced by ``new_value`` (see ``replace_mito``), then the modified
    document is dumped back out with the safe dumper.

    Args:
        input_file: Path of the YAML file to read.
        output_file: Path to write the modified YAML to. Ignored when
            ``overwrite`` is True.
        to_change: Exact string value to search for.
        new_value: Replacement string.
        overwrite: When True, write back to ``input_file`` in place instead
            of ``output_file``.
    """
    with open(input_file, 'r') as infile:
        data = yaml.load(infile, Loader=SafeLoader)

    modified_data = replace_mito(data, to_change, new_value)

    if overwrite:
        # In-place mode: the caller-supplied output path is ignored.
        output_file = input_file

    with open(output_file, 'w') as outfile:
        yaml.dump(modified_data, outfile, Dumper=SafeDumper)

if __name__ == "__main__":
    # Register tuple handling on the safe loader/dumper so configs containing
    # python/tuple-tagged sequences can be round-tripped.
    yaml.add_constructor(
        'tag:yaml.org,2002:python/tuple', tuple_constructor, Loader=SafeLoader
    )
    yaml.add_representer(tuple, tuple_representer, Dumper=SafeDumper)

    # Rewrite the run config in place, turning the stringified list
    # "['mito']" back into the plain channel name "mito".
    config_path = '/groups/cellmap/cellmap/zouinkhim/c-elegen/dacapo_files/configs/runs/20240722_bw_op50_mito_setup04_0_v_using_all.yaml'
    read_and_modify_yaml(
        config_path,
        config_path,
        "['mito']",
        "mito",
        overwrite=True,
    )
52 changes: 52 additions & 0 deletions scratch/generate_crops_yaml.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# %%
# Generate datasplit CSVs for DaCapo training: for every dataset, discover
# which groundtruth crops exist on disk for each organelle, then write
# "train" rows pairing the raw EM container with the matching crop.
import csv
import os  # was missing in the original; os.listdir/os.path below would raise NameError

# Raw EM container path + internal zarr dataset for each dataset name.
inputs_match = {"jrc_c-elegans-op50-1":["/nrs/cellmap/data/jrc_c-elegans-op50-1/jrc_celegans-op50-1_normalized.zarr","recon-1/em/fibsem-uint8/"],
                "jrc_c-elegans-bw-1":["/nrs/cellmap/data/jrc_c-elegans-bw-1/jrc_c-elegans-bw-1_normalized.zarr","recon-1/em/fibsem-uint8/"],
                "jrc_c-elegans-comma-1":["/nrs/cellmap/data/jrc_c-elegans-comma-1/jrc_c-elegans-comma-1.zarr","recon-1/em/fibsem-uint8"]}
# %%

datasets = ["jrc_c-elegans-op50-1", "jrc_c-elegans-bw-1", "jrc_c-elegans-comma-1"]

# Path templates for the groundtruth containers and the per-crop datasets.
gt_pattern = "/nrs/cellmap/data/{dataset}/staging/groundtruth.zarr"
gt_dataset_pattern = "{crop_id}/{organelle}/"
# %%
organelles = ["mito", "ld"]

# organelle -> {dataset: [crop names]} for crops that actually exist on disk.
result_crops = {"mito": {}, "ld": {}}
for dataset in datasets:
    gt_path = gt_pattern.format(dataset=dataset)
    crops = [c for c in os.listdir(gt_path) if "crop" in c]
    print(crops)
    for crop in crops:
        print(crop)
        for organelle in organelles:
            gt_dataset_path = gt_dataset_pattern.format(crop_id=crop, organelle=organelle)
            if os.path.exists(os.path.join(gt_path, gt_dataset_path)):
                # setdefault keeps one crop list per dataset without the
                # explicit membership check.
                result_crops[organelle].setdefault(dataset, []).append(crop)
            else:
                print(f"{gt_path}/{gt_dataset_path} does not exist")


# %%
result_crops
# %%

# Dataset groupings: one CSV is emitted per (group, organelle) combination.
groups = {"op50_bw-1": ["jrc_c-elegans-op50-1", "jrc_c-elegans-bw-1"],
          "all": ["jrc_c-elegans-op50-1", "jrc_c-elegans-bw-1", "jrc_c-elegans-comma-1"]}
for group in groups:
    for organelle in organelles:
        output_path = f"/groups/cellmap/cellmap/zouinkhim/c-elegen/csv_datasplit/20240726_{organelle}_{group}.csv"
        # newline='' is the documented way to open files for csv.writer
        # (avoids spurious blank lines on some platforms).
        with open(output_path, mode='w', newline='') as file:
            writer = csv.writer(file)
            for dataset in groups[group]:
                crops = result_crops[organelle][dataset]
                for crop in crops:
                    gt_container = gt_pattern.format(dataset=dataset)
                    gt_dataset = gt_dataset_pattern.format(crop_id=crop, organelle=organelle)
                    writer.writerow(["train", inputs_match[dataset][0], inputs_match[dataset][1], gt_container, gt_dataset])
# %%
103 changes: 103 additions & 0 deletions scratch/tmp_generate_yaml_crops.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
# %%
# Scratch notebook-style script: inspect temporary crop containers, build
# neuroglancer inspection commands, and emit a datasplit CSV for the "ld"
# label. Cells are order-dependent; run top to bottom.
import os
import zarr
path = "/nrs/cellmap/zubovy/temp/cma_test"

os.listdir(path)
# %%
# Print the top-level hierarchy of every container under `path`.
for f in os.listdir(path):
    print(f)
    z = zarr.open(f"{path}/{f}")
    print(z.tree(level=1))
# %%


# "dataset/cropNNN" identifiers to group and inspect below.
inputs = [
"jrc_c-elegans-bw-1/crop541",
"jrc_c-elegans-bw-1/crop540",
"jrc_c-elegans-comma-1/crop496",
"jrc_c-elegans-op50-1/crop521",
"jrc_c-elegans-op50-1/crop528",
"jrc_c-elegans-op50-1/crop532",
"jrc_c-elegans-op50-1/crop535",
"jrc_c-elegans-bw-1/crop527",
"jrc_c-elegans-bw-1/crop538",
"jrc_c-elegans-bw-1/crop536",
"jrc_c-elegans-bw-1/crop526",
"jrc_c-elegans-op50-1/crop533",
"jrc_c-elegans-comma-1/crop515",
"jrc_c-elegans-bw-1/crop519"]

# Pre-built neuroglancer CLI fragments (raw EM container + dataset) per dataset.
inputs_match = {"jrc_c-elegans-op50-1":"-f /nrs/cellmap/data/jrc_c-elegans-op50-1/jrc_celegans-op50-1_normalized.zarr -d recon-1/em/fibsem-uint8/",
                "jrc_c-elegans-bw-1":"-f /nrs/cellmap/data/jrc_c-elegans-bw-1/jrc_c-elegans-bw-1_normalized.zarr -d recon-1/em/fibsem-uint8/",
                "jrc_c-elegans-comma-1":"-f /nrs/cellmap/data/jrc_c-elegans-comma-1/jrc_c-elegans-comma-1.zarr -d recon-1/em/fibsem-uint8"}
# %%
# Group crop names by dataset: {dataset: ["cropNNN", ...]}.
result = {}
for i in inputs:
    key = i.split("/")[0]
    if key not in result:
        result[key] = []
    result[key].append(i.split("/")[1])
result

# %%
# Assemble one neuroglancer command per dataset, adding every crop container.
commands = []
for key in result:
    command = []
    command.append(f"neuroglancer {inputs_match[key]}")

    for crop in result[key]:
        command.append(f" -f {path}/{crop}.zarr -d all")
    commands.append("".join(command))


# %%
import subprocess
# NOTE(review): shell=True with a string-built command; inputs here are
# hard-coded constants so this is fine for a scratch script, but prefer
# subprocess.run([...], shell=False) if the paths ever become external input.
subprocess.run(commands[2], shell=True)
# %%

# Crop id -> label assignment decided from the manual inspection above.
crops_label = {"mito": [541,519,515,521,532,535,533],
               "ld": [540,527,538,536,526,496,528]}
# %%
# Re-bind inputs_match as (container, dataset) pairs for CSV emission
# (shadows the CLI-fragment dict defined earlier).
inputs_match = {"jrc_c-elegans-op50-1":["/nrs/cellmap/data/jrc_c-elegans-op50-1/jrc_celegans-op50-1_normalized.zarr","recon-1/em/fibsem-uint8/"],
                "jrc_c-elegans-bw-1":["/nrs/cellmap/data/jrc_c-elegans-bw-1/jrc_c-elegans-bw-1_normalized.zarr","recon-1/em/fibsem-uint8/"],
                "jrc_c-elegans-comma-1":["/nrs/cellmap/data/jrc_c-elegans-comma-1/jrc_c-elegans-comma-1.zarr","recon-1/em/fibsem-uint8"]}
# %%
# generate csv
output_path = "/groups/cellmap/cellmap/zouinkhim/c-elegen/20240722_ld_all.csv"
# %%
import csv

# Write "train" rows for every crop labelled "ld": raw container/dataset
# followed by the crop container/dataset.
label = "ld"
with open(output_path, mode='w') as file:
    writer = csv.writer(file)
    # get dataset for crop
    dataset = result
    crops_ids = crops_label[label]

    for crop in crops_ids:
        # get the key that have the crop id in result
        # NOTE(review): assumes each crop id belongs to exactly one dataset;
        # [0] raises IndexError if the crop is missing from `result`.
        crop_key = f"crop{crop}"
        dataset_key = [key for key in dataset if crop_key in dataset[key]][0]
        print(dataset_key)

        input_container = inputs_match[dataset_key][0]
        input_dataset = inputs_match[dataset_key][1]
        print(input_container)
        print(input_dataset)
        crop_container = f"{path}/{crop_key}.zarr"
        crop_dataset = label
        writer.writerow(["train",input_container,input_dataset,crop_container,crop_dataset])

# %%
# Spot-check that one groundtruth scale level exists on disk.
os.listdir("/nrs/cellmap/data/jrc_c-elegans-bw-1/staging/groundtruth.zarr/crop519/ld/s0")
# %%


# Leftover exploration constants (unused below this point).
datasets = ["jrc_c-elegans-op50-1","jrc_c-elegans-bw-1","jrc_c-elegans-comma-1"]

gt_pattern = "/nrs/cellmap/data/{dataset}/staging/groundtruth.zarr/crop{crop_id}/{organelle}/"
# %%
organelles = ["mito","ld"]


62 changes: 62 additions & 0 deletions scratch/update_datasplit_for_run.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@

# %%
# One-off maintenance script: repoint existing DaCapo run configs at the
# newer (2024-07-26) datasplit configs stored in the config store.
from dacapo.train import train_run
from dacapo.experiments.run import Run
from dacapo.store.create_store import create_config_store

# Config store backed by the location configured in dacapo.yaml.
config_store = create_config_store()

# run = Run(run_config)
# run = Run(config_store.retrieve_run_config("test_mito_run"))
# # we already trained it, so we will just load the weights
# train_run(run)

# %%
config_store
# %%
config_store.retrieve_run_config_names()
# %%
run_config = config_store.retrieve_run_config("20240722_bw_op50_mito_setup04_0_v_using_all")
# %%
run = Run(run_config)
# %%
# NOTE(review): "valite" looks like a typo for "validate"; the value is not
# used anywhere below.
valite = run.datasplit.validate
# %%
config_store.retrieve_run_config_names()
# %%
config_store.retrieve_datasplit_config_names()
# %%
# Run name -> name of the datasplit config each run should use from now on.
matches_run_datasplit = {'20240722_all_ld_setup04_0':"20240726_ld_all_8_4nm_semantic_ld_4nm",
 '20240722_all_ld_setup04_1':"20240726_ld_all_8_4nm_semantic_ld_4nm",
 '20240722_all_ld_setup04_2':"20240726_ld_all_8_4nm_semantic_ld_4nm",
 '20240722_bw_op50_4nm_ld_setup04_0':"20240726_ld_op50_bw_8_4nm_semantic_ld_4nm",
 '20240722_bw_op50_4nm_ld_setup04_1':"20240726_ld_op50_bw_8_4nm_semantic_ld_4nm",
 '20240722_bw_op50_4nm_ld_setup04_2':"20240726_ld_op50_bw_8_4nm_semantic_ld_4nm",
 '20240722_bw_op50_8nm_ld_setup04_0':"20240726_ld_op50_bw_16_8nm_semantic_ld_8nm",
 '20240722_bw_op50_8nm_ld_setup04_1':"20240726_ld_op50_bw_16_8nm_semantic_ld_8nm",
 '20240722_bw_op50_8nm_ld_setup04_2':"20240726_ld_op50_bw_16_8nm_semantic_ld_8nm",
 '20240722_bw_op50_mito_setup04_0':"20240726_mito_op50_bw-1_8_4nm_semantic_mito_4nm",
 '20240722_bw_op50_mito_setup04_0_v_using_all':"20240726_mito_all_8_4nm_semantic_mito_4nm",
 '20240722_bw_op50_mito_setup04_1':"20240726_mito_op50_bw-1_8_4nm_semantic_mito_4nm",
 '20240722_bw_op50_mito_setup04_1_v_using_all':"20240726_mito_all_8_4nm_semantic_mito_4nm",
 '20240722_bw_op50_mito_setup04_2':"20240726_mito_op50_bw-1_16_8nm_semantic_mito_8nm",
 '20240722_bw_op50_mito_setup04_2_v_using_all':"20240726_mito_all_16_8nm_semantic_mito_8nm",
 # '20240722_bw_op50_mito_setup04_5_v_using_all':"20240726_mito_all_16_8nm_semantic_mito_8nm",
 }
def update_run_datasplit(run_name, datasplit_name):
    """Point the stored run config ``run_name`` at datasplit ``datasplit_name``.

    The config store offers no in-place update, so the existing run config is
    deleted and a modified copy is stored under the same name.
    """
    updated = config_store.retrieve_run_config(run_name)
    updated.datasplit_config = config_store.retrieve_datasplit_config(datasplit_name)
    config_store.delete_run_config(run_name)
    config_store.store_run_config(updated)
# %%
# Apply the remapping to every run listed above.
for run,datasplit in matches_run_datasplit.items():
    print(run)
    update_run_datasplit(run,datasplit)
# %%
# import subprocess  (original comment read "omport"; never actually imported)
runs = matches_run_datasplit.keys()
print(list(runs))
# %%
# Inspect where the file-backed config store lives on disk.
config_store.path
# %%
9 changes: 9 additions & 0 deletions scripts/dacapo.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# compute_context:
# config:
# billing: cellmap
# num_cpus: 20
# num_gpus: 1
# queue: gpu_tesla
# type: Bsub
runs_base_dir: /groups/cellmap/cellmap/zouinkhim/c-elegen/dacapo_files
type: files
Loading

0 comments on commit c146595

Please sign in to comment.