Skip to content

Commit

Permalink
scripts
Browse files Browse the repository at this point in the history
  • Loading branch information
mzouink committed Aug 1, 2024
1 parent 1969db7 commit c146595
Show file tree
Hide file tree
Showing 16 changed files with 913 additions and 0 deletions.
9 changes: 9 additions & 0 deletions dacapo.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# compute_context:
# config:
# billing: cellmap
# num_cpus: 20
# num_gpus: 1
# queue: gpu_tesla
# type: Bsub
runs_base_dir: /groups/cellmap/cellmap/zouinkhim/c-elegen/dacapo_files
type: files
Binary file added plots/output.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
15 changes: 15 additions & 0 deletions scratch/check_crops.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@


# %%
# Quick sanity check: open each upscaled EMPIAR zarr container and print its
# group/array hierarchy so the available datasets can be inspected by eye.
import zarr

# Hard-coded locations of the upscaled EMPIAR datasets on the shared filesystem.
paths = [
    "/nrs/cellmap/data/empiar_c-elegans/empiar_c-elegans_upscaled.zarr",
    "/nrs/cellmap/data/empiar_fly-brain/empiar_fly-brain_upscaled.zarr",
    "/nrs/cellmap/data/empiar_glycolytic-muscle/empiar_glycolytic-muscle_upscaled.zarr",
    "/nrs/cellmap/data/empiar_hela-cell/empiar_hela-cell_upscaled.zarr",
    "/nrs/cellmap/data/empiar_lucchi-pp/empiar_lucchi-pp_upscaled.zarr",
    "/nrs/cellmap/data/empiar_salivary-gland/empiar_salivary-gland_upscaled.zarr",
]

for container_path in paths:
    container = zarr.open(container_path)
    print(container_path, container.tree())

# %%
50 changes: 50 additions & 0 deletions scratch/fix_config_yaml.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import yaml
from yaml.loader import SafeLoader
from yaml.dumper import SafeDumper

def tuple_constructor(loader, node):
    """YAML constructor: build a Python ``tuple`` from a sequence node."""
    items = loader.construct_sequence(node)
    return tuple(items)

def tuple_representer(dumper, data):
    """YAML representer: emit a tuple as a ``python/tuple``-tagged sequence."""
    tag = 'tag:yaml.org,2002:python/tuple'
    return dumper.represent_sequence(tag, data)

def replace_mito(data, to_change, new_value):
    """Recursively replace every string equal to ``to_change`` with ``new_value``.

    Walks nested dicts, lists and tuples, preserving the container type at
    each level. Only string *values* that exactly equal ``to_change`` are
    replaced; dict keys and all other values are returned unchanged.

    Args:
        data: Arbitrarily nested structure (dicts/lists/tuples/scalars).
        to_change: Exact string value to search for (e.g. ``"['mito']"``).
        new_value: Replacement value (e.g. ``"mito"``).

    Returns:
        A new structure with all matching strings replaced.
    """
    if isinstance(data, dict):
        return {key: replace_mito(value, to_change, new_value)
                for key, value in data.items()}
    if isinstance(data, list):
        return [replace_mito(item, to_change, new_value) for item in data]
    if isinstance(data, tuple):
        return tuple(replace_mito(item, to_change, new_value) for item in data)
    if isinstance(data, str) and data == to_change:
        return new_value
    return data

def read_and_modify_yaml(input_file, output_file, to_change, new_value, overwrite=False):
    """Load a YAML file, replace matching string values, and write the result.

    Every string in the loaded document that exactly equals ``to_change`` is
    replaced by ``new_value`` (see ``replace_mito``), then the modified
    document is dumped back out with the safe dumper.

    Args:
        input_file: Path of the YAML file to read.
        output_file: Path to write the modified YAML to. Ignored when
            ``overwrite`` is True.
        to_change: Exact string value to search for.
        new_value: Replacement string.
        overwrite: When True, write back to ``input_file`` in place instead
            of ``output_file``.
    """
    with open(input_file, 'r') as infile:
        data = yaml.load(infile, Loader=SafeLoader)

    modified_data = replace_mito(data, to_change, new_value)

    if overwrite:
        # In-place mode: the caller-supplied output path is ignored.
        output_file = input_file

    with open(output_file, 'w') as outfile:
        yaml.dump(modified_data, outfile, Dumper=SafeDumper)

if __name__ == "__main__":
    # Register tuple handling on the safe loader/dumper so configs containing
    # python/tuple-tagged sequences can be round-tripped.
    yaml.add_constructor(
        'tag:yaml.org,2002:python/tuple', tuple_constructor, Loader=SafeLoader
    )
    yaml.add_representer(tuple, tuple_representer, Dumper=SafeDumper)

    # Rewrite the run config in place, turning the stringified list
    # "['mito']" back into the plain channel name "mito".
    config_path = '/groups/cellmap/cellmap/zouinkhim/c-elegen/dacapo_files/configs/runs/20240722_bw_op50_mito_setup04_0_v_using_all.yaml'
    read_and_modify_yaml(
        config_path,
        config_path,
        "['mito']",
        "mito",
        overwrite=True,
    )
52 changes: 52 additions & 0 deletions scratch/generate_crops_yaml.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# %%
# Generate datasplit CSVs for DaCapo training: for every dataset, discover
# which groundtruth crops exist on disk for each organelle, then write
# "train" rows pairing the raw EM container with the matching crop.
import csv
import os  # was missing in the original; os.listdir/os.path below would raise NameError

# Raw EM container path + internal zarr dataset for each dataset name.
inputs_match = {"jrc_c-elegans-op50-1":["/nrs/cellmap/data/jrc_c-elegans-op50-1/jrc_celegans-op50-1_normalized.zarr","recon-1/em/fibsem-uint8/"],
                "jrc_c-elegans-bw-1":["/nrs/cellmap/data/jrc_c-elegans-bw-1/jrc_c-elegans-bw-1_normalized.zarr","recon-1/em/fibsem-uint8/"],
                "jrc_c-elegans-comma-1":["/nrs/cellmap/data/jrc_c-elegans-comma-1/jrc_c-elegans-comma-1.zarr","recon-1/em/fibsem-uint8"]}
# %%

datasets = ["jrc_c-elegans-op50-1", "jrc_c-elegans-bw-1", "jrc_c-elegans-comma-1"]

# Path templates for the groundtruth containers and the per-crop datasets.
gt_pattern = "/nrs/cellmap/data/{dataset}/staging/groundtruth.zarr"
gt_dataset_pattern = "{crop_id}/{organelle}/"
# %%
organelles = ["mito", "ld"]

# organelle -> {dataset: [crop names]} for crops that actually exist on disk.
result_crops = {"mito": {}, "ld": {}}
for dataset in datasets:
    gt_path = gt_pattern.format(dataset=dataset)
    crops = [c for c in os.listdir(gt_path) if "crop" in c]
    print(crops)
    for crop in crops:
        print(crop)
        for organelle in organelles:
            gt_dataset_path = gt_dataset_pattern.format(crop_id=crop, organelle=organelle)
            if os.path.exists(os.path.join(gt_path, gt_dataset_path)):
                # setdefault keeps one crop list per dataset without the
                # explicit membership check.
                result_crops[organelle].setdefault(dataset, []).append(crop)
            else:
                print(f"{gt_path}/{gt_dataset_path} does not exist")


# %%
result_crops
# %%

# Dataset groupings: one CSV is emitted per (group, organelle) combination.
groups = {"op50_bw-1": ["jrc_c-elegans-op50-1", "jrc_c-elegans-bw-1"],
          "all": ["jrc_c-elegans-op50-1", "jrc_c-elegans-bw-1", "jrc_c-elegans-comma-1"]}
for group in groups:
    for organelle in organelles:
        output_path = f"/groups/cellmap/cellmap/zouinkhim/c-elegen/csv_datasplit/20240726_{organelle}_{group}.csv"
        # newline='' is the documented way to open files for csv.writer
        # (avoids spurious blank lines on some platforms).
        with open(output_path, mode='w', newline='') as file:
            writer = csv.writer(file)
            for dataset in groups[group]:
                crops = result_crops[organelle][dataset]
                for crop in crops:
                    gt_container = gt_pattern.format(dataset=dataset)
                    gt_dataset = gt_dataset_pattern.format(crop_id=crop, organelle=organelle)
                    writer.writerow(["train", inputs_match[dataset][0], inputs_match[dataset][1], gt_container, gt_dataset])
# %%
103 changes: 103 additions & 0 deletions scratch/tmp_generate_yaml_crops.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
# %%
# Scratch notebook-style script: inspect temporary crop containers, build
# neuroglancer inspection commands, and emit a datasplit CSV for the "ld"
# label. Cells are order-dependent; run top to bottom.
import os
import zarr
path = "/nrs/cellmap/zubovy/temp/cma_test"

os.listdir(path)
# %%
# Print the top-level hierarchy of every container under `path`.
for f in os.listdir(path):
    print(f)
    z = zarr.open(f"{path}/{f}")
    print(z.tree(level=1))
# %%


# "dataset/cropNNN" identifiers to group and inspect below.
inputs = [
"jrc_c-elegans-bw-1/crop541",
"jrc_c-elegans-bw-1/crop540",
"jrc_c-elegans-comma-1/crop496",
"jrc_c-elegans-op50-1/crop521",
"jrc_c-elegans-op50-1/crop528",
"jrc_c-elegans-op50-1/crop532",
"jrc_c-elegans-op50-1/crop535",
"jrc_c-elegans-bw-1/crop527",
"jrc_c-elegans-bw-1/crop538",
"jrc_c-elegans-bw-1/crop536",
"jrc_c-elegans-bw-1/crop526",
"jrc_c-elegans-op50-1/crop533",
"jrc_c-elegans-comma-1/crop515",
"jrc_c-elegans-bw-1/crop519"]

# Pre-built neuroglancer CLI fragments (raw EM container + dataset) per dataset.
inputs_match = {"jrc_c-elegans-op50-1":"-f /nrs/cellmap/data/jrc_c-elegans-op50-1/jrc_celegans-op50-1_normalized.zarr -d recon-1/em/fibsem-uint8/",
                "jrc_c-elegans-bw-1":"-f /nrs/cellmap/data/jrc_c-elegans-bw-1/jrc_c-elegans-bw-1_normalized.zarr -d recon-1/em/fibsem-uint8/",
                "jrc_c-elegans-comma-1":"-f /nrs/cellmap/data/jrc_c-elegans-comma-1/jrc_c-elegans-comma-1.zarr -d recon-1/em/fibsem-uint8"}
# %%
# Group crop names by dataset: {dataset: ["cropNNN", ...]}.
result = {}
for i in inputs:
    key = i.split("/")[0]
    if key not in result:
        result[key] = []
    result[key].append(i.split("/")[1])
result

# %%
# Assemble one neuroglancer command per dataset, adding every crop container.
commands = []
for key in result:
    command = []
    command.append(f"neuroglancer {inputs_match[key]}")

    for crop in result[key]:
        command.append(f" -f {path}/{crop}.zarr -d all")
    commands.append("".join(command))


# %%
import subprocess
# NOTE(review): shell=True with a string-built command; inputs here are
# hard-coded constants so this is fine for a scratch script, but prefer
# subprocess.run([...], shell=False) if the paths ever become external input.
subprocess.run(commands[2], shell=True)
# %%

# Crop id -> label assignment decided from the manual inspection above.
crops_label = {"mito": [541,519,515,521,532,535,533],
               "ld": [540,527,538,536,526,496,528]}
# %%
# Re-bind inputs_match as (container, dataset) pairs for CSV emission
# (shadows the CLI-fragment dict defined earlier).
inputs_match = {"jrc_c-elegans-op50-1":["/nrs/cellmap/data/jrc_c-elegans-op50-1/jrc_celegans-op50-1_normalized.zarr","recon-1/em/fibsem-uint8/"],
                "jrc_c-elegans-bw-1":["/nrs/cellmap/data/jrc_c-elegans-bw-1/jrc_c-elegans-bw-1_normalized.zarr","recon-1/em/fibsem-uint8/"],
                "jrc_c-elegans-comma-1":["/nrs/cellmap/data/jrc_c-elegans-comma-1/jrc_c-elegans-comma-1.zarr","recon-1/em/fibsem-uint8"]}
# %%
# generate csv
output_path = "/groups/cellmap/cellmap/zouinkhim/c-elegen/20240722_ld_all.csv"
# %%
import csv

# Write "train" rows for every crop labelled "ld": raw container/dataset
# followed by the crop container/dataset.
label = "ld"
with open(output_path, mode='w') as file:
    writer = csv.writer(file)
    # get dataset for crop
    dataset = result
    crops_ids = crops_label[label]

    for crop in crops_ids:
        # get the key that have the crop id in result
        # NOTE(review): assumes each crop id belongs to exactly one dataset;
        # [0] raises IndexError if the crop is missing from `result`.
        crop_key = f"crop{crop}"
        dataset_key = [key for key in dataset if crop_key in dataset[key]][0]
        print(dataset_key)

        input_container = inputs_match[dataset_key][0]
        input_dataset = inputs_match[dataset_key][1]
        print(input_container)
        print(input_dataset)
        crop_container = f"{path}/{crop_key}.zarr"
        crop_dataset = label
        writer.writerow(["train",input_container,input_dataset,crop_container,crop_dataset])

# %%
# Spot-check that one groundtruth scale level exists on disk.
os.listdir("/nrs/cellmap/data/jrc_c-elegans-bw-1/staging/groundtruth.zarr/crop519/ld/s0")
# %%


# Leftover exploration constants (unused below this point).
datasets = ["jrc_c-elegans-op50-1","jrc_c-elegans-bw-1","jrc_c-elegans-comma-1"]

gt_pattern = "/nrs/cellmap/data/{dataset}/staging/groundtruth.zarr/crop{crop_id}/{organelle}/"
# %%
organelles = ["mito","ld"]


62 changes: 62 additions & 0 deletions scratch/update_datasplit_for_run.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@

# %%
# One-off maintenance script: repoint existing DaCapo run configs at the
# newer (2024-07-26) datasplit configs stored in the config store.
from dacapo.train import train_run
from dacapo.experiments.run import Run
from dacapo.store.create_store import create_config_store

# Config store backed by the location configured in dacapo.yaml.
config_store = create_config_store()

# run = Run(run_config)
# run = Run(config_store.retrieve_run_config("test_mito_run"))
# # we already trained it, so we will just load the weights
# train_run(run)

# %%
config_store
# %%
config_store.retrieve_run_config_names()
# %%
run_config = config_store.retrieve_run_config("20240722_bw_op50_mito_setup04_0_v_using_all")
# %%
run = Run(run_config)
# %%
# NOTE(review): "valite" looks like a typo for "validate"; the value is not
# used anywhere below.
valite = run.datasplit.validate
# %%
config_store.retrieve_run_config_names()
# %%
config_store.retrieve_datasplit_config_names()
# %%
# Run name -> name of the datasplit config each run should use from now on.
matches_run_datasplit = {'20240722_all_ld_setup04_0':"20240726_ld_all_8_4nm_semantic_ld_4nm",
 '20240722_all_ld_setup04_1':"20240726_ld_all_8_4nm_semantic_ld_4nm",
 '20240722_all_ld_setup04_2':"20240726_ld_all_8_4nm_semantic_ld_4nm",
 '20240722_bw_op50_4nm_ld_setup04_0':"20240726_ld_op50_bw_8_4nm_semantic_ld_4nm",
 '20240722_bw_op50_4nm_ld_setup04_1':"20240726_ld_op50_bw_8_4nm_semantic_ld_4nm",
 '20240722_bw_op50_4nm_ld_setup04_2':"20240726_ld_op50_bw_8_4nm_semantic_ld_4nm",
 '20240722_bw_op50_8nm_ld_setup04_0':"20240726_ld_op50_bw_16_8nm_semantic_ld_8nm",
 '20240722_bw_op50_8nm_ld_setup04_1':"20240726_ld_op50_bw_16_8nm_semantic_ld_8nm",
 '20240722_bw_op50_8nm_ld_setup04_2':"20240726_ld_op50_bw_16_8nm_semantic_ld_8nm",
 '20240722_bw_op50_mito_setup04_0':"20240726_mito_op50_bw-1_8_4nm_semantic_mito_4nm",
 '20240722_bw_op50_mito_setup04_0_v_using_all':"20240726_mito_all_8_4nm_semantic_mito_4nm",
 '20240722_bw_op50_mito_setup04_1':"20240726_mito_op50_bw-1_8_4nm_semantic_mito_4nm",
 '20240722_bw_op50_mito_setup04_1_v_using_all':"20240726_mito_all_8_4nm_semantic_mito_4nm",
 '20240722_bw_op50_mito_setup04_2':"20240726_mito_op50_bw-1_16_8nm_semantic_mito_8nm",
 '20240722_bw_op50_mito_setup04_2_v_using_all':"20240726_mito_all_16_8nm_semantic_mito_8nm",
 # '20240722_bw_op50_mito_setup04_5_v_using_all':"20240726_mito_all_16_8nm_semantic_mito_8nm",
 }
def update_run_datasplit(run_name, datasplit_name):
    """Point the stored run config ``run_name`` at datasplit ``datasplit_name``.

    The config store offers no in-place update, so the existing run config is
    deleted and a modified copy is stored under the same name.
    """
    updated = config_store.retrieve_run_config(run_name)
    updated.datasplit_config = config_store.retrieve_datasplit_config(datasplit_name)
    config_store.delete_run_config(run_name)
    config_store.store_run_config(updated)
# %%
# Apply the remapping to every run listed above.
for run,datasplit in matches_run_datasplit.items():
    print(run)
    update_run_datasplit(run,datasplit)
# %%
# import subprocess  (original comment read "omport"; never actually imported)
runs = matches_run_datasplit.keys()
print(list(runs))
# %%
# Inspect where the file-backed config store lives on disk.
config_store.path
# %%
9 changes: 9 additions & 0 deletions scripts/dacapo.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# compute_context:
# config:
# billing: cellmap
# num_cpus: 20
# num_gpus: 1
# queue: gpu_tesla
# type: Bsub
runs_base_dir: /groups/cellmap/cellmap/zouinkhim/c-elegen/dacapo_files
type: files
Loading

0 comments on commit c146595

Please sign in to comment.