From eec31765831e5c24f42ffc099a29056070c8c182 Mon Sep 17 00:00:00 2001 From: Maximilian Mordig Date: Tue, 31 Oct 2023 19:20:50 +0100 Subject: [PATCH] adding cli, other fixes --- README.md | 2 +- pyproject.toml | 8 +- .../seqsum_tools/seqsum_plotting.py | 6 +- src/simreaduntil/simulator/README.md | 3 + .../simulator/protos/ont_device.proto | 4 +- src/simreaduntil/simulator/simulator.py | 10 +- .../simulator/simulator_client.py | 40 +++-- .../simulator/simulator_server.py | 10 +- .../usecase_helpers/cli_usecase/__init__.py | 3 + .../cli_usecase/generate_random_reads.py | 70 +++++++++ .../cli_usecase/sim_plots_cli.py | 52 +++++++ .../cli_usecase/simulator_client_cli.py | 86 ++++++++++ .../cli_usecase/simulator_server_cli.py | 147 ++++++++++++++++++ src/simreaduntil/usecase_helpers/utils.py | 46 ++++-- tests/seqsum_tools/data/README.md | 2 +- usecases/README.md | 41 ++++- usecases/compare_replication_methods.ipynb | 61 ++++---- usecases/plot_existing_seqsum.py | 6 +- 18 files changed, 525 insertions(+), 72 deletions(-) create mode 100644 src/simreaduntil/usecase_helpers/cli_usecase/__init__.py create mode 100644 src/simreaduntil/usecase_helpers/cli_usecase/generate_random_reads.py create mode 100644 src/simreaduntil/usecase_helpers/cli_usecase/sim_plots_cli.py create mode 100644 src/simreaduntil/usecase_helpers/cli_usecase/simulator_client_cli.py create mode 100644 src/simreaduntil/usecase_helpers/cli_usecase/simulator_server_cli.py diff --git a/README.md b/README.md index 06d77c9..cb570de 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ This repository provides a simulator for an ONT device controlled by the ReadUntil API either directly or via gRPC, and can be accelerated (e.g. factor 10 with 512 channels). It takes full-length reads as input, plays them back with suitable gaps in between, and responds to ReadUntil actions. The code is well-tested with `pytest` and an example usecase combining the simulator with ReadFish and NanoSim is provided. -Access the documentation [here](https://ratschlab.github.io/ont_project/). +Access the documentation [here](https://ratschlab.github.io/sim_read_until/). See below for a [quick start](#quick-start). diff --git a/pyproject.toml b/pyproject.toml index 384ea05..02dc9a3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -72,10 +72,16 @@ readfish = [ ] [project.scripts] -plot_sim_seqsum = "simreaduntil.seqsum_tools.seqsum_plotting:main" +plot_seqsum = "simreaduntil.seqsum_tools.seqsum_plotting:main" simfasta_to_seqsum = "simreaduntil.simulator.simfasta_to_seqsum:main" simulator_with_readfish = "simreaduntil.usecase_helpers.simulator_with_readfish:main" normalize_fasta = "simreaduntil.usecase_helpers.utils:normalize_fasta_cmd" +# helpers for the usecase +usecase_generate_random_reads = "simreaduntil.usecase_helpers.cli_usecase.generate_random_reads:main" +simulator_server_cli = "simreaduntil.usecase_helpers.cli_usecase.simulator_server_cli:main" +usecase_simulator_client_cli = "simreaduntil.usecase_helpers.cli_usecase.simulator_client_cli:main" +sim_plots_cli = "simreaduntil.usecase_helpers.cli_usecase.sim_plots_cli:main" + [project.urls] "Homepage" = "https://github.com/ratschlab/sim_read_until" diff --git a/src/simreaduntil/seqsum_tools/seqsum_plotting.py b/src/simreaduntil/seqsum_tools/seqsum_plotting.py index 3edb192..8d61b4d 100644 --- a/src/simreaduntil/seqsum_tools/seqsum_plotting.py +++ b/src/simreaduntil/seqsum_tools/seqsum_plotting.py @@ -996,7 +996,7 @@ def main(): """ CLI entrypoint to create plots from a sequencing summary file """ - add_comprehensive_stream_handler_to_logger(logging.DEBUG) + add_comprehensive_stream_handler_to_logger(None, logging.DEBUG) if is_test_mode(): args = argparse.Namespace() @@ -1009,10 +1009,10 @@ def main(): args.paf_file = None else: parser = argparse.ArgumentParser(description="Plotting script for sequencing summary file from the simulator") - parser.add_argument("ref_genome_path", type=Path, help="Path to the reference genome") parser.add_argument("seqsummary_filename", type=Path, help="Path to the sequencing summary file") parser.add_argument("--nrows", type=int, default=None, help="Number of rows to read from the sequencing summary file") - parser.add_argument("--save_dir", type=Path, default=None, help="Directory to save plots to") + parser.add_argument("--ref_genome_path", type=Path, help="Path to the reference genome", default=None) + parser.add_argument("--save_dir", type=Path, default=None, help="Directory to save plots to; display if None") parser.add_argument("--cov_thresholds", type=str, default="1,2,3,4,5,6", help="Comma-separated list of target coverages to plot; set to '' if non-NanoSim reads") parser.add_argument("--targets", type=str, default=None, help="if provided, a comma-separated list, e.g. 'chr1,chr2'. Creates two groups on the plot, opposing targets to the rest") parser.add_argument("--cov_every", type=int, default=1, help="Compute coverage every x reads") diff --git a/src/simreaduntil/simulator/README.md b/src/simreaduntil/simulator/README.md index def1359..d51f4bc 100644 --- a/src/simreaduntil/simulator/README.md +++ b/src/simreaduntil/simulator/README.md @@ -40,6 +40,9 @@ mkdir protos_generated # replace imports so they work with the project structure ("ont_simulator" must be on the python path) python -m grpc_tools.protoc -Iprotos/ --python_out=protos_generated/ --pyi_out=protos_generated/ --grpc_python_out=protos_generated/ protos/ont_device.proto && \ sed -i -E "s%import (.*)_pb2 as%import simreaduntil.simulator.protos_generated.\1_pb2 as%g" protos_generated/ont_device_pb2_grpc.py + +# todo: check +cd src && python -m grpc_tools.protoc -Isimreaduntil/simulator/protos/ --python_out=simreaduntil/simulator/protos_generated/ --pyi_out=simreaduntil/simulator/protos_generated/ --grpc_python_out=simreaduntil/simulator/protos_generated/ simreaduntil/simulator/protos/ont_device.proto ``` diff --git a/src/simreaduntil/simulator/protos/ont_device.proto b/src/simreaduntil/simulator/protos/ont_device.proto index ef77f98..85d2cef 100644 --- a/src/simreaduntil/simulator/protos/ont_device.proto +++ b/src/simreaduntil/simulator/protos/ont_device.proto @@ -56,7 +56,7 @@ message ReadActionsRequest { message StopReceivingAction {} message UnblockAction { - double unblock_duration = 1; + double unblock_duration = 1; // set to <0 to take default, otherwise sets to 0! } uint32 channel = 1; @@ -86,7 +86,7 @@ message ActionResultResponse { } message StartRequest { - double acceleration_factor = 1; + double acceleration_factor = 1; // <= 0 interpreted as 1.0 string update_method = 2; uint32 log_interval = 3; bool stop_if_no_reads = 4; diff --git a/src/simreaduntil/simulator/simulator.py b/src/simreaduntil/simulator/simulator.py index 6cb59cf..82d0294 100644 --- a/src/simreaduntil/simulator/simulator.py +++ b/src/simreaduntil/simulator/simulator.py @@ -100,7 +100,7 @@ def _check_channels_available(self, channel_subset: List[int]=None): if not set(channel_subset).issubset(self._onebased_available_channels()): raise InexistentChannelsException(message=f"Tried to use {channel_subset}, but only channels {self._onebased_available_channels()} are available (channels are 1-based!)") - def start(self): + def start(self, **kwargs): """ Start the sequencing. """ @@ -141,7 +141,7 @@ def get_basecalled_read_chunks(self, batch_size=None, channel_subset=None) -> Li """ raise NotImplementedError() - def get_action_results(self) -> List[Tuple[Any, float, int, str, Any]]: + def get_action_results(self, **kwargs) -> List[Tuple[Any, float, int, str, Any]]: """ Get new results of actions that were performed with unblock and stop_receiving (mux scans etc not included) @@ -409,7 +409,7 @@ def start(self, *args, **kwargs): return True - def _forward_sim_loop(self, acceleration_factor=1, update_method="realtime", log_interval: int=10, stop_if_no_reads=True, **kwargs): + def _forward_sim_loop(self, acceleration_factor=1.0, update_method="realtime", log_interval: int=10, stop_if_no_reads=True, **kwargs): """ Helper method launched by .start() to forward the simulation. @@ -444,7 +444,7 @@ def _log(): logger.debug(f"Simulation has been running for real {cur_ns_time() - t_real_start:.2f} seconds with acceleration factor {acceleration_factor:.2f} (t_sim={t_sim:.2f}, i={i})") combined_channel_statuses = self.get_channel_stats(combined=True) - logger.debug("\nCombined channel status: " + str(combined_channel_statuses)) + logger.info("\nCombined channel status: " + str(combined_channel_statuses)) if self._channel_status_filename is not None: print(combined_channel_statuses.get_table_line(), file=self._channel_status_fh) @@ -618,6 +618,7 @@ def unblock_read(self, read_channel, read_id, unblock_duration=None) -> Optional self._check_channels_available([read_channel]) action_res = self._channels[read_channel-1].unblock(unblock_duration=unblock_duration, read_id=read_id) self._action_results.append((read_id, self._channels[read_channel-1].t, read_channel, ActionType.Unblock, action_res)) + logger.info(f"Unblocking read {read_id} on channel {read_channel}, result: {action_res.to_str()}") return action_res def stop_receiving_read(self, read_channel, read_id) -> Optional[StoppedReceivingResponse]: @@ -625,6 +626,7 @@ def stop_receiving_read(self, read_channel, read_id) -> Optional[StoppedReceivin self._check_channels_available([read_channel]) action_res = self._channels[read_channel-1].stop_receiving(read_id=read_id) self._action_results.append((read_id, self._channels[read_channel-1].t, read_channel, ActionType.StopReceiving, action_res)) + logger.info(f"Stopping receiving from read {read_id} on channel {read_channel}, result: {action_res.to_str()}") return action_res def run_mux_scan(self, t_duration: float) -> int: diff --git a/src/simreaduntil/simulator/simulator_client.py b/src/simreaduntil/simulator/simulator_client.py index ebe44e3..4e93eac 100644 --- a/src/simreaduntil/simulator/simulator_client.py +++ b/src/simreaduntil/simulator/simulator_client.py @@ -91,12 +91,12 @@ def unique_id(self) -> str: """ return self._stub.GetServerInfo(ont_device_pb2.EmptyRequest()).unique_id - def start(self, acceleration_factor: float = 1.0): + def start(self, acceleration_factor: float = 1.0, update_method: str ="realtime", log_interval: int=10, stop_if_no_reads: bool = True): """ Start the sequencing. """ self._check_connected() - return self._stub.StartSim(ont_device_pb2.StartRequest(acceleration_factor=acceleration_factor)).value + return self._stub.StartSim(ont_device_pb2.StartRequest(acceleration_factor=acceleration_factor, update_method=update_method, log_interval=log_interval, stop_if_no_reads=stop_if_no_reads)).value def stop(self): """ @@ -110,7 +110,7 @@ def run_mux_scan(self, t_duration: float): Run mux scan """ self._check_connected() - return self._stub.RunMuxScan(ont_device_pb2.RunMuxScanRequest()).nb_reads_rejected + return self._stub.RunMuxScan(ont_device_pb2.RunMuxScanRequest(t_duration=t_duration)).nb_reads_rejected @property def is_running(self): @@ -129,6 +129,7 @@ def get_basecalled_read_chunks(self, batch_size=None, channel_subset=None): channels = None else: channels = ont_device_pb2.BasecalledChunksRequest.Channels(value=channel_subset) + # batch_size = None does not set it, so proto3 assigns it a value of 0 for chunk in self._stub.GetBasecalledChunks(ont_device_pb2.BasecalledChunksRequest(batch_size=batch_size, channels=channels)): yield (chunk.channel, chunk.read_id, chunk.seq, chunk.quality_seq, chunk.estimated_ref_len_so_far) @@ -145,19 +146,40 @@ def unblock_read(self, read_channel, read_id, unblock_duration=None): """ Unblock read_id on channel; returns whether the action was performed (not performed if the read was already over) """ - self._check_connected() - return self._stub.PerformActions(ont_device_pb2.ReadActionsRequest(actions=[ - ont_device_pb2.ReadActionsRequest.Action(channel=read_channel, read_id=read_id, unblock=ont_device_pb2.ReadActionsRequest.Action.UnblockAction(unblock_duration=unblock_duration)) - ])).succeeded[0] + # self._check_connected() + # return self._stub.PerformActions(ont_device_pb2.ReadActionsRequest(actions=[ + # ont_device_pb2.ReadActionsRequest.Action(channel=read_channel, read_id=read_id, unblock=ont_device_pb2.ReadActionsRequest.Action.UnblockAction(unblock_duration=unblock_duration if unblock_duration is not None else -1)) + # ])).succeeded[0] + return self.unblock_read_batch([(read_channel, read_id)], unblock_duration=unblock_duration)[0] def stop_receiving_read(self, read_channel, read_id): """ Stop receiving read_id on channel; returns whether the action was performed (not performed if the read was already over) """ + # self._check_connected() + # return self._stub.PerformActions(ont_device_pb2.ReadActionsRequest(actions=[ + # ont_device_pb2.ReadActionsRequest.Action(channel=read_channel, read_id=read_id, stop_further_data=ont_device_pb2.ReadActionsRequest.Action.StopReceivingAction()), + # ])).succeeded[0] + return self.stop_receiving_read_batch([(read_channel, read_id)])[0] + + # batch methods + def unblock_read_batch(self, channel_and_ids, unblock_duration=None): + """ + Unblock a batch of reads on channel; returns whether the actions were performed (not performed if the read was already over) + """ + self._check_connected() + return self._stub.PerformActions(ont_device_pb2.ReadActionsRequest(actions=[ + ont_device_pb2.ReadActionsRequest.Action(channel=read_channel, read_id=read_id, unblock=ont_device_pb2.ReadActionsRequest.Action.UnblockAction(unblock_duration=unblock_duration if unblock_duration is not None else -1)) + for (read_channel, read_id) in channel_and_ids])).succeeded + + def stop_receiving_read_batch(self, channel_and_ids): + """ + Stop receiving a batch of reads on channel; returns whether the actions were performed (not performed if the read was already over) + """ self._check_connected() return self._stub.PerformActions(ont_device_pb2.ReadActionsRequest(actions=[ - ont_device_pb2.ReadActionsRequest.Action(channel=read_channel, read_id=read_id, stop_further_data=ont_device_pb2.ReadActionsRequest.Action.StopReceivingAction()), - ])).succeeded[0] + ont_device_pb2.ReadActionsRequest.Action(channel=read_channel, read_id=read_id, stop_further_data=ont_device_pb2.ReadActionsRequest.Action.StopReceivingAction()) + for (read_channel, read_id) in channel_and_ids])).succeeded @property def mk_run_dir(self): diff --git a/src/simreaduntil/simulator/simulator_server.py b/src/simreaduntil/simulator/simulator_server.py index 76517c2..764b979 100644 --- a/src/simreaduntil/simulator/simulator_server.py +++ b/src/simreaduntil/simulator/simulator_server.py @@ -98,7 +98,8 @@ def PerformActions(self, request, context): for action_desc in request.actions: channel, read_id = action_desc.channel, action_desc.read_id if action_desc.WhichOneof("action") == "unblock": - res.append(self.device.unblock_read(channel, read_id=read_id, unblock_duration=action_desc.unblock.unblock_duration)) + unblock_duration = None if action_desc.unblock.unblock_duration < 0 else action_desc.unblock.unblock_duration + res.append(self.device.unblock_read(channel, read_id=read_id, unblock_duration=unblock_duration)) else: res.append(self.device.stop_receiving_read(channel, read_id=read_id)) #todo2: current conversion from enum 0,1,2 to bool is not ideal return ont_device_pb2.ActionResultImmediateResponse(succeeded=res) @@ -112,7 +113,8 @@ def GetActionResults(self, request, context): def GetBasecalledChunks(self, request, context): # channel_subset=None on request side means that field was not set channel_subset = request.channels.value if request.HasField("channels") else None - for (channel, read_id, seq, quality_seq, estimated_ref_len_so_far) in self.device.get_basecalled_read_chunks(batch_size=request.batch_size, channel_subset=channel_subset): + batch_size = request.batch_size if request.batch_size > 0 else None + for (channel, read_id, seq, quality_seq, estimated_ref_len_so_far) in self.device.get_basecalled_read_chunks(batch_size=batch_size, channel_subset=channel_subset): yield ont_device_pb2.BasecalledReadChunkResponse(channel=channel, read_id=read_id, seq=seq, quality_seq=quality_seq, estimated_ref_len_so_far=estimated_ref_len_so_far) @print_nongen_exceptions @@ -122,7 +124,8 @@ def StartSim(self, request, context): Returns: whether it succeeded (i.e. if simulation was not running) """ - return ont_device_pb2.BoolResponse(value=self.device.start(request.acceleration_factor)) + acceleration_factor = request.acceleration_factor if request.acceleration_factor <= 0 else 1.0 + return ont_device_pb2.BoolResponse(value=self.device.start(acceleration_factor=acceleration_factor, update_method=request.update_method, log_interval=request.log_interval, stop_if_no_reads=request.stop_if_no_reads)) @print_nongen_exceptions # stop simulation, returns whether it succeeded (i.e. if simulation was running) @@ -131,6 +134,7 @@ def StopSim(self, request, context): @print_nongen_exceptions def RunMuxScan(self, request, context): + assert request.HasField("t_duration"), "t_duration must be set" return ont_device_pb2.MuxScanStartedInfo(value=self.device.run_mux_scan(t_duration=request.t_duration)) @print_nongen_exceptions diff --git a/src/simreaduntil/usecase_helpers/cli_usecase/__init__.py b/src/simreaduntil/usecase_helpers/cli_usecase/__init__.py new file mode 100644 index 0000000..e57d630 --- /dev/null +++ b/src/simreaduntil/usecase_helpers/cli_usecase/__init__.py @@ -0,0 +1,3 @@ +""" +Scripts to support the CLI usecase +""" \ No newline at end of file diff --git a/src/simreaduntil/usecase_helpers/cli_usecase/generate_random_reads.py b/src/simreaduntil/usecase_helpers/cli_usecase/generate_random_reads.py new file mode 100644 index 0000000..1dfbc8a --- /dev/null +++ b/src/simreaduntil/usecase_helpers/cli_usecase/generate_random_reads.py @@ -0,0 +1,70 @@ + +import argparse +import itertools +import logging +import os +import sys +from Bio.Seq import Seq +from Bio import SeqIO +from pathlib import Path +from tqdm import tqdm +from simreaduntil.shared_utils.debugging_helpers import is_test_mode +from simreaduntil.shared_utils.logging_utils import add_comprehensive_stream_handler_to_logger, print_logging_levels, setup_logger_simple +from simreaduntil.shared_utils.utils import print_args +from simreaduntil.simulator.readswriter import SingleFileReadsWriter +from simreaduntil.usecase_helpers.utils import random_nanosim_reads_gen + +logger = setup_logger_simple(__name__) +"""module logger""" + + +def parse_args(args=None): + parser = argparse.ArgumentParser(description="Generate dummy reads, only for testing purposes") + parser.add_argument("reads_file", type=Path, help="Path to write reads to") + parser.add_argument("--num_reads", type=int, help="Number of reads to generate", default=10_000) + parser.add_argument("--length_range", type=str, help="Length range of reads", default="5_000,10_000") + parser.add_argument("--overwrite", action="store_true", help="Overwrite existing reads file") + + args = parser.parse_args(args) + print_args(args, logger=logger) + + return args + +def main(): + log_level = logging.DEBUG + logging.getLogger(__name__).setLevel(log_level) + add_comprehensive_stream_handler_to_logger(None, level=log_level) + # print_logging_levels() + + if is_test_mode(): + os.chdir("server_client_cli_example") + args = parse_args(args=["test.fasta", "--overwrite"]) #todo + else: + args = parse_args() + + reads_file = args.reads_file + overwrite = args.overwrite + num_reads = args.num_reads + assert num_reads > 0, f"num_reads {num_reads} must be > 0" + length_range = args.length_range + length_range = tuple(map(int, length_range.split(","))) + assert len(length_range) == 2, f"length_range {length_range} must have length 2" + assert length_range[0] < length_range[1], f"length_range {length_range} must be increasing" + + assert reads_file.suffix == ".fasta", f"reads_file '{reads_file}' must end with '.fasta'" + if reads_file.exists(): + if overwrite: + # logger.warning(f"Overwriting existing reads file '{reads_file}'") + reads_file.unlink() + else: + raise FileExistsError(f"reads_file '{reads_file}' already exists, use --overwrite to overwrite") + + reads_gen = random_nanosim_reads_gen(length_range=length_range) + with open(reads_file, "w") as fh: + writer = SingleFileReadsWriter(fh) + for (read_id, seq) in tqdm(itertools.islice(reads_gen, num_reads), total=num_reads, desc="Writing read: "): + writer.write_read(SeqIO.SeqRecord(id=str(read_id), seq=Seq(seq))) + logger.info(f"Done writing reads to file '{reads_file}'") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/src/simreaduntil/usecase_helpers/cli_usecase/sim_plots_cli.py b/src/simreaduntil/usecase_helpers/cli_usecase/sim_plots_cli.py new file mode 100644 index 0000000..fe9164a --- /dev/null +++ b/src/simreaduntil/usecase_helpers/cli_usecase/sim_plots_cli.py @@ -0,0 +1,52 @@ +# superseed by seqsum_plotting.py +# todo: remove + +import argparse +import logging +from pathlib import Path +import shutil + +import toml +from simreaduntil.shared_utils.logging_utils import add_comprehensive_stream_handler_to_logger, print_logging_levels, setup_logger_simple +from simreaduntil.shared_utils.utils import print_args +from simreaduntil.usecase_helpers.utils import plot_sim_stats + +logger = setup_logger_simple(__name__) +"""module logger""" + +def parse_args(args=None): + parser = argparse.ArgumentParser(description="Plot simulator statistics. Use 'plot_seqsum' to plot sequencing summary statistics") + parser.add_argument("run_dir", type=Path, help="Path to the run directory of the simulator with files ") + parser.add_argument("--figure_dir", type=Path, help="Path to the figure directory, defaults to /figures, will overwrite files in there", default=None) + parser.add_argument("--verbosity", type=str, help="log level for plotting commands", default="info") + + args = parser.parse_args(args) + print_args(args, logger=logger) + return args + +def main(): + log_level = logging.DEBUG + logging.getLogger(__name__).setLevel(log_level) # log level of this script (running as __main__) + logging.getLogger("simreaduntil").setLevel(log_level) # log level of this script (running as __main__) + add_comprehensive_stream_handler_to_logger(None, level=log_level) + # print_logging_levels() + + args = parse_args() + + run_dir = args.run_dir + assert run_dir.exists(), f"run_dir '{run_dir}' does not exist" + figure_dir = args.figure_dir + if figure_dir is None: + figure_dir = run_dir / "figures" + verbosity = args.verbosity + + figure_dir.mkdir(parents=True, exist_ok=True) + + logging.getLogger("simreaduntil").setLevel({"debug": logging.DEBUG, "info": logging.INFO, "warning": logging.WARNING, "error": logging.ERROR, "critical": logging.CRITICAL}[verbosity.lower()]) + + plot_sim_stats(run_dir, figure_dir) + + logger.info("Done plotting simulator statistics") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/src/simreaduntil/usecase_helpers/cli_usecase/simulator_client_cli.py b/src/simreaduntil/usecase_helpers/cli_usecase/simulator_client_cli.py new file mode 100644 index 0000000..6af3603 --- /dev/null +++ b/src/simreaduntil/usecase_helpers/cli_usecase/simulator_client_cli.py @@ -0,0 +1,86 @@ + +import argparse +import logging +import time +import grpc + +import numpy as np +from tqdm import tqdm +from tqdm.contrib.logging import logging_redirect_tqdm +from simreaduntil.shared_utils.logging_utils import add_comprehensive_stream_handler_to_logger, print_logging_levels, setup_logger_simple + +from simreaduntil.shared_utils.utils import print_args +from simreaduntil.simulator.simulator_client import DeviceGRPCClient + +logger = setup_logger_simple(__name__) +"""module logger""" + +def parse_args(): + parser = argparse.ArgumentParser(description="Simple gRPC ReadUntil client that randomly decides to stop receiving, reject reads or take no action") + parser.add_argument("port", type=int, help="Port to connect to") + + args = parser.parse_args() + print_args(args, logger=logger) + return args + +def main(): + log_level = logging.INFO + logging.getLogger(None).setLevel(log_level) + add_comprehensive_stream_handler_to_logger(None, level=log_level) + # print_logging_levels() + + args = parse_args() + + port = args.port + assert port > 0, f"port {port} must be > 0" + + rng = np.random.default_rng(42) + + with DeviceGRPCClient(port) as client: + assert client.is_connected, "client not connected" + + if not client.start(acceleration_factor=1): + logger.warning("Simulator was already running") + assert client.is_running, "simulator not running" + + logger.info(f"Simulator writes reads to directory: {client.mk_run_dir}") + logger.info(f"Simulator has the following properties: {client.device_info()}") + + num_batches = 0 + num_chunks = 0 + + try: + with logging_redirect_tqdm(): + while True: + num_batches += 1 + for (channel, read_id, seq, quality_seq, estimated_ref_len_so_far) in tqdm(client.get_basecalled_read_chunks(), desc=f"Processing chunks in batch {num_batches}"): + num_chunks += 1 + logger.debug(f"Read chunk: channel={channel}, read_id={read_id}, seq={seq[:20]}..., quality_seq={quality_seq}, estimated_ref_len_so_far={estimated_ref_len_so_far}") + u = rng.uniform() + if u < 0.2: + logger.debug(f"Rejecting read '{read_id}'") + client.unblock_read(channel, read_id) + elif u < 0.4: + logger.debug(f"Stop receiving read '{read_id}'") + client.stop_receiving_read(channel, read_id) + else: + # no action + pass + # time.sleep(0.05) + time.sleep(0.2) # throttle + except KeyboardInterrupt: + pass + except grpc.RpcError as e: + logger.error(f"Caught gRPC error: {e}") + finally: + try: + if client.stop(): + logger.info("Stopped simulation") + except grpc.RpcError as e: + pass + + logger.info(f"Done. Received {num_chunks} chunks from {num_batches} batches") + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/src/simreaduntil/usecase_helpers/cli_usecase/simulator_server_cli.py b/src/simreaduntil/usecase_helpers/cli_usecase/simulator_server_cli.py new file mode 100644 index 0000000..5497408 --- /dev/null +++ b/src/simreaduntil/usecase_helpers/cli_usecase/simulator_server_cli.py @@ -0,0 +1,147 @@ +""" +CLI interface for the simulator to access via gRPC, currently no mux scans +""" + +import argparse +import logging +import os +import shutil +import signal +import time +import numpy as np +from pathlib import Path +from simreaduntil.shared_utils.debugging_helpers import is_test_mode +from simreaduntil.shared_utils.logging_utils import add_comprehensive_stream_handler_to_logger, print_logging_levels, setup_logger_simple +from simreaduntil.shared_utils.utils import print_args +from simreaduntil.simulator.gap_sampling.constant_gaps_until_blocked import ConstantGapsUntilBlocked +from simreaduntil.simulator.gap_sampling.rolling_window_gap_sampler import RollingWindowGapSamplerPerChannel +from simreaduntil.simulator.readpool import ReadPoolFromFile +from simreaduntil.simulator.readswriter import RotatingFileReadsWriter +from simreaduntil.simulator.simfasta_to_seqsum import convert_simfasta_dir_to_seqsum +from simreaduntil.simulator.simulator import ONTSimulator, simulator_stats_to_disk +from simreaduntil.simulator.simulator_params import SimParams +from simreaduntil.simulator.simulator_server import launchable_device_grpc_server, manage_grpc_server + +logger = setup_logger_simple(__name__) +"""module logger""" + +def parse_args(args=None): + parser = argparse.ArgumentParser(description="Simple gRPC server exposing the ONT simulator given reads as input, see 'src/simreaduntil/usecase_helpers/simulator_with_readfish.py' for other options to use the simulator, e.g., from a genome and with mux scans, pore model for raw signal data") + parser.add_argument("reads_file", type=Path, help="Path to the reads, e.g., generated by NanoSim or perfect reads") + parser.add_argument("run_dir", type=Path, help="Run dir, must not exist", default="example_run") + # todo: add pore model, extract sim params from an existing run: ConstantGapsUntilBlocked.from_seqsum_df, RollingWindowGapSamplerPerChannel.from_seqsum_df + parser.add_argument("--num_channels", type=int, help="Number of channels", default=512) + # parser.add_argument("--run_time", type=float, help="Time to run for (s)", default=2 ** 63 / 10 ** 9) # maximum value handled by time.sleep + parser.add_argument("--acceleration_factor", type=float, help="Speedup factor for simulation", default=1.0) + parser.add_argument("--chunk_size", type=int, help="Number of basepairs per chunk (API returns a multiple of chunk_size per channel)", default=200) + parser.add_argument("--bp_per_second", type=int, help="Pore speed (number of basepairs per second (per channel))", default=450) + parser.add_argument("--seed", type=int, help="Random seed", default=None) + parser.add_argument("--unblock_duration", type=float, help="Duration to unblock a read (s)", default=0.1) + parser.add_argument("--overwrite", action="store_true", help="Overwrite existing run_dir") + parser.add_argument("--port", type=int, help="Port to use for gRPC server (0 = auto-assign)", default=0) + parser.add_argument("--dont_start", action="store_true", help="Don't start the simulator, only start the gRPC server") + # verbose arg + parser.add_argument("--verbosity", type=str, help="log level of simulator", default="info") + + args = parser.parse_args(args) + print_args(args, logger=logger) + return args + +def main(): + log_level = logging.DEBUG + logging.getLogger(__name__).setLevel(log_level) # log level of this script (running as __main__) + add_comprehensive_stream_handler_to_logger(None, level=log_level) + # print_logging_levels() + + ####### Argument parsing ####### + + if is_test_mode(): + os.chdir("server_client_cli_example") + # args = parse_args(args=["random_reads.fasta", "example_run", "--overwrite", "--verbosity", "info", "--dont_start", "--port", "12000"]) + args = parse_args(args=["random_reads.fasta", "example_run", "--overwrite", "--verbosity", "debug", "--dont_start", "--port", "12000"]) + else: + args = parse_args() + + reads_file = args.reads_file + assert reads_file.exists(), f"reads_file '{reads_file}' does not exist" + run_dir = args.run_dir + num_channels = args.num_channels + assert num_channels > 0, f"num_channels {num_channels} must be > 0" + # run_time = args.run_time + # assert run_time > 0, f"run_time {run_time} must be > 0" + acceleration_factor = args.acceleration_factor + assert acceleration_factor > 0, f"acceleration_factor {acceleration_factor} must be > 0" + chunk_size = args.chunk_size + assert chunk_size > 0, f"chunk_size {chunk_size} must be > 0" + bp_per_second = args.bp_per_second + assert bp_per_second > 0, f"bp_per_second {bp_per_second} must be > 0" + seed = args.seed + unblock_duration = args.unblock_duration + assert unblock_duration >= 0, f"unblock_duration {unblock_duration} must be >= 0" + overwrite = args.overwrite + port = args.port + assert port >= 0, f"port {port} must be >= 0" + verbosity = args.verbosity + dont_start = args.dont_start + + if run_dir.exists(): + if overwrite: + logger.warning(f"Overwriting existing run_dir '{run_dir}'") + shutil.rmtree(run_dir) + else: + raise FileExistsError(f"run_dir '{run_dir}' already exists, use --overwrite to overwrite") + run_dir.mkdir(parents=True) + + logging.getLogger("simreaduntil").setLevel({"debug": logging.DEBUG, "info": logging.INFO, "warning": logging.WARNING, "error": logging.ERROR, "critical": logging.CRITICAL}[verbosity.lower()]) + + ####### Setting up the simulator ####### + + logger.info("Reading in reads. pysam index creation may take some time.") + read_pool = ReadPoolFromFile(reads_file=reads_file) + + mk_run_dir = run_dir / "reads" + logger.info(f"Writing basecalled reads to directory '{mk_run_dir}'") + reads_writer = RotatingFileReadsWriter(mk_run_dir, "reads_", max_reads_per_file=4000) + + gap_samplers = {f"chan{channel}": ConstantGapsUntilBlocked(short_gap_length=0.4, long_gap_length=10, prob_long_gap=0.05, time_until_blocked=np.inf, read_delay=0.05) for channel in range(num_channels)} + logger.info("Using constant gap samplers") + sim_params = SimParams(gap_samplers=gap_samplers, bp_per_second=bp_per_second, default_unblock_duration=unblock_duration, chunk_size=chunk_size, seed=np.random.default_rng(seed)) + + simulator = ONTSimulator( + read_pool=read_pool, + reads_writer=reads_writer, + sim_params=sim_params, + ) + + ####### Starting the gRPC server ####### + port, server, unique_id = launchable_device_grpc_server(simulator, port=port) + assert port != 0, f"port {port} already in use" + + logger.info(f"Starting gRPC server on port {port}") + with manage_grpc_server(server): + logger.info("Started gRPC server") + try: + if not dont_start: + logger.info("Starting the simulation") + simulator.start(acceleration_factor=acceleration_factor, log_interval=100) + else: + logger.info("Not starting the simulation, must be started manually (via a gRPC call), or remove the --dont_start flag") + signal.pause() # wait for keyboard interrupt + except KeyboardInterrupt: + pass + finally: + if simulator.stop(): + logger.info("Stopped simulation") + + simulator_stats_to_disk([simulator], output_dir=run_dir) + + seqsum_filename = run_dir / "sequencing_summary.txt" + logger.info(f"Writing sequencing summary file '{seqsum_filename}'") + convert_simfasta_dir_to_seqsum(reads_writer.output_dir, seqsummary_filename=seqsum_filename) + logger.info("Wrote sequencing summary file") + + logger.info("Done with simulation") + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/src/simreaduntil/usecase_helpers/utils.py b/src/simreaduntil/usecase_helpers/utils.py index e7b87b7..70a40f8 100644 --- a/src/simreaduntil/usecase_helpers/utils.py +++ b/src/simreaduntil/usecase_helpers/utils.py @@ -210,7 +210,7 @@ def normalize_fasta_cmd(): Command line script to normalize a FASTA file and write it back to a file """ - add_comprehensive_stream_handler_to_logger(level=logging.INFO) + add_comprehensive_stream_handler_to_logger(None, level=logging.INFO) # argparse to parse in_fasta_path, out_fasta_path parser = argparse.ArgumentParser(description="Normalize a FASTA file to upper case and replacing N letters") @@ -397,6 +397,32 @@ def get_gap_sampler_method(gap_sampler_type, n_channels_full=None): else: assert gap_sampler_type is None, f"unknown gap sampler type '{gap_sampler_type}'" +def plot_sim_stats(run_dir, figure_dir): + """ + Plot statistics from the simulator + + Args: + run_dir: directory containing simulator outputs, e.g. action_results.csv, channel_stats.dill, log file + figure_dir: directory to save figures to, must exist + """ + assert figure_dir.exists() + + action_results_filename = run_dir / "action_results.csv" + if action_results_filename.exists(): + logger.info(f"Loading simulator action results from '{action_results_filename}'") + action_results_df = pd.read_csv(action_results_filename, sep="\t") + logger.info(f"Loaded simulator action results from '{action_results_filename}'") + if len(action_results_df) > 0: + plot_sim_actions(action_results_df, save_dir=figure_dir) + else: + logger.warning("No actions were performed during the simulation") + + channel_stats_filename = run_dir / "channel_stats.dill" + if channel_stats_filename.exists(): + logger.debug("Plotting channel stats") + channel_stats: List[ChannelStats] = dill_load(channel_stats_filename) + plot_channel_stats(channel_stats, save_dir=figure_dir) + def create_figures(seqsum, run_dir, figure_dir=None, delete_existing_figure_dir=False, **kwargs): """ Create figures from a finished run @@ -422,21 +448,9 @@ def create_figures(seqsum, run_dir, figure_dir=None, delete_existing_figure_dir= # warnings.filterwarnings("ignore", message="Only plotting full reads") nb_reads = len(seqsum) if isinstance(seqsum, pd.DataFrame) else num_lines_in_file(seqsum) - 1 seqsum_df, cov_df = create_plots_for_seqsum(seqsum, cov_every=max(1, int(nb_reads/100)), save_dir=figure_dir, **kwargs) - - action_results_filename = run_dir / "action_results.csv" - if action_results_filename.exists(): - logger.info(f"Loading simulator action results from '{action_results_filename}'") - action_results_df = pd.read_csv(action_results_filename, sep="\t") - if len(action_results_df) > 0: - plot_sim_actions(action_results_df, save_dir=figure_dir) - else: - logger.warning("No actions were performed during the simulation") - - channel_stats_filename = run_dir / "channel_stats.dill" - if channel_stats_filename.exists(): - logger.debug("Plotting channel stats") - channel_stats: List[ChannelStats] = dill_load(channel_stats_filename) - plot_channel_stats(channel_stats, save_dir=figure_dir) + + plot_sim_stats(run_dir, figure_dir) + def plot_log_file_metrics(log_filename, save_dir=None): """Parse log file and plot metrics""" diff --git a/tests/seqsum_tools/data/README.md b/tests/seqsum_tools/data/README.md index 9f1b024..9d381a0 100644 --- a/tests/seqsum_tools/data/README.md +++ b/tests/seqsum_tools/data/README.md @@ -21,7 +21,7 @@ with open("tests/param_extractor/data/simreads.fasta", "w") as f: From a real run ```{python} -sequencing_summary_file = "/Users/maximilianmordig/Desktop/ont_sequencing/data/sequencing_summaries/uncalled/20190809_zymo_seqsum.txt" # from UNCALLED +sequencing_summary_file = "~/Desktop/ont_sequencing/data/sequencing_summaries/uncalled/20190809_zymo_seqsum.txt" # from UNCALLED df_read = pd.read_csv(sequencing_summary_file, sep="\t") sub_seqsum_df = df_read[(df_read["start_time"] < 13000) & (df_read["channel"] < 10)] sub_seqsum_df["end_reason"] = "signal_positive" diff --git a/usecases/README.md b/usecases/README.md index df036d6..294e5f5 100644 --- a/usecases/README.md +++ b/usecases/README.md @@ -1,4 +1,43 @@ -# Usecases +# CLI Usecase + +Here, we show how to use the simulator with the CLI interface and provide an example client that randomly decides what action to take. +Note that the client does not use the official ReadUntil API as our setup is slightly different. Since our tool is for research, SSL encryption is not needed (as of now), we restrict to only the essential ReadUntil API methods and the API returns basecalled rather than raw chunks (raise an issue if you want it added). + +Assuming you have followed the installation instructions, do the following starting from a directory of your choice: +```{bash} +mkdir server_client_cli_example +cd server_client_cli_example + +source ~/ont_project_all/ont_project_venv/bin/activate +usecase_generate_random_reads random_reads.fasta +# or: python ~/ont_project_all/ont_project/src/simreaduntil/usecase_helpers/generate_random_reads.py random_reads.fasta + +source ~/ont_project_all/ont_project_venv/bin/activate +simulator_server_cli random_reads.fasta example_run --overwrite --verbosity info --dont_start --port 12000 +# or: python ~/ont_project_all/ont_project/src/simreaduntil/usecase_helpers/simulator_server_cli.py random_reads.fasta example_run --overwrite --verbosity info --dont_start --port 12000 +``` + +Open a new terminal window and run the following command: +```{bash} +source ~/ont_project_all/ont_project_venv/bin/activate +usecase_simulator_client_cli 12000 +# or: python ~/ont_project_all/ont_project/src/simreaduntil/usecase_helpers/simulator_client_cli.py 12000 +``` + +You can stop the client with `Ctrl-C`. Also stop the server with `Ctrl-C` in the other terminal window, so that the final summary files get written (action results and sequencing summary). +You can also stop the server directly with `Ctrl-C` without doing so first for the client. + +Create plots with: +```{bash} +sim_plots_cli example_run --verbosity info +plot_seqsum example_run/sequencing_summary.txt --save_dir example_run/figures/ +# also possible to plot coverage of a reference by providing relevant args to "plot_seqsum" +``` + +Typically, you can leave the server cli script as it is, but want to modify the client and input reads. +You can take a look at the available options with `--help` appended to the commands. + +# Advanced Usecases Config files are located in the directory `configs` which contain configs for the simulator (and ReadFish). The paths in these scripts may need to be adapted to your local setup. diff --git a/usecases/compare_replication_methods.ipynb b/usecases/compare_replication_methods.ipynb index d625b85..c765837 100644 --- a/usecases/compare_replication_methods.ipynb +++ b/usecases/compare_replication_methods.ipynb @@ -83,7 +83,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Axes are not compatible: {'title': {'Number of active channels over time (486 active channels)', 'Number of reading channels over time (481 active channels)', 'Number of reading channels over time (512 active channels)', 'Number of reading channels over time (486 active channels)'}, 'ylabel': {'Number of active channels', 'Number of reading channels'}}\n" + "Axes are not compatible: {'title': {'Number of active channels over time (486 active channels)', 'Number of reading channels over time (481 active channels)', 'Number of reading channels over time (512 active channels)', 'Number of reading channels over time (486 active channels)'}, 'ylabel': {'Number of reading channels', 'Number of active channels'}}\n" ] }, { @@ -189,7 +189,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -199,13 +199,7 @@ "+ base_dir=/Users/maximilianmordig/ont_project_all/figures_cluster/runs/run_replication/\n", "+ target_base_dir=/Users/maximilianmordig/ont_project_all/figures_paper/MainPaper/extraction_comparison/\n", "+ cp /Users/maximilianmordig/ont_project_all/figures_cluster/runs/run_replication/zymo_realrun_plots/figures/read_stats_by_channel.png /Users/maximilianmordig/ont_project_all/figures_paper/MainPaper/extraction_comparison/read_stats_by_channel_zymo_realrun_plots.png\n", - "+ cp /Users/maximilianmordig/ont_project_all/figures_cluster/runs/run_replication/sampler_per_rolling_window_channel/simulator_run/figures/read_stats_by_channel.png /Users/maximilianmordig/ont_project_all/figures_paper/MainPaper/extraction_comparison/read_stats_by_channel_rollingwindow.png\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ + "+ cp /Users/maximilianmordig/ont_project_all/figures_cluster/runs/run_replication/sampler_per_rolling_window_channel/simulator_run/figures/read_stats_by_channel.png /Users/maximilianmordig/ont_project_all/figures_paper/MainPaper/extraction_comparison/read_stats_by_channel_rollingwindow.png\n", "+ cp /Users/maximilianmordig/ont_project_all/figures_cluster/runs/run_replication/replication/simulator_run/figures/read_stats_by_channel.png /Users/maximilianmordig/ont_project_all/figures_paper/MainPaper/extraction_comparison/read_stats_by_channel_replication.png\n", "+ cp /Users/maximilianmordig/ont_project_all/figures_cluster/runs/run_replication/sampler_per_window/simulator_run/figures/read_stats_by_channel.png /Users/maximilianmordig/ont_project_all/figures_paper/MainPaper/extraction_comparison/read_stats_by_channel_sampler_per_window.png\n", "+ cp /Users/maximilianmordig/ont_project_all/figures_cluster/runs/run_replication/constant_gaps/simulator_run/figures/read_stats_by_channel.png /Users/maximilianmordig/ont_project_all/figures_paper/MainPaper/extraction_comparison/read_stats_by_channel_constantgaps.png\n", @@ -214,23 +208,7 @@ "+ cp /Users/maximilianmordig/ont_project_all/figures_cluster/runs/run_replication//combined_cum_nb_seq_bps_per_all.png /Users/maximilianmordig/ont_project_all/figures_paper/MainPaper/extraction_comparison/\n", "+ base_dir=/Users/maximilianmordig/ont_project_all/figures_cluster/runs/enrich_usecase/full_genome_run_sampler_per_window/simulator_run/figures/\n", "+ target_base_dir=/Users/maximilianmordig/ont_project_all/figures_paper/MainPaper/readfish_usecase/\n", - "+ cp /Users/maximilianmordig/ont_project_all/figures_cluster/runs/enrich_usecase/full_genome_run_sampler_per_window/simulator_run/figures/channel_occupation_fraction_over_time.png /Users/maximilianmordig/ont_project_all/figures_cluster/runs/enrich_usecase/full_genome_run_sampler_per_window/simulator_run/figures/channels_over_time.png /Users/maximilianmordig/ont_project_all/figures_cluster/runs/enrich_usecase/full_genome_run_sampler_per_window/simulator_run/figures/cum_nb_full_reads_per_group.png /Users/maximilianmordig/ont_project_all/figures_cluster/runs/enrich_usecase/full_genome_run_sampler_per_window/simulator_run/figures/cum_nb_never_requested_per_group.png /Users/maximilianmordig/ont_project_all/figures_cluster/runs/enrich_usecase/full_genome_run_sampler_per_window/simulator_run/figures/cum_nb_reads_per_group.png /Users/maximilianmordig/ont_project_all/figures_cluster/runs/enrich_usecase/full_genome_run_sampler_per_window/simulator_run/figures/cum_nb_ref_bps_per_group.png /Users/maximilianmordig/ont_project_all/figures_cluster/runs/enrich_usecase/full_genome_run_sampler_per_window/simulator_run/figures/cum_nb_rejectedbps_per_group.png /Users/maximilianmordig/ont_project_all/figures_cluster/runs/enrich_usecase/full_genome_run_sampler_per_window/simulator_run/figures/cum_nb_rejections_per_group.png /Users/maximilianmordig/ont_project_all/figures_cluster/runs/enrich_usecase/full_genome_run_sampler_per_window/simulator_run/figures/cum_nb_seq_bps_per_group.png /Users/maximilianmordig/ont_project_all/figures_cluster/runs/enrich_usecase/full_genome_run_sampler_per_window/simulator_run/figures/cum_nb_stopped_receiving_per_group.png /Users/maximilianmordig/ont_project_all/figures_cluster/runs/enrich_usecase/full_genome_run_sampler_per_window/simulator_run/figures/end_reason_hist.png /Users/maximilianmordig/ont_project_all/figures_cluster/runs/enrich_usecase/full_genome_run_sampler_per_window/simulator_run/figures/extra_basecall_delay.png /Users/maximilianmordig/ont_project_all/figures_cluster/runs/enrich_usecase/full_genome_run_sampler_per_window/simulator_run/figures/fraction_cov1_per_group.png /Users/maximilianmordig/ont_project_all/figures_cluster/runs/enrich_usecase/full_genome_run_sampler_per_window/simulator_run/figures/fraction_cov2_per_group.png /Users/maximilianmordig/ont_project_all/figures_cluster/runs/enrich_usecase/full_genome_run_sampler_per_window/simulator_run/figures/fraction_cov3_per_group.png /Users/maximilianmordig/ont_project_all/figures_cluster/runs/enrich_usecase/full_genome_run_sampler_per_window/simulator_run/figures/fraction_cov4_per_group.png /Users/maximilianmordig/ont_project_all/figures_cluster/runs/enrich_usecase/full_genome_run_sampler_per_window/simulator_run/figures/fraction_covered_other.png /Users/maximilianmordig/ont_project_all/figures_cluster/runs/enrich_usecase/full_genome_run_sampler_per_window/simulator_run/figures/fraction_covered_target.png /Users/maximilianmordig/ont_project_all/figures_cluster/runs/enrich_usecase/full_genome_run_sampler_per_window/simulator_run/figures/fraction_until_rejection.png /Users/maximilianmordig/ont_project_all/figures_cluster/runs/enrich_usecase/full_genome_run_sampler_per_window/simulator_run/figures/nb_basepairs_recrej_per_channel.png /Users/maximilianmordig/ont_project_all/figures_cluster/runs/enrich_usecase/full_genome_run_sampler_per_window/simulator_run/figures/nb_elements_per_channel.png /Users/maximilianmordig/ont_project_all/figures_cluster/runs/enrich_usecase/full_genome_run_sampler_per_window/simulator_run/figures/nb_missed_stoprec_rejected_per_channel.png /Users/maximilianmordig/ont_project_all/figures_cluster/runs/enrich_usecase/full_genome_run_sampler_per_window/simulator_run/figures/nb_successful_stoprec_rejected_per_channel.png /Users/maximilianmordig/ont_project_all/figures_cluster/runs/enrich_usecase/full_genome_run_sampler_per_window/simulator_run/figures/proportion_channels_per_group_over_time.png /Users/maximilianmordig/ont_project_all/figures_cluster/runs/enrich_usecase/full_genome_run_sampler_per_window/simulator_run/figures/read_duration.png /Users/maximilianmordig/ont_project_all/figures_cluster/runs/enrich_usecase/full_genome_run_sampler_per_window/simulator_run/figures/read_length_full.png /Users/maximilianmordig/ont_project_all/figures_cluster/runs/enrich_usecase/full_genome_run_sampler_per_window/simulator_run/figures/read_length_rejected.png /Users/maximilianmordig/ont_project_all/figures_cluster/runs/enrich_usecase/full_genome_run_sampler_per_window/simulator_run/figures/read_stats_by_channel.png /Users/maximilianmordig/ont_project_all/figures_cluster/runs/enrich_usecase/full_genome_run_sampler_per_window/simulator_run/figures/readfish_stats_per_iter.png /Users/maximilianmordig/ont_project_all/figures_cluster/runs/enrich_usecase/full_genome_run_sampler_per_window/simulator_run/figures/readfish_throttle.png /Users/maximilianmordig/ont_project_all/figures_cluster/runs/enrich_usecase/full_genome_run_sampler_per_window/simulator_run/figures/simulator_delay.png /Users/maximilianmordig/ont_project_all/figures_cluster/runs/enrich_usecase/full_genome_run_sampler_per_window/simulator_run/figures/time_spent_per_channel.png /Users/maximilianmordig/ont_project_all/figures_cluster/runs/enrich_usecase/full_genome_run_sampler_per_window/simulator_run/figures/time_spent_per_element_per_channel.png /Users/maximilianmordig/ont_project_all/figures_paper/MainPaper/readfish_usecase/\n", - "+ base_dir=/Users/maximilianmordig/ont_project_all/figures_cluster/runs/\n", - "+ target_base_dir=/Users/maximilianmordig/ont_project_all/figures_paper/Supplementary/\n", - "+ cd /Users/maximilianmordig/ont_project_all/figures_cluster/runs/\n", - "+ find data enrich_usecase run_replication -type d -name figures -exec bash -c 'filename={}; echo \"Converting dir $filename\"; convert {}/*.png /Users/maximilianmordig/ont_project_all/figures_paper/Supplementary/${filename//\\//__}.pdf' ';'\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Converting dir enrich_usecase/full_genome_run_sampler_per_window/simulator_run/figures\n", - "Converting dir run_replication/constant_gaps/simulator_run/figures\n", - "Converting dir run_replication/sampler_per_window/simulator_run/figures\n", - "Converting dir run_replication/zymo_realrun_plots/figures\n", - "Converting dir run_replication/replication/simulator_run/figures\n", - "Converting dir run_replication/sampler_per_rolling_window_channel/simulator_run/figures\n" + "+ cp /Users/maximilianmordig/ont_project_all/figures_cluster/runs/enrich_usecase/full_genome_run_sampler_per_window/simulator_run/figures/channel_occupation_fraction_over_time.png /Users/maximilianmordig/ont_project_all/figures_cluster/runs/enrich_usecase/full_genome_run_sampler_per_window/simulator_run/figures/channels_over_time.png /Users/maximilianmordig/ont_project_all/figures_cluster/runs/enrich_usecase/full_genome_run_sampler_per_window/simulator_run/figures/cum_nb_full_reads_per_group.png /Users/maximilianmordig/ont_project_all/figures_cluster/runs/enrich_usecase/full_genome_run_sampler_per_window/simulator_run/figures/cum_nb_never_requested_per_group.png /Users/maximilianmordig/ont_project_all/figures_cluster/runs/enrich_usecase/full_genome_run_sampler_per_window/simulator_run/figures/cum_nb_reads_per_group.png /Users/maximilianmordig/ont_project_all/figures_cluster/runs/enrich_usecase/full_genome_run_sampler_per_window/simulator_run/figures/cum_nb_ref_bps_per_group.png /Users/maximilianmordig/ont_project_all/figures_cluster/runs/enrich_usecase/full_genome_run_sampler_per_window/simulator_run/figures/cum_nb_rejectedbps_per_group.png /Users/maximilianmordig/ont_project_all/figures_cluster/runs/enrich_usecase/full_genome_run_sampler_per_window/simulator_run/figures/cum_nb_rejections_per_group.png /Users/maximilianmordig/ont_project_all/figures_cluster/runs/enrich_usecase/full_genome_run_sampler_per_window/simulator_run/figures/cum_nb_seq_bps_per_group.png /Users/maximilianmordig/ont_project_all/figures_cluster/runs/enrich_usecase/full_genome_run_sampler_per_window/simulator_run/figures/cum_nb_stopped_receiving_per_group.png /Users/maximilianmordig/ont_project_all/figures_cluster/runs/enrich_usecase/full_genome_run_sampler_per_window/simulator_run/figures/end_reason_hist.png /Users/maximilianmordig/ont_project_all/figures_cluster/runs/enrich_usecase/full_genome_run_sampler_per_window/simulator_run/figures/extra_basecall_delay.png /Users/maximilianmordig/ont_project_all/figures_cluster/runs/enrich_usecase/full_genome_run_sampler_per_window/simulator_run/figures/fraction_cov1_per_group.png /Users/maximilianmordig/ont_project_all/figures_cluster/runs/enrich_usecase/full_genome_run_sampler_per_window/simulator_run/figures/fraction_cov2_per_group.png /Users/maximilianmordig/ont_project_all/figures_cluster/runs/enrich_usecase/full_genome_run_sampler_per_window/simulator_run/figures/fraction_cov3_per_group.png /Users/maximilianmordig/ont_project_all/figures_cluster/runs/enrich_usecase/full_genome_run_sampler_per_window/simulator_run/figures/fraction_cov4_per_group.png /Users/maximilianmordig/ont_project_all/figures_cluster/runs/enrich_usecase/full_genome_run_sampler_per_window/simulator_run/figures/fraction_covered_other.png /Users/maximilianmordig/ont_project_all/figures_cluster/runs/enrich_usecase/full_genome_run_sampler_per_window/simulator_run/figures/fraction_covered_target.png /Users/maximilianmordig/ont_project_all/figures_cluster/runs/enrich_usecase/full_genome_run_sampler_per_window/simulator_run/figures/fraction_until_rejection.png /Users/maximilianmordig/ont_project_all/figures_cluster/runs/enrich_usecase/full_genome_run_sampler_per_window/simulator_run/figures/nb_basepairs_recrej_per_channel.png /Users/maximilianmordig/ont_project_all/figures_cluster/runs/enrich_usecase/full_genome_run_sampler_per_window/simulator_run/figures/nb_elements_per_channel.png /Users/maximilianmordig/ont_project_all/figures_cluster/runs/enrich_usecase/full_genome_run_sampler_per_window/simulator_run/figures/nb_missed_stoprec_rejected_per_channel.png /Users/maximilianmordig/ont_project_all/figures_cluster/runs/enrich_usecase/full_genome_run_sampler_per_window/simulator_run/figures/nb_successful_stoprec_rejected_per_channel.png /Users/maximilianmordig/ont_project_all/figures_cluster/runs/enrich_usecase/full_genome_run_sampler_per_window/simulator_run/figures/proportion_channels_per_group_over_time.png /Users/maximilianmordig/ont_project_all/figures_cluster/runs/enrich_usecase/full_genome_run_sampler_per_window/simulator_run/figures/read_duration.png /Users/maximilianmordig/ont_project_all/figures_cluster/runs/enrich_usecase/full_genome_run_sampler_per_window/simulator_run/figures/read_length_full.png /Users/maximilianmordig/ont_project_all/figures_cluster/runs/enrich_usecase/full_genome_run_sampler_per_window/simulator_run/figures/read_length_rejected.png /Users/maximilianmordig/ont_project_all/figures_cluster/runs/enrich_usecase/full_genome_run_sampler_per_window/simulator_run/figures/read_stats_by_channel.png /Users/maximilianmordig/ont_project_all/figures_cluster/runs/enrich_usecase/full_genome_run_sampler_per_window/simulator_run/figures/readfish_stats_per_iter.png /Users/maximilianmordig/ont_project_all/figures_cluster/runs/enrich_usecase/full_genome_run_sampler_per_window/simulator_run/figures/readfish_throttle.png /Users/maximilianmordig/ont_project_all/figures_cluster/runs/enrich_usecase/full_genome_run_sampler_per_window/simulator_run/figures/simulator_delay.png /Users/maximilianmordig/ont_project_all/figures_cluster/runs/enrich_usecase/full_genome_run_sampler_per_window/simulator_run/figures/time_spent_per_channel.png /Users/maximilianmordig/ont_project_all/figures_cluster/runs/enrich_usecase/full_genome_run_sampler_per_window/simulator_run/figures/time_spent_per_element_per_channel.png /Users/maximilianmordig/ont_project_all/figures_paper/MainPaper/readfish_usecase/\n" ] } ], @@ -258,8 +236,35 @@ "### copy enrich usecase\n", "base_dir=/Users/maximilianmordig/ont_project_all/figures_cluster/runs/enrich_usecase/full_genome_run_sampler_per_window/simulator_run/figures/\n", "target_base_dir=/Users/maximilianmordig/ont_project_all/figures_paper/MainPaper/readfish_usecase/\n", - "cp \"${base_dir}\"*.png \"${target_base_dir}\"\n", - "\n", + "cp \"${base_dir}\"*.png \"${target_base_dir}\"" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Converting dir enrich_usecase/full_genome_run_sampler_per_window/simulator_run/figures\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Converting dir run_replication/constant_gaps/simulator_run/figures\n", + "Converting dir run_replication/sampler_per_window/simulator_run/figures\n", + "Converting dir run_replication/zymo_realrun_plots/figures\n", + "Converting dir run_replication/replication/simulator_run/figures\n", + "Converting dir run_replication/sampler_per_rolling_window_channel/simulator_run/figures\n" + ] + } + ], + "source": [ + "%%bash\n", "\n", "######## Supplement\n", "base_dir=/Users/maximilianmordig/ont_project_all/figures_cluster/runs/\n", diff --git a/usecases/plot_existing_seqsum.py b/usecases/plot_existing_seqsum.py index 1f4c598..9074233 100644 --- a/usecases/plot_existing_seqsum.py +++ b/usecases/plot_existing_seqsum.py @@ -1,7 +1,7 @@ """ Plot an existing sequencing summary file (that possibly comes with a PAF file). -This is similar to the entrypoint `plot_sim_seqsum`, but adapted to the cluster with sensible defaults. +This is similar to the entrypoint `plot_seqsum`, but adapted to the cluster with sensible defaults. It cleans up directories and plots figures from the simulator run as well, if they are present. """ @@ -23,8 +23,8 @@ sequencing_summary_file = "sequencing_summary.txt" paf_file = None -# paf_file = "/Users/maximilianmordig/ont_project_all/ont_project/runs/enrich_usecase/data/20190809_zymo_uncalled.paf" -# paf_file = "/Users/maximilianmordig/ont_project_all/ont_project/runs/enrich_usecase/data/deduped_20190809_zymo_uncalled.paf" +# paf_file = "~/ont_project_all/ont_project/runs/enrich_usecase/data/20190809_zymo_uncalled.paf" +# paf_file = "~/ont_project_all/ont_project/runs/enrich_usecase/data/deduped_20190809_zymo_uncalled.paf" # sequencing_summary_file = Path("~/ont_project_all/ont_project/runs/data/20190809_zymo_seqsum.txt").expanduser() # sim_run_dir = Path("zymo_realrun_plots/")