From 030108b7e47956d153210f44718128bc5ef9432b Mon Sep 17 00:00:00 2001 From: Joshua Orvis Date: Tue, 20 Aug 2024 10:56:22 -0500 Subject: [PATCH] Made chunk size configurable --- bin/convert_3tab_to_h5ad_lowmem.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bin/convert_3tab_to_h5ad_lowmem.py b/bin/convert_3tab_to_h5ad_lowmem.py index bbc8202e..981f605c 100755 --- a/bin/convert_3tab_to_h5ad_lowmem.py +++ b/bin/convert_3tab_to_h5ad_lowmem.py @@ -25,6 +25,7 @@ def main(): parser = argparse.ArgumentParser( description='3-tab -> H5AD') parser.add_argument('-i', '--input_directory', type=str, required=True, help='Path to an input directory containing the 3-tab files') parser.add_argument('-o', '--output_file', type=str, required=True, help='Path to the output file to be created') + parser.add_argument('-r', '--row_chunk_size', type=int, required=False, default=500, help='Rows of input to be read at a time (helps control memory)') args = parser.parse_args() @@ -61,7 +62,7 @@ def main(): print("Creating AnnData object with obs and var", file=sys.stderr, flush=True) adata = sc.AnnData(obs=var, var=obs) print("Reading expression matrix file: {0}".format(expression_matrix_path), file=sys.stderr, flush=True) - reader = pd.read_csv(expression_matrix_path, sep='\t', index_col=0, chunksize=500) + reader = pd.read_csv(expression_matrix_path, sep='\t', index_col=0, chunksize=args.row_chunk_size) adata.X = sparse.vstack([sparse.csr_matrix(chunk.values) for chunk in reader]) print("Finished reading expression matrix file", file=sys.stderr, flush=True) adata = adata.transpose()