Skip to content

Commit

Permalink
Made chunk size configurable
Browse files (browse the repository at this point in the history)
  • Loading branch information
jorvis committed Aug 20, 2024
1 parent 148e608 commit 030108b
Showing 1 changed file with 2 additions and 1 deletion.
3 changes: 2 additions & 1 deletion bin/convert_3tab_to_h5ad_lowmem.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ def main():
parser = argparse.ArgumentParser( description='3-tab -> H5AD')
parser.add_argument('-i', '--input_directory', type=str, required=True, help='Path to an input directory containing the 3-tab files')
parser.add_argument('-o', '--output_file', type=str, required=True, help='Path to the output file to be created')
+ parser.add_argument('-r', '--row_chunk_size', type=int, required=False, default=500, help='Rows of input to be read at a time (helps control memory)')

args = parser.parse_args()

Expand Down Expand Up @@ -61,7 +62,7 @@ def main():
print("Creating AnnData object with obs and var", file=sys.stderr, flush=True)
adata = sc.AnnData(obs=var, var=obs)
print("Reading expression matrix file: {0}".format(expression_matrix_path), file=sys.stderr, flush=True)
- reader = pd.read_csv(expression_matrix_path, sep='\t', index_col=0, chunksize=500)
+ reader = pd.read_csv(expression_matrix_path, sep='\t', index_col=0, chunksize=args.row_chunk_size)
adata.X = sparse.vstack([sparse.csr_matrix(chunk.values) for chunk in reader])
print("Finished reading expression matrix file", file=sys.stderr, flush=True)
adata = adata.transpose()
Expand Down

0 comments on commit 030108b

Please sign in to comment.