Skip to content

Commit

Permalink
Now checks to make sure it doesn't process more than one file of the same type in the input directory
Browse files Browse the repository at this point in the history
  • Loading branch information
jorvis committed Nov 21, 2023
1 parent 6577009 commit b8516ee
Showing 1 changed file with 22 additions and 0 deletions.
22 changes: 22 additions & 0 deletions bin/convert_3tab_to_h5ad.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,22 +26,44 @@ def main():

args = parser.parse_args()

# Helps track what we've seen so far so duplicate files aren't processed
expression_file_found = False
rowmeta_file_found = False
colmeta_file_found = False

for infile in os.listdir(args.input_directory):
filepath = "{0}/{1}".format(args.input_directory, infile)

# Read each file as pandas dataframes
if infile == 'expression.tab' or os.path.basename(filepath)== 'expression.tab' or 'DataMTX.tab' in infile:
if expression_file_found:
print("WARN: Skipping this file since expression data file already found: {0}".format(infile), file=sys.stderr)
continue

# Get columns and rows of expression data in list form.
exp = pd.read_table(filepath, sep='\t', index_col=0, header=0)
exp_obs = list(exp.columns)
exp_genes= list(exp.index)

# Read in expressions as AnnData object
adata = sc.read(filepath, first_column_names=True, cache=False).transpose()
expression_file_found = True

elif infile == 'observations.tab' or os.path.basename(filepath)== 'observations.tab' or 'COLmeta.tab' in infile:
if colmeta_file_found:
print("WARN: Skipping this file since colmeta data file already found: {0}".format(infile), file=sys.stderr)
continue

obs = pd.read_table(filepath, sep='\t', index_col=0, header=0)
colmeta_file_found = True

elif infile == 'genes.tab' or os.path.basename(filepath)== 'genes.tab' or 'ROWmeta.tab' in infile:
if rowmeta_file_found:
print("WARN: Skipping this file since rowmeta data file already found: {0}".format(infile), file=sys.stderr)
continue

var = pd.read_table(filepath, sep='\t', index_col=0, header=0)
rowmeta_file_found = True

for str_type in ['cell_type', 'condition', 'time_point', 'time_unit']:
if str_type in obs.columns:
Expand Down

0 comments on commit b8516ee

Please sign in to comment.