Skip to content

Commit

Permalink
Refactoring
Browse files Browse the repository at this point in the history
  • Loading branch information
adkinsrs committed Aug 14, 2024
1 parent ee5aae1 commit c521b3e
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 9 deletions.
9 changes: 5 additions & 4 deletions www/api/resources/projectr.py
Original file line number Diff line number Diff line change
Expand Up @@ -321,10 +321,11 @@ def projectr_callback(dataset_id, genecart_id, projection_id, session_id, scope,
# If dataset genes have duplicated index names, we need to drop the repeated entries (keeping the first occurrence)
# to avoid errors in collecting rownames in projectR (which gives invalid output)
# This means the dropped duplicate entries will not be in the intersection of the dataset and pattern genes
dedup_copy = Path(ana.dataset_path().replace('.h5ad', '.dups_removed.h5ad'))
if dedup_copy.exists():
dedup_copy.unlink()
adata = adata[:, adata.var.index.duplicated(keep="first") == False].copy(filename=dedup_copy)
if (adata.var.index.duplicated(keep="first") == True).any():
dedup_copy = Path(ana.dataset_path().replace('.h5ad', '.dups_removed.h5ad'))
if dedup_copy.exists():
dedup_copy.unlink()
adata = adata[:, adata.var.index.duplicated(keep="first") == False].copy(filename=dedup_copy)

num_target_genes = adata.shape[1]
num_loading_genes = loading_df.shape[0]
Expand Down
9 changes: 4 additions & 5 deletions www/api/resources/tsne_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -348,20 +348,19 @@ def post(self, dataset_id):
# delete the original column
selected.obs.drop(selected_gene, axis=1, inplace=True)

df = selected.to_df()
success = 1
message = ""
if len(df.columns) > 1:
success = 2
message = "WARNING: Multiple Ensemble IDs found for gene symbol '{}'. Using the first stored Ensembl ID.".format(selected_gene)

# Drop duplicate gene symbols so that only one Ensembl ID is used in scanpy
selected.var = selected.var.reset_index().set_index('gene_symbol')
# Currently the ensembl_id column is still called 'index', which could be confusing when looking at the new .index
# Rename to end the confusion
selected.var = selected.var.rename(columns={selected.var.columns[0]: "ensembl_id"})
# Modify the AnnData object to not include any duplicated gene symbols (keep only first entry)
if len(df.columns) > 1:
if (selected.var.index.duplicated(keep="first") == True).any():
success = 2
message = "WARNING: Multiple Ensemble IDs found for gene symbol '{}'. Using the first stored Ensembl ID.".format(selected_gene)

dedup_copy = ana.dataset_path().replace('.h5ad', '.dups_removed.h5ad')
if os.path.exists(dedup_copy):
os.remove(dedup_copy)
Expand Down

0 comments on commit c521b3e

Please sign in to comment.