diff --git a/www/api/resources/projectr.py b/www/api/resources/projectr.py index 304a8a68..6063d935 100644 --- a/www/api/resources/projectr.py +++ b/www/api/resources/projectr.py @@ -321,10 +321,11 @@ def projectr_callback(dataset_id, genecart_id, projection_id, session_id, scope, # If dataset genes have duplicated index names, we need to rename them to avoid errors # in collecting rownames in projectR (which gives invalid output) # This means these duplicated genes will not be in the intersection of the dataset and pattern genes - dedup_copy = Path(ana.dataset_path().replace('.h5ad', '.dups_removed.h5ad')) - if dedup_copy.exists(): - dedup_copy.unlink() - adata = adata[:, adata.var.index.duplicated(keep="first") == False].copy(filename=dedup_copy) + if (adata.var.index.duplicated(keep="first") == True).any(): + dedup_copy = Path(ana.dataset_path().replace('.h5ad', '.dups_removed.h5ad')) + if dedup_copy.exists(): + dedup_copy.unlink() + adata = adata[:, adata.var.index.duplicated(keep="first") == False].copy(filename=dedup_copy) num_target_genes = adata.shape[1] num_loading_genes = loading_df.shape[0] diff --git a/www/api/resources/tsne_data.py b/www/api/resources/tsne_data.py index 7c4dac52..cfe7ba68 100644 --- a/www/api/resources/tsne_data.py +++ b/www/api/resources/tsne_data.py @@ -348,12 +348,8 @@ def post(self, dataset_id): # delete the original column selected.obs.drop(selected_gene, axis=1, inplace=True) - df = selected.to_df() success = 1 message = "" - if len(df.columns) > 1: - success = 2 - message = "WARNING: Multiple Ensemble IDs found for gene symbol '{}'. Using the first stored Ensembl ID.".format(selected_gene) # Drop duplicate gene symbols so that only 1 ensemble ID is used in scanpy selected.var = selected.var.reset_index().set_index('gene_symbol') @@ -361,7 +357,10 @@ def post(self, dataset_id): # Rename to end the confusion selected.var = selected.var.rename(columns={selected.var.columns[0]: "ensembl_id"}) # Modify the AnnData object to not include any duplicated gene symbols (keep only first entry) - if len(df.columns) > 1: + if (selected.var.index.duplicated(keep="first") == True).any(): + success = 2 + message = "WARNING: Multiple Ensemble IDs found for gene symbol '{}'. Using the first stored Ensembl ID.".format(selected_gene) + dedup_copy = ana.dataset_path().replace('.h5ad', '.dups_removed.h5ad') if os.path.exists(dedup_copy): os.remove(dedup_copy)