Refactoring

IGS · Aug 14, 2024 · c521b3e · c521b3e
1 parent ee5aae1
commit c521b3e
Show file tree

Hide file tree

Showing 2 changed files with 9 additions and 9 deletions.
diff --git a/www/api/resources/projectr.py b/www/api/resources/projectr.py
@@ -321,10 +321,11 @@ def projectr_callback(dataset_id, genecart_id, projection_id, session_id, scope,
     # If dataset genes have duplicated index names, we need to drop the duplicates (keeping the
     # first occurrence) to avoid errors in collecting rownames in projectR (which gives invalid output)
     # This means only the first occurrence of a duplicated gene can be in the intersection of the dataset and pattern genes
-    dedup_copy = Path(ana.dataset_path().replace('.h5ad', '.dups_removed.h5ad'))
-    if dedup_copy.exists():
-        dedup_copy.unlink()
-    adata = adata[:, adata.var.index.duplicated(keep="first") == False].copy(filename=dedup_copy)
+    if (adata.var.index.duplicated(keep="first") == True).any():
+        dedup_copy = Path(ana.dataset_path().replace('.h5ad', '.dups_removed.h5ad'))
+        if dedup_copy.exists():
+            dedup_copy.unlink()
+        adata = adata[:, adata.var.index.duplicated(keep="first") == False].copy(filename=dedup_copy)
 
     num_target_genes = adata.shape[1]
     num_loading_genes = loading_df.shape[0]

diff --git a/www/api/resources/tsne_data.py b/www/api/resources/tsne_data.py
@@ -348,20 +348,19 @@ def post(self, dataset_id):
             # delete the original column
             selected.obs.drop(selected_gene, axis=1, inplace=True)
 
-        df = selected.to_df()
         success = 1
         message = ""
-        if len(df.columns) > 1:
-            success = 2
-            message = "WARNING: Multiple Ensemble IDs found for gene symbol '{}'.  Using the first stored Ensembl ID.".format(selected_gene)
 
         # Drop duplicate gene symbols so that only 1 Ensembl ID is used in scanpy
         selected.var = selected.var.reset_index().set_index('gene_symbol')
         # Currently the ensembl_id column is still called 'index', which could be confusing when looking at the new .index
         # Rename to end the confusion
         selected.var = selected.var.rename(columns={selected.var.columns[0]: "ensembl_id"})
         # Modify the AnnData object to not include any duplicated gene symbols (keep only first entry)
-        if len(df.columns) > 1:
+        if (selected.var.index.duplicated(keep="first") == True).any():
+            success = 2
+            message = "WARNING: Multiple Ensemble IDs found for gene symbol '{}'.  Using the first stored Ensembl ID.".format(selected_gene)
+
             dedup_copy = ana.dataset_path().replace('.h5ad', '.dups_removed.h5ad')
             if os.path.exists(dedup_copy):
                 os.remove(dedup_copy)