Use the same gene-ID translation as in add_annotation, for consistency

ICB-DCM · Jun 18, 2024 · 9c65588 · 9c65588
1 parent 52e486f
commit 9c65588
Show file tree

Hide file tree

Showing 4 changed files with 56 additions and 25 deletions.
diff --git a/program/shinyApp/R/enrichment_analysis/check_annotation.R b/program/shinyApp/R/enrichment_analysis/check_annotation.R
@@ -9,32 +9,33 @@ check_annotation_enrichment_analysis <- function(data){
     "Entrez.ID", "entrezgene_id"
   )
   symbol_opt <- c(
-    "symbol", "Symbol", "gene_symbol", "Gene_Symbol", "Nomenclature", "SYMBOL", "Gene.Symbol"
+    "symbol", "Symbol", "gene_symbol", "Gene_Symbol", "Nomenclature", "SYMBOL",
+    "Gene.Symbol", "external_gene_name"
   )
   # check if annotation is in row-annotation
   if (any(entrez_opt %in% colnames(rowData(data)))){
-    rowData(data)["ENTREZID"] <- rowData(data)[entrez_opt[entrez_opt %in% colnames(rowData(data))]]
+    rowData(data)["entrezgene_id"] <- rowData(data)[entrez_opt[entrez_opt %in% colnames(rowData(data))]]
     return(list(
       "no_ann" = FALSE,
-      "base_annotation" = "ENTREZID",
+      "base_annotation" = "entrezgene_id",
       "can_start" = TRUE,
       "new_data" = data
     ))
   }
   if (any(ensembl_opt %in% colnames(rowData(data)))){
-    rowData(data)["ENSEMBL"] <- rowData(data)[ensembl_opt[ensembl_opt %in% colnames(rowData(data))]]
+    rowData(data)["ensembl_gene_id"] <- rowData(data)[ensembl_opt[ensembl_opt %in% colnames(rowData(data))]]
     return(list(
       "no_ann" = FALSE,
-      "base_annotation" = "ENSEMBL",
+      "base_annotation" = "ensembl_gene_id",
       "can_start" = FALSE,
       "new_data" = data
     ))
   }
   if (any(symbol_opt %in% colnames(rowData(data)))){
-    rowData(data)["SYMBOL"] <- rowData(data)[symbol_opt[symbol_opt %in% colnames(rowData(data))]]
+    rowData(data)["external_gene_name"] <- rowData(data)[symbol_opt[symbol_opt %in% colnames(rowData(data))]]
     return(list(
       "no_ann" = FALSE,
-      "base_annotation" = "SYMBOL",
+      "base_annotation" = "external_gene_name",
       "can_start" = FALSE,
       "new_data" = data
     ))

diff --git a/program/shinyApp/R/enrichment_analysis/enrichment_analysis.R b/program/shinyApp/R/enrichment_analysis/enrichment_analysis.R
@@ -10,7 +10,7 @@ gene_set_enrichment <- function(
   sorting
 ){
   # assign the correct names to geneSetChoice
-  names(geneSetChoice) <- rowData(data)[["ENTREZID"]]
+  names(geneSetChoice) <- rowData(data)[["entrezgene_id"]]
   geneSetChoice <- sort(geneSetChoice,decreasing = T)
   # remove duplicate entries (keep the one highest in list)
   geneSetChoice <- geneSetChoice[!duplicated(names(geneSetChoice))]

diff --git a/program/shinyApp/R/enrichment_analysis/server.R b/program/shinyApp/R/enrichment_analysis/server.R
@@ -686,8 +686,8 @@ enrichment_analysis_Server <- function(id, data, params, updates){
             selectInput(
               inputId = ns("AnnotationSelection"),
               label = "Which annotation are you using?",
-              choices = c("ENSEMBL", "ENTREZID", "SYMBOL"),
-              selected="ENTREZID",
+              choices = c("ensembl_gene_id", "external_gene_name", "entrezgene_id"),
+              selected="entrezgene_id",
               multiple = F
             ),
             actionButton(inputId = ns("AMC"), label = "Proceed"),

diff --git a/program/shinyApp/R/enrichment_analysis/translation.R b/program/shinyApp/R/enrichment_analysis/translation.R
@@ -3,18 +3,52 @@ translate_genes_ea <- function(data, annotation_results, input){
     # copy rownames with corresponding annotation as columnname
     rowData(data)[[annotation_results$base_annotation]] <- rownames(rowData(data))  # can this be just data?
   }
-  # translate to entrez id, currently only Humand and Mouse supported
-  if(par_tmp[[session$token]]['organism'] == "Human genes (GRCh38.p14)"){
-    orgDb <- org.Hs.eg.db::org.Hs.eg.db
-  }else{
-    orgDb <- org.Mm.eg.db::org.Mm.eg.db
-  }
-  rowData(data)[["ENTREZID"]] <- AnnotationDbi::mapIds(
-    orgDb,
-    keys = rowData(data)[[annotation_results$base_annotation]],
-    column = "ENTREZID",
-    keytype = annotation_results$base_annotation
+  datasets_avail <- listDatasets(useEnsembl(biomart = "genes"))
+  ensembl <- useEnsembl(
+    biomart = "ensembl",
+    dataset = datasets_avail[datasets_avail$description == par_tmp[[session$token]]['organism'], "dataset"]
+  )
+  out <- getBM(
+    attributes = c("ensembl_gene_id", "gene_biotype", "external_gene_name", "entrezgene_id"),
+    filter = annotation_results$base_annotation,
+    values = rowData(data)[,annotation_results$base_annotation],
+    mart = ensembl
   )
+  # Align the rows based on matching annotation
+  match_indices <- match(
+    rowData(data)[,annotation_results$base_annotation], out[,annotation_results$base_annotation]
+  )
+  matched_out <- out[match_indices, ]
+
+  if (all(is.na(matched_out$ensembl_gene_id))) {
+    # Most likely wrong organism used
+    output$debug <- renderText({"<font color=\"#ab020a\"><b>You have most likely chosen the wrong organism! No annotation was added</b></font>"})
+  } else {
+    # Initialize new columns in the rowData with NA
+    rowData(data)$ensembl_gene_id <- NA
+    rowData(data)$gene_biotype <- NA
+    rowData(data)$external_gene_name <- NA
+    rowData(data)$entrezgene_id <- NA
+
+    # Update rowData with matched information
+    matched_rows <- !is.na(match_indices)
+    rowData(data)$ensembl_gene_id[matched_rows] <- matched_out$ensembl_gene_id[matched_rows]
+    rowData(data)$gene_biotype[matched_rows] <- matched_out$gene_biotype[matched_rows]
+    rowData(data)$external_gene_name[matched_rows] <- matched_out$external_gene_name[matched_rows]
+    rowData(data)$entrezgene_id[matched_rows] <- matched_out$entrezgene_id[matched_rows]
+  }
+  # # translate to entrez id, currently only Human and Mouse supported
+  # if(par_tmp[[session$token]]['organism'] == "Human genes (GRCh38.p14)"){
+  #   orgDb <- org.Hs.eg.db::org.Hs.eg.db
+  # }else{
+  #   orgDb <- org.Mm.eg.db::org.Mm.eg.db
+  # }
+  # rowData(data)[["ENTREZID"]] <- AnnotationDbi::mapIds(
+  #   orgDb,
+  #   keys = rowData(data)[[annotation_results$base_annotation]],
+  #   column = "ENTREZID",
+  #   keytype = annotation_results$base_annotation
+  # )
   return(data)
 }
 
@@ -61,8 +95,4 @@ translate_genes_oa <- function(
     )
     return(geneSetChoice)
   }
-  # case DE genes to be added
-  # if(geneSet2Enrich == "DE_Genes"){
-  #
-  # }
 }