From 2d85a7ba1b429134f081dc69a058174a801b544e Mon Sep 17 00:00:00 2001
From: Robrecht Cannoodt
Date: Tue, 3 Sep 2024 06:20:40 +0200
Subject: [PATCH] add singler

---
Review note: the blob ids on the `index` lines for the two new files predate
the review edits below and were not recomputed.

 src/methods/singler/config.vsh.yaml         | 32 +++++++++++++
 src/methods/singler/script.R                | 52 +++++++++++++++++++++
 src/workflows/run_benchmark/config.vsh.yaml |  1 +
 src/workflows/run_benchmark/main.nf         |  1 +
 4 files changed, 86 insertions(+)
 create mode 100644 src/methods/singler/config.vsh.yaml
 create mode 100644 src/methods/singler/script.R

diff --git a/src/methods/singler/config.vsh.yaml b/src/methods/singler/config.vsh.yaml
new file mode 100644
index 0000000..082cfca
--- /dev/null
+++ b/src/methods/singler/config.vsh.yaml
@@ -0,0 +1,32 @@
+__merge__: /src/api/comp_method.yaml
+name: "singler"
+label: SingleR
+summary: Reference-Based Single-Cell RNA-Seq Annotation
+description: |
+  Performs unbiased cell type recognition from single-cell RNA sequencing data,
+  by leveraging reference transcriptomic datasets of pure cell types to infer the
+  cell of origin of each single cell independently.
+references:
+  doi:
+    - 10.1038/s41590-018-0276-y
+links:
+  repository: https://www.bioconductor.org/packages/release/bioc/html/SingleR.html
+  documentation: https://www.bioconductor.org/packages/release/bioc/vignettes/SingleR/inst/doc/SingleR.html
+info:
+  preferred_normalization: log_cp10k
+  variants:
+    singler:
+resources:
+  - type: r_script
+    path: script.R
+engines:
+  - type: docker
+    image: openproblems/base_r:1.0.0
+    setup:
+      - type: r
+        bioc: [SingleR]
+runners:
+  - type: executable
+  - type: nextflow
+    directives:
+      label: [midtime, highmem, highcpu]
diff --git a/src/methods/singler/script.R b/src/methods/singler/script.R
new file mode 100644
index 0000000..f975eca
--- /dev/null
+++ b/src/methods/singler/script.R
@@ -0,0 +1,52 @@
+# SingleR label projection: predict a label for every test cell from the
+# labelled training cells and write the predictions to an AnnData file.
+cat(">> Loading dependencies\n")
+library(anndata, warn.conflicts = FALSE)
+# SingleR is only called via `SingleR::`; stop early if it is not installed.
+if (!requireNamespace("SingleR", quietly = TRUE)) {
+  stop("Package 'SingleR' is required but not installed.", call. = FALSE)
+}
+library(Matrix, warn.conflicts = FALSE)
+
+## VIASH START
+par <- list(
+  input_train = "resources_test/task_label_projection/pancreas/train.h5ad",
+  input_test = "resources_test/task_label_projection/pancreas/test.h5ad",
+  output = "output.h5ad"
+)
+meta <- list(
+  name = "singler"
+)
+## VIASH END
+
+cat(">> Load input data\n")
+input_train <- read_h5ad(par$input_train)
+input_test <- read_h5ad(par$input_test)
+
+cat(">> Run method\n")
+# The "normalized" layers are transposed before being handed to SingleR
+# (presumably cells x genes -> genes x cells, as SingleR expects; confirm).
+pred <- SingleR::SingleR(
+  test = t(input_test$layers[["normalized"]]),
+  ref = t(input_train$layers[["normalized"]]),
+  labels = input_train$obs$label
+)
+
+cat(">> Create output data\n")
+# The output carries no variables (0 columns): only per-cell predictions in
+# `obs` and the identifying metadata in `uns`.
+output <- anndata::AnnData(
+  obs = data.frame(
+    row.names = input_test$obs_names,
+    label_pred = pred$labels
+  ),
+  uns = list(
+    method_id = meta$name,
+    dataset_id = input_test$uns[["dataset_id"]],
+    normalization_id = input_test$uns[["normalization_id"]]
+  ),
+  shape = c(input_test$n_obs, 0L)
+)
+
+cat(">> Write output to file\n")
+output$write_h5ad(par$output, compression = "gzip")
diff --git a/src/workflows/run_benchmark/config.vsh.yaml b/src/workflows/run_benchmark/config.vsh.yaml
index d66592f..b291a2d 100644
--- a/src/workflows/run_benchmark/config.vsh.yaml
+++ b/src/workflows/run_benchmark/config.vsh.yaml
@@ -76,6 +76,7 @@ dependencies:
   - name: methods/scanvi
   - name: methods/scanvi_scarches
   - name: methods/seurat_transferdata
+  - name: methods/singler
   - name: methods/xgboost
   - name: metrics/accuracy
   - name: metrics/f1
diff --git a/src/workflows/run_benchmark/main.nf b/src/workflows/run_benchmark/main.nf
index d7245d9..3e3970f 100644
--- a/src/workflows/run_benchmark/main.nf
+++ b/src/workflows/run_benchmark/main.nf
@@ -16,6 +16,7 @@ methods = [
   scanvi,
   scanvi_scarches,
   seurat_transferdata,
+  singler,
   xgboost
 ]
 