diff --git a/.gitignore b/.gitignore index 086b3a0..3302caf 100644 --- a/.gitignore +++ b/.gitignore @@ -13,3 +13,9 @@ docs /doc/ /Meta/ .DS* +inst/rmarkdown/templates/rnaseq/skeleton/DE/Multiplicative_DGE_Analysis.Rmd +tests/* +.Rdata +.httr-oauth +.DS_Store +.quarto diff --git a/DESCRIPTION b/DESCRIPTION index 40012fd..e7d76dc 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: bcbioR Type: Package Title: Templates and functions to guide downstream analysis and data interpretation -Version: 0.1.3 +Version: 0.2.0 Authors@R: person("Pantano", "Lorena", , "lorena.pantano@gmail.com", role = c("aut", "cre")) Description: Collaborative code repository at the Harvard Chan Bioinformatics Core. @@ -20,8 +20,13 @@ Imports: grDevices, R.utils, readr, + withr, + usethis, fs, - withr + jsonlite, + yaml, + whisker, + rlang Suggests: knitr, rmarkdown, @@ -30,4 +35,4 @@ VignetteBuilder: knitr URL: http://bcb.io/bcbioR/ Config/testthat/edition: 3 Depends: - R (>= 2.10) + R (>= 4.3.1) diff --git a/NAMESPACE b/NAMESPACE index 3cc700a..00ac2cc 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,18 +1,21 @@ # Generated by roxygen2: do not edit by hand export(bcbio_nfcore_check) -export(bcbio_set_project) export(bcbio_templates) export(cb_friendly_cols) export(cb_friendly_pal) export(list_cb_friendly_cols) export(scale_color_cb_friendly) export(scale_fill_cb_friendly) +export(use_bcbio_analysis) +export(use_bcbio_projects) import(DESeq2) import(R.utils) +import(fs) import(ggplot2) import(ggprism) import(hues) +import(usethis) importFrom(grDevices,colorRampPalette) importFrom(magrittr,"%>%") importFrom(readr,read_csv) diff --git a/R/bcbioR-package.R b/R/bcbioR-package.R index 64459df..903be7b 100644 --- a/R/bcbioR-package.R +++ b/R/bcbioR-package.R @@ -9,6 +9,8 @@ ## usethis namespace: end #' @import DESeq2 #' @import ggplot2 +#' @import usethis +#' @import fs #' @import hues #' @import ggprism #' @import R.utils diff --git a/R/hello.R b/R/hello.R deleted file mode 100644 index aee6389..0000000 --- a/R/hello.R +++ /dev/null @@ -1,167 +0,0 @@ -.fix <- function(x){ - x <- tolower(x) %>% str_replace_all(., "[[:punct:]]", "_") - x <- str_replace_all(x, " ", "_") - return(x) -} - - -#' Function to check samplesheet for nf-core -#' -#' @param file path to CSV file for nf-core -#' @examples -#' -#' bcbio_nfcore_check(system.file("extdata", "rnaseq_good.csv", package = "bcbioR") ) -#' -#' @export -bcbio_nfcore_check <- function(file){ - required=c("sample","fastq_1","fastq_2","strandedness") - samplesheet=read_csv(file) - - if (!(all(required %in% colnames(samplesheet)))){ - print(colnames(samplesheet)) - stop("Missing required columns ", paste(required, collapse = " ")) - }else if (any(grepl("^[1-9]", samplesheet[["sample"]]))){ - stop("Avoid samples starting with numbers ") - }else if (any(is.na(samplesheet))){ - warning("Columns with missing values") - }else{ - message("All good.") - } -} - -#' Function to help deploy analysis folder inside a project folder -#' -#' This function contains Rmd, R, md, files that help to structure -#' an analysis following HCBC best-practices. -#' For rnaseq, it will deploy: QC and DE Rmd with additional files to help -#' to facilitate the analysis as needed. -#' -#' Normally these helper files are inside a report folder inside a -#' project folder. -#' -#' @param type string indicating the type of analysis, supported: -#' - base -#' - rnaseq, scrnaseq, -#' - teaseq -#' - cosmx -#' -#' @param outpath string path indicating where to copy all the files to -#' @examples -#' \dontrun{ -#' bcbio_templates("rnaseq", "path_to_projects/project1/reports") -#' } -#' @export -bcbio_templates <- function(type="rnaseq", outpath){ - switch(type, - base={ - fpath <- system.file("rmarkdown/templates/common", "skeleton", package="bcbioR") - copyDirectory(fpath, outpath) - }, - rnaseq={ - fpath <- system.file("rmarkdown/templates/rnaseq", "skeleton", package="bcbioR") - copyDirectory(fpath, outpath) - }, - scrnaseq={ - fpath <- system.file("rmarkdown/templates/singlecell", "skeleton", package="bcbioR") - copyDirectory(fpath, outpath) - }, - teaseq={ - fpath <- system.file("rmarkdown/templates/teaseq", "skeleton", package="bcbioR") - copyDirectory(fpath, outpath) - }, - cosmx={ - fpath <- system.file("rmarkdown/templates/cosmx", "skeleton", package="bcbioR") - copyDirectory(fpath, outpath) - }, - { - stop('project type not recognize, please choose: ', 'base', - 'rnaseq', 'scrnaseq', - 'teaseq', 'cosmx') - } - ) -} - -#' Function to help with project name used for parent folder -#' -#' This function will ask for user input about: -#' * numeric code -#' * PI full name -#' * technology -#' * tissue -#' * organism -#' * project description -#' -#' It removes special character with `_`. The output is a guideline to -#' what the folder used can be. -#' -#' @returns A string list with hbc_code, and project folder name -#' @export -bcbio_set_project <- function() { - hbc_code <- readline("What is the hbc code (only numbers):\n") - hbc_code <- paste0("hbc", hbc_code) - pi <- readline("What is PI last name:\n") - technology <- readline("What is the technology:\n") - tissue <- readline("What is the tissue:\n") - org <- readline("What is the organism:\n") - project <- readline("What is the project name:\n") - #dropbox <- readline("What is the dropbox name:\n") - #github_org <- readline("What is the github organization:\n") - #hbc_$technology_of_$pilastname_$intervention_on_$tissue_in_$organism_$hbccode - project_full <- paste(technology, .fix(pi), .fix(project), tissue, org, hbc_code, sep="_") - #github <- c(github_org,project_full) - opts <- list(code=hbc_code, project=project_full) - #dropbox=file.path(dropbox,project_full), - #github=github) - print(opts) - return(opts) -} - - -bcbio_start_project <- function(options) { - -} - -bcbio_gitignore <- function(options) { - -} - -# This function showcases how one might write a function to be used as an -# RStudio project template. This function will be called when the user invokes -# the New Project wizard using the project template defined in the template file -# at: -# -# inst/rstudio/templates/project/hello_world.dcf -# -# The function itself just echos its inputs and outputs to a file called INDEX, -# which is then opened by RStudio when the new project is opened. -rnaseq <- function(path, ...) { - - # ensure path exists - dir.create(path, recursive = TRUE, showWarnings = FALSE) - - # generate header - header <- c( - "# This file was generated by a call to 'ptexamples::hello_world()'.", - "# The following inputs were received:", - "" - ) - - # collect inputs - dots <- list(...) - text <- lapply(seq_along(dots), function(i) { - key <- names(dots)[[i]] - val <- dots[[i]] - paste0(key, ": ", val) - }) - - # collect into single text string - contents <- paste( - paste(header, collapse = "\n"), - paste(text, collapse = "\n"), - sep = "\n" - ) - - # write to index file - writeLines(contents, con = file.path(path, "README.md")) - -} diff --git a/R/helpers.R b/R/helpers.R new file mode 100644 index 0000000..799ae39 --- /dev/null +++ b/R/helpers.R @@ -0,0 +1,297 @@ +.fix <- function(x){ + x <- tolower(x) %>% str_replace_all(., "[[:punct:]]", "_") + x <- str_replace_all(x, " ", "_") + return(x) +} + + +#' Function to check samplesheet for nf-core +#' +#' @param file path to CSV file for nf-core +#' @examples +#' +#' bcbio_nfcore_check(system.file("extdata", "rnaseq_good.csv", package = "bcbioR") ) +#' +#' @export +bcbio_nfcore_check <- function(file){ + required=c("sample","fastq_1","fastq_2","strandedness") + samplesheet=read_csv(file) + + if (!(all(required %in% colnames(samplesheet)))){ + print(colnames(samplesheet)) + stop("Missing required columns ", paste(required, collapse = " ")) + }else if (any(grepl("^[1-9]", samplesheet[["sample"]]))){ + stop("Avoid samples starting with numbers ") + }else if (any(is.na(samplesheet))){ + warning("Columns with missing values") + }else{ + message("All good.") + } +} + +#' Function to help deploy analysis folder inside a project folder +#' +#' This function contains Rmd, R, md, files that help to structure +#' an analysis following HCBC best-practices. +#' For rnaseq, it will deploy: QC and DE Rmd with additional files to help +#' to facilitate the analysis as needed. +#' +#' Normally these helper files are inside a report folder inside a +#' project folder. +#' +#' @param type string indicating the type of analysis, supported: rnaseq. +#' +#' @param outpath string path indicating where to copy all the files to +#' @examples +#' \dontrun{ +#' bcbio_templates("rnaseq", "path_to_projects/project1/reports") +#' } +#' @export +bcbio_templates <- function(type="rnaseq", outpath){ + fs::dir_create(outpath) + switch(type, + rnaseq={ + #file.copy(fpath, outpath, recursive = T) + copy_templates(outpath, "nf-core/rnaseq") + }, + scrnaseq={ + #file.copy(fpath, outpath, recursive = T) + copy_templates(outpath, "singlecell") + }, + { + stop('project type not recognize, please choose: ', 'rnaseq', 'singlecell') + } + ) +} + +read_pipeline_info <- function(nfcore){ + # pipeline_info/params_2024-05-28_12-28-51.json + config <- fs::path_join(c(nfcore, "pipeline_info")) + params <- fs::dir_ls(config, regexp = "params") + metadata <- jsonlite::read_json(params)[["input"]] + # input + # tmp_rna/pipeline_info/software_versions.yml + software <- fs::path_join(c(nfcore, "pipeline_info", "software_versions.yml")) + software_txt <- yaml::read_yaml(software) + pipeline <- grep("nf-core", names(software_txt$Workflow), value = TRUE) + # Workflow: + # Nextflow: 24.04.1 + # nf-core/rnaseq: 3.14.0 + # check only rnaseq is supported + if (!(pipeline %in% c("nf-core/rnaseq"))){ + ui_stop("Sorry, we don't yet support {ui_value(pipeline)}") + } + list(metadata=metadata, pipeline=pipeline) +} + +render_rmd <- function(infile, outfile, ls_data){ + whisker.render(read_file(infile), + ls_data) %>% + write_file(outfile) +} + +bcbio_params <-function(nfcore_path, pipeline, metadata, copy){ + ui_info("Reading input files from {ui_value(nfcore_path)}") + if (pipeline=="nf-core/rnaseq"){ + if (!copy){ + ls_data<-list( + se_object =fs::path_join(c(nfcore_path, "star_salmon/salmon.merged.gene_counts.rds")), + metadata_fn = metadata, + counts_fn = fs::path_join(c(nfcore_path, "star_salmon/salmon.merged.gene_counts.tsv")), + multiqc_data_dir = fs::path_join(c(nfcore_path, "multiqc/star_salmon/multiqc-report-data/")), + gtf_fn = fs::path_join(c(nfcore_path, "genome/genome.filtered.gtf"))) + return(ls_data) + } + } + +} + +copy_files_in_folder<- function(origin, remote){ + to_copy <- fs::dir_ls(origin) + to_copy <- grep("org", to_copy, + value = TRUE, invert = TRUE) + for (element in to_copy){ + full_new_path <- fs::path_join(c(remote, fs::path_file(element))) + + if (fs::is_dir(element)){ + if (!(fs::dir_exists(full_new_path))) + fs::dir_copy(element, full_new_path) + } + if (fs::is_file(element)){ + if (!(fs::file_exists(full_new_path))) + fs::file_copy(element, full_new_path) + } + } +} + +copy_templates <- function(path, pipeline){ + base = c("bcbioR") + if (pipeline=="nf-core/rnaseq"){ + parts = c("templates/rnaseq") + }else if(pipeline=="singlecell"){ + parts = c("templates/singlecell") + }else if(pipeline=="teaseq"){ + parts = c("templates/teaseq") + }else if(pipeline=="cosmx"){ + parts = c("templates/cosmx") + } + analysis_template <- fs::path_package(base, parts) + ui_info("Getting templates from {ui_value(analysis_template)}") + # ls_files <- grep("org", list.files(analysis_template, full.names = TRUE), + # value = TRUE, invert = TRUE) + # ui_info("{ui_value(length(ls_files))} amount of files to copy") + copy_files_in_folder(analysis_template, path) +} + +bcbio_render <- function(path, pipeline, data){ + + if (pipeline=="nf-core/rnaseq"){ + # analysis_template <- fs::path_package("bcbioR", "templates", "rnaseq", "qc") + # fs::dir_copy(analysis_template, fs::path_join(c(path, "reports", "qc")), overwrite=TRUE) + # analysis_template <- fs::path_package("bcbioR", "templates", "rnaseq", "de") + # fs::dir_copy(analysis_template, fs::path_join(c(path, "reports", "de")), overwrite=TRUE) + render_rmd( + fs::path_join(c(path, "reports", "qc", "QC_nf-core.Rmd")), + fs::path_join(c(path, "reports", "qc", "QC_nf-core.Rmd")), + data + ) + render_rmd( + fs::path_join(c(path, "reports", "de", "DEG.Rmd")), + fs::path_join(c(path, "reports", "de", "DEG.Rmd")), + data + ) + ui_info("Please, to start the analysis, modify these parameter in QC/QC.rmd") + ui_todo("set genome to hg38, mm10, mm39, or other") + ui_todo("set factor_of_interest to a column in your metadata") + }else{ + ui_warn("These are draft templates, are meant to show examples of specific analysis") + ui_todo("Please, read carefully and adapt to your data and question.") + } +} + +#' @export +use_bcbio_analysis <- function(path, pipeline, copy=TRUE, metadata=NULL){ + + if (copy){ + # deploy files + ui_info("Rmd templates will be copied but variables path won't be filled automatically.") + if (!is.null(metadata)){ + meta_path <- fs::path_join(c(path, "meta", fs::path_file(metadata))) + if (!(fs::file_exists(metadata))) + ui_stop("{ui_value(metadata)} doesn't exist.") + fs::file_copy(metadata, meta_path) + } + } + if (!is.null(pipeline) & fs::dir_exists(pipeline)){ + # ui_stop("{ui_value(nfcore)} doesn't exist. point to nfcore path or turn on copy mode.") + ui_info("Trying to guess nf-core pipeline at {ui_value(pipeline)}") + # guess analysis from pipeline file + information <- read_pipeline_info(pipeline) + fs::dir_create(fs::path_join(c(path, "meta"))) + meta_path <- fs::path_join(c(path, "meta", fs::path_file(information$metadata))) + pipeline <- information$pipeline + if (!is.null(metadata)){ + if (!(fs::file_exists(metadata))) + ui_stop("{ui_value(metadata)} doesn't exist.") + fs::file_copy(metadata, meta_path) + }else{ + if (!fs::file_exists(information$metadata)){ + ui_warn("{ui_value(metadata)} not found. We can only work with local filesytems right now.") + ui_todo("Please, copy {ui_value(metadata)} to {ui_value(meta_path)}.") + ui_warn("If this file isn't manually set up, the Rmd code will fail.") + }else{ + ui_info("Copy metadata to {ui_value(meta_path)}") + fs::file_copy(information$metadata, meta_path) + } + metadata <- meta_path + } + path_final <- fs::path_join(c(path, "final")) + ui_todo("Please, copy nf-core output directory to {ui_value(path_final)}") + } + # set all files from analysis + copy_templates(fs::path_join(c(path, "reports")), pipeline) + if (fs::dir_exists(pipeline)){ + data <- bcbio_params(nfcore, pipeline, metadata) + bcbio_render(path, pipeline, data) + } + + +} + +#' @export +#' @examples +#' path <- withr::local_tempdir() +#' use_bcbio_projects(path,pipeline="nf-core/rnaseq",copy=TRUE) +#' fs::dir_ls(path) +use_bcbio_projects <- function(path, pipeline=NULL, metadata=NULL, + git=TRUE, gh=FALSE, org=NULL, copy=TRUE) { + + ui_info("Creating project at {ui_value(path)}") + if (!fs::dir_exists(path)) + fs::dir_create(path, mode = "u=xrw,g=xwr,o=r", recurse = TRUE) + + ui_info("Populating base project") + base_template <- fs::path_package("bcbioR", "templates", "base") + copy_files_in_folder(base_template, path) + + if (!is.null(pipeline)){ + ui_info("Using this pipeline templates {ui_value(pipeline)}") + use_bcbio_analysis(path, pipeline, copy = copy, metadata=metadata) + } + # is_nfcore_ready <- FALSE + # if (is.null(pipeline) && rlang::is_interactive()){ + # is_nfcore_ready <- ui_yeah("Have you already run nf-core pipeline?", + # n_yes=1, n_no =1) + # if (is_nfcore_ready && rlang::is_interactive()){ + # nfcore <- readline("? Enter path to nf-core output: ") + # }else{ + # ui_warn("Please, turn copy = TRUE to only deploy files or,") + # ui_stop("Please use {ui_code('use_bcbio_projects')} again when you have the nf-core output.") + # } + # use_bcbio_analysis(path, nfcore, copy, metadata) + # }else{ + # if (fs::dir_exists(nfcore)){ + # ui_info("Checking {ui_value(nfcore)} as nf-core output directory") + # use_bcbio_analysis(path, nfcore, copy, metadata) + # }else if (copy){ + # # deploy only files + # ui_info("Deploying only templates without pipeline information.") + # use_bcbio_analysis(path, nfcore, copy = TRUE, metadata=metadata) + # }else{ + # ui_warn("Please, provide nfcore working directory or") + # ui_warn("turn copy = TRUE to only deploy files.") + # } + # } + + # if (git){ + # ui_info("Create Git local repo at {ui_value(path)}") + # use_git() + # } + # if (gh){ + # ui_info("Create GitHub repo at {ui_value(path)}") + # whoami <- suppressMessages(gh::gh_whoami()) + # if (is.null(whoami)) { + # ui_warn(c( + # "x" = "Unable to discover a GitHub personal access token.", + # "i" = "A token is required in order to create and push to a new repo.", + # "_" = "Call {.run usethis::gh_token_help()} for help configuring a token." + # )) + # ui_todo("Try this later: use_github(organisation=org), private = TRUE") + # + # } + # use_github(organisation=org, private = TRUE) + # }else{ + # ui_info("You decided not to create a repo, please use this to push when ready") + # ui_todo("Try this later: use_github(organisation=org), private = TRUE") + # } + + answer <- FALSE + if (rlang::is_interactive()) + answer <- ui_yeah("Please, read the README.md file as the session starts.Are you ready?", + n_yes=1, n_no =1, shuffle=FALSE) + if (answer) + proj_activate(path) + if (!answer) + ui_info("Please use proj_activate({ui_value(path)})} to start this project.") + +} diff --git a/inst/rmarkdown/templates/common/skeleton/code/placeholder.R b/R/orgs.R similarity index 100% rename from inst/rmarkdown/templates/common/skeleton/code/placeholder.R rename to R/orgs.R diff --git a/inst/apps/app.R b/inst/apps/app.R new file mode 100644 index 0000000..531206b --- /dev/null +++ b/inst/apps/app.R @@ -0,0 +1,51 @@ +# Global variables can go here +library(stringr) +.fix <- function(x){ + x <- tolower(x) + x <- str_replace_all(x, "[[:punct:]]", "_") + x <- str_replace_all(x, " ", "_") + return(x) +} + + +# Define the UI +ui <- fluidPage( + # Application title + titlePanel("Create project name"), + + sidebarLayout( + # Sidebar with a slider and selection inputs + sidebarPanel( + textInput('hbc', 'hbc-code (no letters)', value = "00000"), + textInput('pi', 'What is PI last name:', value = "lastname"), + textInput('scientist', 'What is the scientist last name:', value = "scientist"), + textInput('tech', 'What is the technology:', value = "rnaseq"), + textInput('tissue', 'What is the tissue:', value = "mix|cells|heart"), + textInput('org', 'What is the organism:', value = "mix|human"), + textInput('proj', 'What is the project name:', value = "this_analysis_is_cool"), + + ), + + # Show Word Cloud + mainPanel( + br("Suggested project name:"), + br(), + verbatimTextOutput('project') + ) + ) +) + + +# Define the server code +server <- function(input, output, session) { + output$project <- renderText({ + hbc_code <- paste0("hbc", input$hbc) + project_full <- paste(input$tech, .fix(input$pi), .fix(input$scientist), + .fix(input$proj), + input$tissue, input$org, hbc_code, sep="_") + project_full + }) +} + +# Return a Shiny app object +shinyApp(ui = ui, server = server) diff --git a/inst/orgs/hcbc.yml b/inst/orgs/hcbc.yml new file mode 100644 index 0000000..9f263a5 --- /dev/null +++ b/inst/orgs/hcbc.yml @@ -0,0 +1,4 @@ +name: HCBC +description: Variables specifics to HCBC +github_org: hbc + diff --git a/inst/rmarkdown/templates/cellchat/template.yaml b/inst/rmarkdown/templates/cellchat/template.yaml new file mode 100644 index 0000000..aa08347 --- /dev/null +++ b/inst/rmarkdown/templates/cellchat/template.yaml @@ -0,0 +1,3 @@ +name: bcbio CellChat +description: Standard CellChat analyses +create_dir: false diff --git a/inst/rmarkdown/templates/common/skeleton/.gitignore b/inst/rmarkdown/templates/common/skeleton/.gitignore deleted file mode 100644 index 4cdfa50..0000000 --- a/inst/rmarkdown/templates/common/skeleton/.gitignore +++ /dev/null @@ -1,9 +0,0 @@ -*.Rproj* -.Rhistory -.Rproj.user -.Rhistory -.DS* -*._* -*placeholder* -data/* -final/* \ No newline at end of file diff --git a/inst/rmarkdown/templates/common/skeleton/data/readme b/inst/rmarkdown/templates/common/skeleton/data/readme deleted file mode 100644 index e69de29..0000000 diff --git a/inst/rmarkdown/templates/teaseq/skeleton/skeleton.Rmd b/inst/rmarkdown/templates/teaseq/skeleton/skeleton.Rmd deleted file mode 100644 index dc4bbf5..0000000 --- a/inst/rmarkdown/templates/teaseq/skeleton/skeleton.Rmd +++ /dev/null @@ -1,25 +0,0 @@ ---- -title: "General Project Information" -author: "Harvard Chan Bioinformatics Core" -date: "`r Sys.Date()`" -output: - html_document: - code_folding: hide - df_print: paged - highlights: pygments - number_sections: true - self_contained: true - theme: default - toc: true - toc_float: - collapsed: true - smooth_scroll: true -editor_options: - chunk_output_type: console -params: - params_file: information.R ---- - -```{r echo = F} -source(params$params_file) -``` diff --git a/inst/templates/base/.gitignore b/inst/templates/base/.gitignore new file mode 100644 index 0000000..a8c1ef8 --- /dev/null +++ b/inst/templates/base/.gitignore @@ -0,0 +1,17 @@ +*.Rproj* +.Rhistory +.Rproj.user +.Rhistory +.DS* +*._* +*placeholder* +data/* +final/* +.Rproj.user +data/* +docs/* +**/*html +**/*rds +**/*rda +**/*csv +**/*tsv diff --git a/inst/rmarkdown/templates/common/skeleton/README.md b/inst/templates/base/README.md similarity index 72% rename from inst/rmarkdown/templates/common/skeleton/README.md rename to inst/templates/base/README.md index 6218884..d286e6a 100644 --- a/inst/rmarkdown/templates/common/skeleton/README.md +++ b/inst/templates/base/README.md @@ -1,13 +1,24 @@ # Guidelines +## Set Repository + +- Start a git repository: `usethis::use_git()` +- Push this project to GitHub, follow these steps: + +* Only once every 30 days, set up your github credentials: `usethis::gh_token_help()` + * **NOTE** You may want to run this first (one time) to keep this token working in future sessions: `git config --global credential.helper store` + +- Push repository to HBC github as private: `usethis::use_github(org="hbc",private=TRUE)` + ## Set up work-space - [ ] Replace the title in this file to match the project's title - [ ] Modify `information.R` with the right text for this project, it can be used to source in other `Rmd` files. The main `Rmd` file in this directory can be used to show general information of the project if needed. - [ ] Use the same project name to create a folder in *Dropbox* and a repo in *GitHub* -- [ ] Use the function `bcbio_templates` to create templates inside `reports` for each type of analysis. For instance, for *RNAseq*: - - `bcbio_templates(type="rnaseq", outpath="reports")` or - - `bcbio_templates(type="rnaseq", outpath="reports/experiment1")` +- [ ] If you didn't provide the pipeline when creating this project: + Use the function `bcbio_templates` to create templates inside `reports` for each type of analysis. For instance, for *RNAseq*: + - `use_bcbio_analysis(".", 'nf-core/rnaseq', copy = TRUE)` or + - `use_bcbio_analysis(".", 'singlecell', copy = TRUE)` - Then go to that folder and read the `README.md` ## Folders diff --git a/inst/rmarkdown/templates/common/skeleton/information.R b/inst/templates/base/information.R similarity index 100% rename from inst/rmarkdown/templates/common/skeleton/information.R rename to inst/templates/base/information.R diff --git a/inst/rmarkdown/templates/common/skeleton/meta/placeholder.R b/inst/templates/base/meta/placeholder.R similarity index 100% rename from inst/rmarkdown/templates/common/skeleton/meta/placeholder.R rename to inst/templates/base/meta/placeholder.R diff --git a/inst/rmarkdown/templates/common/skeleton/scripts/placeholder b/inst/templates/base/scripts/placeholder similarity index 100% rename from inst/rmarkdown/templates/common/skeleton/scripts/placeholder rename to inst/templates/base/scripts/placeholder diff --git a/inst/rmarkdown/templates/cosmx/skeleton/QC/QC.Rmd b/inst/templates/cosmx/QC/QC.Rmd similarity index 100% rename from inst/rmarkdown/templates/cosmx/skeleton/QC/QC.Rmd rename to inst/templates/cosmx/QC/QC.Rmd diff --git a/inst/rmarkdown/templates/cosmx/skeleton/QC/run_markdown.R b/inst/templates/cosmx/QC/run_markdown.R similarity index 100% rename from inst/rmarkdown/templates/cosmx/skeleton/QC/run_markdown.R rename to inst/templates/cosmx/QC/run_markdown.R diff --git a/inst/rmarkdown/templates/cosmx/skeleton/information.R b/inst/templates/cosmx/information.R similarity index 100% rename from inst/rmarkdown/templates/cosmx/skeleton/information.R rename to inst/templates/cosmx/information.R diff --git a/inst/templates/rnaseq/DE/Comparison-intersections.Rmd b/inst/templates/rnaseq/DE/Comparison-intersections.Rmd new file mode 100644 index 0000000..b073469 --- /dev/null +++ b/inst/templates/rnaseq/DE/Comparison-intersections.Rmd @@ -0,0 +1,258 @@ +--- +title: "Comparing DE Results - Multiple Contrasts" +author: "Harvard Chan Bioinformatics Core" +date: "`r Sys.Date()`" +output: + html_document: + code_folding: hide + df_print: paged + highlights: pygments + number_sections: true + self_contained: true + theme: default + toc: true + toc_float: + collapsed: true + smooth_scroll: true +editor_options: + chunk_output_type: console +params: + project_file: ../information.R +--- + +```{r load_params, cache = FALSE, message = FALSE, warning=FALSE} +## Adjusted P-value used for significance +padj_co <- 0.05 +## Log2FC used for significance. If no cutoff used put 0 +LFC <- 0.5 +## Normalized counts for ALL samples +# norm <- "/Users/emb016/Documents/comparisons_templates/norm_counts.csv" +# Load the count data, for this example it is the last columns of the DE table +norm_counts <- read.csv("https://raw.githubusercontent.com/bcbio/bcbioR-test-data/main/rnaseq/cross-comparison/norm_counts.csv.gz", + row.names = 1) + +# Load the meta data, here we are making one for the exmaple +metadata <- read_csv("https://raw.githubusercontent.com/bcbio/bcbioR-test-data/main/rnaseq/cross-comparison/meta.csv.gz") %>% as.data.frame() + +## Full results file (all genes) for contrast 1 +files=c("https://raw.githubusercontent.com/bcbio/bcbioR-test-data/main/rnaseq/cross-comparison/all_results_DMSO_vs_Group1.csv.gz", + "https://raw.githubusercontent.com/bcbio/bcbioR-test-data/main/rnaseq/cross-comparison/all_results_DMSO_vs_Group2.csv.gz", + "https://raw.githubusercontent.com/bcbio/bcbioR-test-data/main/rnaseq/cross-comparison/all_results_DMSO_vs_Group3.csv.gz") + +``` + + + +```{r load_libraries, cache = FALSE, message = FALSE, warning=FALSE} +library(rtracklayer) +library(tidyverse) +library(stringr) +library(ggpubr) +library(knitr) +library(bcbioR) +library(ggprism) +library(viridis) +library(pheatmap) +library(janitor) +library(ggvenn) +library(ggplot2) +library(UpSetR) +library(ggprism) +#library(org.Ce.eg.db) +library(org.Hs.eg.db) +#library(org.Mm.eg.db) + +colors=cb_friendly_cols(1:15) +ggplot2::theme_set(theme_prism(base_size = 14)) +opts_chunk[["set"]]( + cache = F, + cache.lazy = FALSE, + dev = c("png", "pdf"), + error = TRUE, + highlight = TRUE, + message = FALSE, + prompt = FALSE, + tidy = FALSE, + warning = FALSE, + echo = T, + fig.height = 4) + +# set seed for reproducibility +set.seed(1234567890L) +``` + + +# Load Data + +We load our dataset + +```{r load_data} + +## Name of contrast. This will be displayed on the figures. +# you can manually indicate a list of names as comp_names=c("name1","name2"...) +comp_names = basename(files) %>% + str_remove_all("all_results_|.csv|.gz") %>% + str_replace_all("_", " ") +names(files)=comp_names +N=length(files) +stopifnot(length(files)==length(comp_names)) + +## Make sure you have set up N above +all_genes=lapply(names(files), function(name){ + data <- read_csv(files[name]) %>% + dplyr::filter(padj <= 1) +}) +sign_genes=lapply(names(files), function(name){ + data <- read_csv(files[name]) %>% + dplyr::filter(padj <= 1) + data %>% + dplyr::filter(padj < padj_co, abs(lfc) > LFC) +}) +``` + + + +# Make list of comparisons + + +```{r, fig.height=8, fig.width=8, warning=FALSE, error=FALSE, message=FALSE} +de=lapply(sign_genes, function(x){ + x$gene_id +}) +names(de) <- comp_names +``` + +## Make an upset plot + +Because we have done so many tests venn diagrams no longer work for our data. Instead we will use upset plots. *These plots are relatively intuitive for 2 or 3 categories, but can tend to get more complex for >3 categories. In all cases, you will find the categories being compared and their size listed below the bar plots on the left. As you look to the right (directly below each bar) there are dots with connecting lines that denote which categories the overlap is between, or if there is no overlap (just a dot). The numbers at the top of the bars denote the size of the overlap.* + + +```{r, fig.height=8, fig.width=12} +upset(fromList(de), order.by = "freq", nsets=N) + +``` + +## Pull intersect(s) of interest + +After identifying intersect(s) of interest we can determine which genes are found in which intersections + + +```{r, warning=FALSE, error=FALSE, message=FALSE} +## Grab intersection +gene_names <- data.frame(gene=unique(unlist(de))) + +df1 <- lapply(de,function(x){ + data.frame(gene = x) +}) %>% + bind_rows(.id = "path") + +df_int <- lapply(gene_names$gene,function(x){ + # pull the name of the intersections + intersection <- df1 %>% + dplyr::filter(gene==x) %>% + arrange(path) %>% + pull("path") %>% + paste0(collapse = "|") + # build the dataframe + data.frame(gene = x,int = intersection) +}) %>% bind_rows() +``` + + +```{r, eval=F} +## Run this code to find the name of your intersect of interest. You will use this in the next code chunk +table(df_int$int) +``` + +```{r, warning=FALSE, error=FALSE, message=FALSE} +## subset interaction of interest replace the intersect name with the name of the intersect from above. You can copy and paste the below commands to grab multiple intersects. + +Intersect1 <- subset(df_int, df_int$int=="DMSO vs Group2|DMSO vs Group3") +``` + +## Get annotation data +```{r, warning=FALSE, error=FALSE, message=FALSE} + +# edit this to be the correct organism. One set of annotations per intersect. +# rdata = AnnotationDbi::select(org.Hs.eg.db, Intersect1$gene, 'SYMBOL', 'ENSEMBL') %>% +# dplyr::select(gene_id = ENSEMBL, gene_name = SYMBOL) %>% distinct(gene_id, .keep_all = T) + +# FIX: following code is only for test data, use the above with real data +rdata=data.frame(gene_id=row.names(norm_counts), gene_name=row.names(norm_counts)) +``` + + + +## Heatmap of intersect + +We generate a heatmap with all samples to see the patterns contained in this intersect. + +```{r, fig.height=6, warning=FALSE, error=FALSE, message=FALSE} +## Assign factors of interest. These need to correspond to columns in your metadata. + +factor1 <- "Treatment" +factor2 <- "Cell_line" + +# Extract significant genes +stopifnot(all(Intersect1$gene %in% row.names(norm_counts))) +sigGenes <- Intersect1$gene + +### Extract normalized expression for significant genes +norm_sig <- norm_counts[sigGenes,] +meta <- data.frame(metadata[,print(factor1)],metadata[,print(factor1)]) +colnames(meta) <- c(print(factor1),print(factor2)) +rownames(meta) <- colnames(norm_sig) +### Set a color palette +heat_colors <- lapply(colnames(norm_sig), function(c){ + l.col=colors[1:length(unique(norm_sig[[c]]))] + names(l.col)=unique(norm_sig[[c]]) + l.col +}) + +### Run pheatmap using the metadata data frame for the annotation (11 x 5) +pheatmap(norm_sig, + color = inferno(10), + cluster_rows = T, + show_rownames = F, + annotation = meta, + annotation_colors = heat_colors, + border_color = NA, + fontsize = 10, + scale = "row", + fontsize_row = 10, + height = 20) +``` + + +## Graph all genes in intersect + +```{r, warning=FALSE, error=FALSE, message=FALSE} +Intersect1_annot <- Intersect1 %>% left_join(rdata, by=c("gene"="gene_id")) +# REMOVE to plot all +Intersect1_annot <- Intersect1_annot[1:10] + +graphs <- length(Intersect1_annot$gene) +to_test <- t(norm_counts) +rna = Intersect1_annot$gene +names = Intersect1_annot$gene_name + +to_graph = data.frame(to_test[,rna]) +to_graph = to_graph[Intersect1_annot$gene] +to_graph$Factor1 <- metadata[,factor1] +to_graph$Factor2 <- metadata[,factor2] + +#out <- vector("list", length = graphs) +for (i in seq(1,graphs)) { + to_graph$temp=to_graph[[i]] + print(ggplot(to_graph,aes(x=Factor1,y=temp,color=Factor2)) + + geom_boxplot() + geom_point(alpha=0.5) + ylab(paste0(names[[i]])) + xlab(factor1) + scale_color_discrete(name = "Covariate")) +} +``` + +# R session + +List and version of tools used for the QC report generation. + +```{r} +sessionInfo() +``` diff --git a/inst/templates/rnaseq/DE/Cross-comparison-analysis.Rmd b/inst/templates/rnaseq/DE/Cross-comparison-analysis.Rmd new file mode 100644 index 0000000..4e78ea6 --- /dev/null +++ b/inst/templates/rnaseq/DE/Cross-comparison-analysis.Rmd @@ -0,0 +1,236 @@ +--- +title: "Comparing DE Results - Pairwise" +author: "Harvard Chan Bioinformatics Core" +date: "`r Sys.Date()`" +output: + html_document: + code_folding: hide + df_print: paged + highlights: pygments + number_sections: true + self_contained: true + theme: default + toc: true + toc_float: + collapsed: true + smooth_scroll: true +editor_options: + chunk_output_type: console +params: + project_file: ../information.R +--- + + + +```{r load_params, cache = FALSE, message = FALSE, warning=FALSE} +# 1. Set up input files in this R file (params_pairwisecomp.R) +## Full results file (all genes) for contrastt 1 +comp1_fn <- 'https://raw.githubusercontent.com/bcbio/bcbioR-test-data/main/rnaseq/cross-comparison/all_results_DMSO_vs_Group1.csv.gz' +## Name of contrast 1. This will be displayed on the figures +comp1_name <- "DMSO vs. Group1" +## Full results file (all genes) for contrast 2 +comp2_fn <- 'https://raw.githubusercontent.com/bcbio/bcbioR-test-data/main/rnaseq/cross-comparison/all_results_DMSO_vs_Group2.csv.gz' +## Name of contrast 2. This will be displayed on the figures +comp2_name <- "DMSO vs. Group2" +## Adjusted P-value used for significance +padj_co <- 0.05 +## Log2FC used for significance. If no cutoff used put 0 +LFC <- 0.5 + +comp1 <- read_csv(comp1_fn) %>% + dplyr::filter(padj <= 1) +comp2 <- read_csv(comp2_fn) %>% + dplyr::filter(padj <= 1) +``` + + + +```{r load_libraries, cache = FALSE, message = FALSE, warning=FALSE} +library(rtracklayer) +library(tidyverse) +library(stringr) +library(ggpubr) +library(knitr) +library(bcbioR) +library(ggprism) +library(viridis) +library(pheatmap) +library(janitor) +library(ggvenn) +library(ggplot2) + +colors=cb_friendly_cols(1:15) +ggplot2::theme_set(theme_prism(base_size = 14)) +opts_chunk[["set"]]( + cache = F, + cache.lazy = FALSE, + dev = c("png", "pdf"), + error = TRUE, + highlight = TRUE, + message = FALSE, + prompt = FALSE, + tidy = FALSE, + warning = FALSE, + echo = T, + fig.height = 4) + +# set seed for reproducibility +set.seed(1234567890L) +``` + + +# Load Data + +We load our dataset + +```{r load_data} +# this code will load from bcbio or nf-core folder +# NOTE make sure to set numerator and denominator + +comp1_sig <- comp1 %>% + dplyr::filter(padj < padj_co, abs(lfc) > LFC) + +comp2_sig <- comp2 %>% + dplyr::filter(padj < padj_co, abs(lfc) > LFC) +``` + + + +# Comparisons + +We start with a venn diagram looking at the overlap between our two contrasts + +```{r, fig.height=8, fig.width=8} +name1 <- rlang::ensym(comp1_name) +name2 <- rlang::ensym(comp2_name) +names <- c(name1, name2) + +full <- list(comp1_sig$gene_id,comp2_sig$gene_id) +names(full) <-names + +ggvenn(full, show_percentage = F) + +``` + +## Compare effect sizes and direction + +We plot Log2FC for our contrasts and color points by whether or not they are significant in our contrasts. The black line is 1:1. + + +```{r fig.height=6, fig.width=8} +# Edit based on the data you are using + +#make sure to only use genes present in both results files +test_intersect <- intersect(comp1$gene_id, comp2$gene_id) +comp1_sub <- subset(comp1, comp1$gene_id %in% test_intersect) +comp2_sub <- subset(comp2, comp2$gene_id %in% test_intersect) + +## Check that gene names match +all(comp1_sub$gene_id== comp2_sub$gene_id) + +## Gather necessary data +lfc <- data.frame(comp1_sub$gene_id, comp1_sub$gene_name, comp1_sub$lfc, comp2_sub$lfc) +colnames(lfc) <- c("gene_id","gene_name", "comp1", "comp2") + +# subset to only include genes in both datasets and color by grouping +DE_comp1 <- setdiff(comp1_sig$gene_id, comp2_sig$gene_id) +DE_comp2 <- setdiff(comp2_sig$gene_id, comp1_sig$gene_id) +DE_both <- intersect(comp2_sig$gene_id, comp1_sig$gene_id) +not_sig <- comp1_sub$gene_id[!(comp1_sub$gene_id %in% c(DE_comp1,DE_comp2,DE_both))] + + +col <- rep(4, nrow(lfc)) +col[lfc$gene_id %in% not_sig] <- 1 +col[lfc$gene_id %in% DE_comp1] <- 2 +col[lfc$gene_id %in% DE_comp2] <- 3 +col[lfc$gene_id %in% DE_both] <- 4 + + +lfc$col <- lfc %>% + dplyr::mutate(color = case_when( + gene_id %in% DE_both ~ 3, + gene_id %in% DE_comp1 ~ 1, + gene_id %in% DE_comp2 ~ 2, + gene_id %in% not_sig ~ 8 + )) %>% pull(color) +lfc$col <- as.factor(lfc$col) + + +ggplot(lfc, aes(x=comp1, y=comp2, color=col)) + geom_point() + + labs(color="Group") + + scale_color_discrete(name = "Group", labels = c(paste0("Only DE in ",paste0(comp1_name)), paste0("Only DE in ",paste0(comp2_name)),"DE in both comparisons", "Not Significant")) + + geom_abline(intercept=0, slope=1) + + geom_hline(aes(yintercept=0)) + + geom_vline(aes(xintercept=0)) + + scale_color_cb_friendly() + + xlab(paste0("Log2FC in ",paste0(comp1_name))) + + ylab(paste0("Log2FC in ",paste0(comp2_name))) + +``` + + + +## Compare ajusted P-values + +We plot adjusted P-values for our contrasts and color points by whether or not they are significant in our contrasts. The black line is 1:1. + + +```{r fig.height=6, fig.width=8} +# Edit based on the data you are using + +#make sure to only use genes present in both results files +test_intersect <- intersect(comp1$gene_id, comp2$gene_id) +comp1_sub <- subset(comp1, comp1$gene_id %in% test_intersect) +comp2_sub <- subset(comp2, comp2$gene_id %in% test_intersect) + +## Check that gene names match +all(comp1_sub$gene_id== comp2_sub$gene_id) + +## Gather necessary data +lfc <- data.frame(comp1_sub$gene_id, comp1_sub$gene_name, comp1_sub$padj, comp2_sub$padj) +colnames(lfc) <- c("gene_id","gene_name", "comp1", "comp2") + +# subset to only include genes in both datasets and color by grouping +DE_comp1 <- setdiff(comp1_sig$gene_id, comp2_sig$gene_id) +DE_comp2 <- setdiff(comp2_sig$gene_id, comp1_sig$gene_id) +DE_both <- intersect(comp2_sig$gene_id, comp1_sig$gene_id) +not_sig <- comp1_sub$gene_id[!(comp1_sub$gene_id %in% c(DE_comp1,DE_comp2,DE_both))] + + +col <- rep(4, nrow(lfc)) +col[lfc$gene_id %in% not_sig] <- 1 +col[lfc$gene_id %in% DE_comp1] <- 2 +col[lfc$gene_id %in% DE_comp2] <- 3 +col[lfc$gene_id %in% DE_both] <- 4 + + +lfc$col <- lfc %>% + dplyr::mutate(color = case_when( + gene_id %in% DE_both ~ 3, + gene_id %in% DE_comp1 ~ 1, + gene_id %in% DE_comp2 ~ 2, + gene_id %in% not_sig ~ 8 + )) %>% pull(color) +lfc$col <- as.factor(lfc$col) + + +ggplot(lfc, aes(x=-log10(comp1), y=-log10(comp2), color=col)) + + geom_point() + labs(color="Group") + + scale_color_discrete(name = "Group", labels = c(paste0("-Log10 adjusted p-value ",paste0(comp1_name)), paste0("-Log10 adjusted p-value ",paste0(comp2_name)),"DE in both comparisons", "Not Significant")) + + geom_abline(intercept=0, slope=1) + + geom_hline(aes(yintercept=0)) + + geom_vline(aes(xintercept=0)) + + scale_color_cb_friendly() + + xlab(paste0("Log2FC in ",paste0(comp1_name))) + + ylab(paste0("Log2FC in ",paste0(comp2_name))) + +``` + + +# R session + +List and version of tools used for the QC report generation. + +```{r} +sessionInfo() +``` diff --git a/inst/rmarkdown/templates/rnaseq/skeleton/DE/params_de-example.R b/inst/templates/rnaseq/DE/params_de-example.R similarity index 100% rename from inst/rmarkdown/templates/rnaseq/skeleton/DE/params_de-example.R rename to inst/templates/rnaseq/DE/params_de-example.R diff --git a/inst/rmarkdown/templates/rnaseq/skeleton/DE/params_de.R b/inst/templates/rnaseq/DE/params_de.R similarity index 100% rename from inst/rmarkdown/templates/rnaseq/skeleton/DE/params_de.R rename to inst/templates/rnaseq/DE/params_de.R diff --git a/inst/rmarkdown/templates/rnaseq/skeleton/DE/run_markdown.R b/inst/templates/rnaseq/DE/run_markdown.R similarity index 100% rename from inst/rmarkdown/templates/rnaseq/skeleton/DE/run_markdown.R rename to inst/templates/rnaseq/DE/run_markdown.R diff --git a/inst/rmarkdown/templates/rnaseq/skeleton/QC/QC.Rmd b/inst/templates/rnaseq/QC/QC.Rmd similarity index 100% rename from inst/rmarkdown/templates/rnaseq/skeleton/QC/QC.Rmd rename to inst/templates/rnaseq/QC/QC.Rmd diff --git a/inst/rmarkdown/templates/rnaseq/skeleton/QC/params_qc.R b/inst/templates/rnaseq/QC/params_qc.R similarity index 100% rename from inst/rmarkdown/templates/rnaseq/skeleton/QC/params_qc.R rename to inst/templates/rnaseq/QC/params_qc.R diff --git a/inst/rmarkdown/templates/rnaseq/skeleton/QC/params_qc_nf-core.R b/inst/templates/rnaseq/QC/params_qc_nf-core.R similarity index 100% rename from inst/rmarkdown/templates/rnaseq/skeleton/QC/params_qc_nf-core.R rename to inst/templates/rnaseq/QC/params_qc_nf-core.R diff --git a/inst/rmarkdown/templates/rnaseq/skeleton/QC/run_markdown.R b/inst/templates/rnaseq/QC/run_markdown.R similarity index 100% rename from inst/rmarkdown/templates/rnaseq/skeleton/QC/run_markdown.R rename to inst/templates/rnaseq/QC/run_markdown.R diff --git a/inst/rmarkdown/templates/rnaseq/skeleton/README.md b/inst/templates/rnaseq/README.md similarity index 100% rename from inst/rmarkdown/templates/rnaseq/skeleton/README.md rename to inst/templates/rnaseq/README.md diff --git a/inst/rmarkdown/templates/rnaseq/skeleton/DE/DEG.Rmd b/inst/templates/rnaseq/de/DEG.Rmd similarity index 99% rename from inst/rmarkdown/templates/rnaseq/skeleton/DE/DEG.Rmd rename to inst/templates/rnaseq/de/DEG.Rmd index 5da794c..c2069be 100644 --- a/inst/rmarkdown/templates/rnaseq/skeleton/DE/DEG.Rmd +++ b/inst/templates/rnaseq/de/DEG.Rmd @@ -175,7 +175,6 @@ rdata = AnnotationDbi::select(org.Hs.eg.db, rownames(counts), 'SYMBOL', 'ENSEMBL ``` ```{r setup_RUV} - dds_to_use <- DESeqDataSetFromMatrix(counts, coldata, design = ~1) vsd_before <- vst(dds_to_use) @@ -400,6 +399,9 @@ res <- as.data.frame(resLFCS) %>% relocate(gene_name) %>% dplyr::rename(lfc = log2FoldChange) %>% mutate(pi = abs(lfc) * -log10(padj)) %>% arrange(-pi) +## Filter out genes that have no expression or were filtered out by DESEQ2 +res <- res[res$baseMean>0,] %>% drop_na(padj) %>% drop_na(pvalue) + res_sig <- res %>% filter(padj < 0.05) %>% arrange(padj) %>% mutate(gene_name = ifelse(is.na(gene_name), gene_id, gene_name)) diff --git a/inst/templates/rnaseq/de/Multiplicative_DE_docs.md b/inst/templates/rnaseq/de/Multiplicative_DE_docs.md new file mode 100644 index 0000000..aa35021 --- /dev/null +++ b/inst/templates/rnaseq/de/Multiplicative_DE_docs.md @@ -0,0 +1,129 @@ +# Overview + +This is an example of complex DE analysis with multiple covariates with multiple levels. + +We have the SEX variable (2 levels) and the GENOTYPE VARIABLE (4 levels) + +# Intercept Analysis + +``` +# Model design and creating dds object from the dataset +design = ~sex + genotype + sex:genotype +dds <- DESeqDataSet(se_Striatum, design) +``` + +## Filtering lowly expressed genes +We are filtering out genes with fewer than 10 raw counts in total and are present in fewer than 3 samples. + +``` +keep <- rowSums(counts(dds)>=10) >=4 +dds <- dds[keep, ] +#dds # comment out this line to print the dds object and compare the dimension of the dataset before and after filtering is applied. +``` + +setting up WT as reference genotype and Male and reference sex. Otherwise DESeq2 will use the conditions in their alphabetical order. + +``` +dds$genotype <- relevel(dds$genotype, ref = "WT") +dds$sex <- relevel(dds$sex, ref = "Male") + +#Checking model design and reference condition comment out the three lines below to print the design and order of genotype and sex +design(dds) +levels(dds$genotype) +levels(dds$sex) + +#estimating size factors for normalization and fitting our model with DESeq model +dds <- estimateSizeFactors(dds) +dds <- DESeq(dds) +resultsNames(dds) #This will print out the name of coefficients being compared, comment it out to view + +# get coefficient matrix +mod_mat <- model.matrix(design(dds), data = colData(dds)) +mod_mat + +(Intercept) sexFemale genotypeCR3KO genotypeQ175 genotypeQ175_CR3KO sexFemale:genotypeCR3KO sexFemale:genotypeQ175 sexFemale:genotypeQ175_CR3KO +a10_st_q175_m_r1 1 0 0 1 0 0 0 0 +a12_st_q175_f_r1 1 1 0 1 0 0 1 0 +a14_st_wt_m_r1 1 0 0 0 0 0 0 0 +a16_st_wt_f_r1 1 1 0 0 0 0 0 0 +``` + + +coefficient weights extracted from the mod_mat above + +``` +WT_M <- c(1, 0, 0, 0, 0, 0, 0, 0) +WT_F <- c(1, 1, 0, 0, 0, 0, 0, 0) +WTCR3ko_M <- c(1, 0, 1, 0, 0, 0, 0, 0) +WTCR3ko_F <- c( 1, 1, 1, 0, 0, 1, 0, 0) +Q175_M <- c(1, 0, 0, 1, 0, 0, 0, 0) +Q175_F <- c(1, 1, 0, 1, 0, 0, 1, 0) +Q175CR3ko_M <- c(1, 0, 0, 0, 1, 0, 0, 0) +Q175CR3ko_F <- c(1, 1, 0, 0, 1, 0, 0, 1) +``` + +# Differential gene expression analysis + +## Comp_2: Female vs Male : WTCR3ko +``` +comp2_F.v.M_WTCR3ko <- results(dds, contrast = c(WTCR3ko_F - WTCR3ko_M)) +comp2_F.v.M_WTCR3ko_shrink <- lfcShrink(dds, contrast = c(WTCR3ko_F - WTCR3ko_M), type = "ashr") +summary(comp2_F.v.M_WTCR3ko) +``` + +## Comp_3: Female vs Male : Q175 +``` +comp3_F.v.M_Q175 <- results(dds, contrast = c(Q175_F - Q175_M)) +comp3_F.v.M_Q175_shrink <- lfcShrink(dds, contrast = c(Q175_F - Q175_M), type = "ashr") +summary(comp3_F.v.M_Q175) +``` + +## Comp_5: WTCR3ko vs WT : Male +```{r} +comp5_WTCR3ko.v.WT_Male <- results(dds, contrast = c(WTCR3ko_M - WT_M)) +comp5_WTCR3ko.v.WT_Male_shrink <- lfcShrink(dds, contrast = c(WTCR3ko_M - WT_M), type = "ashr") +summary(comp5_WTCR3ko.v.WT_Male) +``` + +## Comp_6: WTCR3ko vs WT : Female +```{r} +comp6_WTCR3ko.v.WT_Female <- results(dds, contrast = c(WTCR3ko_F - WT_F)) +comp6_WTCR3ko.v.WT_Female_shrink <- lfcShrink(dds, contrast = c(WTCR3ko_F - WT_F), type = "ashr") +summary(comp6_WTCR3ko.v.WT_Female) +``` + +## Comp_11: Q175CR3ko vs Q175 : Male +``` +comp11_Q175CR3ko.v.Q175_Male <- results(dds, contrast = c(Q175CR3ko_M - Q175_M)) +comp11_Q175CR3ko.v.Q175_Male_shrink <- lfcShrink(dds, contrast = c(Q175CR3ko_M - Q175_M), type = "ashr") +summary(comp11_Q175CR3ko.v.Q175_Male) +``` + +## Comp_12: Q175CR3ko vs Q175 : Female +``` +comp12_Q175CR3ko.v.Q175_Female <- results(dds, contrast = c(Q175CR3ko_F - Q175_F)) +comp12_Q175CR3ko.v.Q175_Female_shrink <- lfcShrink(dds, contrast = c(Q175CR3ko_F - Q175_F), type = "ashr") +summary(comp12_Q175CR3ko.v.Q175_Female) +``` + +## Comp_15: (Q175CR3ko-Q176) - (WTCR3ko - WT) : Male + +Does the CR3 knockout in Q175 differ from CR3 knockout in WT for Males? + +``` +comp15_CR3koinQ175.v.CR3koinWT_Male <- results(dds, + contrast = c(Q175CR3ko_M - Q175_M) - (WTCR3ko_M - WT_M)) +comp15_CR3koinQ175.v.CR3koinWT_Male_shrink <- lfcShrink(dds, + contrast = c(Q175CR3ko_M - Q175_M) - (WTCR3ko_M - WT_M), type = "ashr") +summary(comp15_CR3koinQ175.v.CR3koinWT_Male) +``` + +## Comp_17: (WTCR3koall) - (WTall) + +Does the average of the samples in WTCR3KO differ from average of the samples in WT + +``` +comp17_WTCR3all.v.WTall <- results(dds, contrast = c(WTCR3ko_M + WTCR3ko_F)/2 - (WT_M + WT_F)/2) +comp17_WTCR3all.v.WTall_shrink <- lfcShrink(dds, contrast = c((WTCR3ko_M + WTCR3ko_F)/2 - (WT_M + WT_F)/2), type = "ashr") +summary(comp17_WTCR3all.v.WTall) +``` diff --git a/inst/templates/rnaseq/de/PCA_variance_analysis.Rmd b/inst/templates/rnaseq/de/PCA_variance_analysis.Rmd new file mode 100644 index 0000000..aadabec --- /dev/null +++ b/inst/templates/rnaseq/de/PCA_variance_analysis.Rmd @@ -0,0 +1,35 @@ +--- +title: "PCA with variance analysis" +author: "Harvard Chan Bioinformatics Core" +--- + +```{r } +library(DEGreport) +library(ggplot2) +library(ggforce) + +data("bcbio_vsd_data") + +colors=cb_friendly_cols(1:15) +ggplot2::theme_set(theme_prism(base_size = 14)) + +pca <- degPCA(assay(bcbio_vsd_data), colData(bcbio_vsd_data), + condition = "sample_type", name = "sample", data = T) + +pca$plot + ggtitle(paste0("All samples", "\nPCA using ", nrow(vst), " genes")) + + theme(plot.title=element_text(hjust=0.5)) + + geom_mark_ellipse(aes(color = sample_type)) +``` + +Information on [betadisper](https://uw.pressbooks.pub/appliedmultivariatestatistics/chapter/permdisp/) to do analyses of multivariate homogeneity of group dispersions (variances). + +```{r} +# NOTE:This is not confirmed to be a valid test but it could help to understand the data +library(vegan) +vare.disa <- vegdist(t(assay(bcbio_vsd_data))) + +mod = betadisper(vare.disa, colData(bcbio_vsd_data)[['sample_type']]) +anova(mod) +``` + + diff --git a/inst/rmarkdown/templates/rnaseq/skeleton/DE/load_data.R b/inst/templates/rnaseq/de/load_data.R similarity index 100% rename from inst/rmarkdown/templates/rnaseq/skeleton/DE/load_data.R rename to inst/templates/rnaseq/de/load_data.R diff --git a/inst/rmarkdown/templates/rnaseq/skeleton/information.R b/inst/templates/rnaseq/information.R similarity index 100% rename from inst/rmarkdown/templates/rnaseq/skeleton/information.R rename to inst/templates/rnaseq/information.R diff --git a/inst/templates/rnaseq/org/hcbc_README.md b/inst/templates/rnaseq/org/hcbc_README.md new file mode 100644 index 0000000..50f8f16 --- /dev/null +++ b/inst/templates/rnaseq/org/hcbc_README.md @@ -0,0 +1,75 @@ +# Guideline for RNAseq downstream analysis + +Make sure there is a project name for this. + +## Run data with nf-core rnaseq + +- Make sure you have access to our [Seqera WorkSpace](https://cloud.seqera.io/orgs/HBC/workspaces/core_production/launchpad) +- Transfer data to HCBC S3: Ask Alex/Lorena. Files will be at our S3 bucket `input/rawdata` folder +- Prepare the CSV file according this [instructions](https://nf-co.re/rnaseq/3.14.0/docs/usage#multiple-runs-of-the-same-sample). File should look like this: + +```csv +sample,fastq_1,fastq_2,strandedness +CONTROL_REP1,s3path/AEG588A1_S1_L002_R1_001.fastq.gz,s3path/AEG588A1_S1_L002_R2_001.fastq.gz,auto +CONTROL_REP1,s3path/AEG588A1_S1_L003_R1_001.fastq.gz,s3path/AEG588A1_S1_L003_R2_001.fastq.gz,auto +CONTROL_REP1,s3path/AEG588A1_S1_L004_R1_001.fastq.gz,s3path/AEG588A1_S1_L004_R2_001.fastq.gz,auto +``` + +Use `bcbio_nfcore_check(csv_file)` to check the file is correct. + +You can add more columns to this file with more metadata, and use this file as the `coldata` file the templates. + +- Upload file to our `Datasets` in Seqera using the name of the project but starting with `nfcore-rnaseq` +- Go to `Launchpad`, select `nf-core_rnaseq` pipeline, and select the previous created `Datasets` in the `input` parameter after clicking in `Browser` + - Select an output directory with the same name used for the `Dataset` inside the `results` folder in S3 +- When pipeline is down, data will be copied to our on-premise HPC in the scratch system under `scratch/groups/hsph/hbc/bcbio/` folder + +## Downstream analysis + +Please, modify `information.R` with the right information. You can use this file with any other Rmd to include the project/analysis information. + +### QC + +`QC/QC.Rmd` is a template for QC metrics. Use `params_qc.R` for `bcbio` + or `QC/QC_nf-core.Rmd` `params_qc_nf-core.R` for `nf-core/rnaseq` outputs. + +Read instruction in the R and Rmd scripts to render it. + +### DE + +`DE/DEG.Rmd` is a template for two groups comparison. `params_de.R` has the information of the input files to load. You can point to `bcbio` or `nf-core/rnaseq` output files. + +On the `YAML` header file of the `Rmd` you can specify some parameters or just set them up in the first chunk of code of the template. This template has examples of: + +- sub-setting data +- two groups comparison +- volcano plot +- MA plot +- Pathway analysis +- Tables + +There are some code related to alternative analysis: + +- `DE/Multiplicative_DE_docs.md` that shows some cases when there is multiple variables in the model with multiple levels: sex (2 levels) and genotype (4 levels) + +## DropBox + +- In `reports/QC` + - [ ] copy `bcbio-se.rds` and `tximport-counts.csv` + - [ ] copy QC `Rmd/R/html/figures` +- In `reports/DE` + - [ ] Normalized counts for all genes x all samples (csv format) +- In `reports/DE`, for *each analysis*: + - **Note** For multiple comparisons/analysis, do a single report/template if possible in the parent folder using parameters whenever possible. + - Create a folder with the comparison names in the files. Numbering by comparison (`01.1_DE_comp1`, `01.2_DE_comp2`, etc.). If you’re running multiple models for the same comparison, append `_M#`. Add the following files under each folder: + - [ ] Normalized count table with the samples used in this analysis/comparison. + - [ ] Full results `DESeq2` for all genes (csv format) with annotation columns appended. + - [ ] Significant genes results file (subset of annotated full results by chosen p-value and LFC). Separate files will be created for each individual contrast. + - [ ] Significant genes results file as described above, but additionally append columns containing normalized count values for each sample. + - Make sure to append the gene symbols to these tables so the researcher can interpret the results. + +## GitHub + +- [ ] Push all `*Rmd` `*R` files used for the *QC* and *DE* analysis respecting folder structure. + +Please, ignore `*html/figures/csv` and any output of the code. diff --git a/inst/templates/rnaseq/qc/QC-bcbio.Rmd b/inst/templates/rnaseq/qc/QC-bcbio.Rmd new file mode 100644 index 0000000..aca91c1 --- /dev/null +++ b/inst/templates/rnaseq/qc/QC-bcbio.Rmd @@ -0,0 +1,409 @@ +--- +title: "Quality Control" +author: "Harvard Chan Bioinformatics Core" +date: "`r Sys.Date()`" +output: + html_document: + code_folding: hide + df_print: paged + highlights: pygments + number_sections: true + self_contained: true + theme: default + toc: true + toc_float: + collapsed: true + smooth_scroll: true +editor_options: + chunk_output_type: console +params: + params_file: params_qc.R +--- + + +```{r source_params, echo = F} +metadata_fn='https://raw.githubusercontent.com/bcbio/bcbioR-test-data/main/rnaseq/bcbio/coldata.csv' +se_object=url("https://raw.githubusercontent.com/bcbio/bcbioR-test-data/main/rnaseq/bcbio/bcbio-se.rds") +``` + +```{r load_libraries, cache = FALSE, message = FALSE, warning=FALSE} +library(tidyverse) +library(knitr) +library(DESeq2) +library(DEGreport) +library(ggrepel) +library(pheatmap) +# library(RColorBrewer) +library(DT) +library(pheatmap) +library(bcbioR) +ggplot2::theme_set(theme_light(base_size = 14)) +opts_chunk[["set"]]( + cache = FALSE, + cache.lazy = FALSE, + dev = c("png", "pdf"), + error = TRUE, + highlight = TRUE, + message = FALSE, + prompt = FALSE, + tidy = FALSE, + warning = FALSE, + fig.height = 4) +``` + + +```{r subchunkify, echo=FALSE, eval=FALSE} +#' Create sub-chunks for plots +#' +#' taken from: https://stackoverflow.com/questions/15365829/dynamic-height-and-width-for-knitr-plots +#' +#' @param pl a plot object +#' @param fig.height figure height +#' @param fig.width figure width +#' @param chunk_name name of the chunk +#' +#' @author Andreas Scharmueller \email{andschar@@protonmail.com} +#' +subchunkify = function(pl, + fig.height = 7, + fig.width = 5, + chunk_name = 'plot') { + pl_deparsed = paste0(deparse(function() { + pl + }), collapse = '') + + sub_chunk = paste0( + "```{r ", + chunk_name, + ", fig.height=", + fig.height, + ", fig.width=", + fig.width, + ", dpi=72", + ", echo=FALSE, message=FALSE, warning=FALSE, fig.align='center'}", + "\n(", + pl_deparsed, + ")()", + "\n```" + ) + + cat(knitr::knit( + text = knitr::knit_expand(text = sub_chunk), + quiet = TRUE + )) +} + +``` + + +```{r sanitize-datatable} +sanitize_datatable = function(df, ...) { + # remove dashes which cause wrapping + DT::datatable(df, ..., rownames=gsub("-", "_", rownames(df)), + colnames=gsub("-", "_", colnames(df))) +} +``` + +# Overview + +- Project: `r project` +- PI: `r PI` +- Analyst: `r analyst` +- Experiment: `r experiment` +- Aim: `r aim` + + +# Samples and metadata + +```{r load_metadata} +meta_df=read_csv(metadata_fn) %>% mutate(sample = tolower(description)) %>% + dplyr::select(-description) + +ggplot(meta_df, aes(sample_type, fill = sample_type)) + + geom_bar() + ylab("") + xlab("") + + scale_fill_cb_friendly() +``` + + +```{r show-metadata} +se <- readRDS(se_object) #local + + +metrics <- metadata(se)$metrics %>% + full_join(meta_df , by = c("sample" = "sample")) + +meta_sm <- meta_df %>% + as.data.frame() %>% + column_to_rownames("sample") + +meta_sm %>% sanitize_datatable() + +``` + +# Read metrics {.tabset} + +## Total reads + +Here, we want to see consistency and a minimum of 20 million reads. + +```{r plot_total_reads} +metrics %>% + ggplot(aes(x = sample_type, + y = total_reads, + color = sample_type)) + + geom_point(alpha=0.5) + + coord_flip() + + scale_y_continuous(name = "million reads") + + scale_color_cb_friendly() + + ggtitle("Total reads") + +``` + +```{r calc_min_max_pct_mapped} +#get min percent mapped reads for reference +min_pct_mapped <- round(min(metrics$mapped_reads/metrics$total_reads)*100,1) +max_pct_mapped <- round(max(metrics$mapped_reads/metrics$total_reads)*100,1) +``` + +## Mapping rate + +The genomic mapping rate represents the percentage of reads mapping to the reference genome. We want to see consistent mapping rates between samples and over 70% mapping. These samples have mapping rates (`r min_pct_mapped` - `r max_pct_mapped`%). + +```{r plot_mapping_rate} +metrics$mapped_reads_pct <- round(metrics$mapped_reads/metrics$total_reads*100,1) +metrics %>% + ggplot(aes(x = sample_type, + y = mapped_reads_pct, + color = sample_type)) + + geom_point() + + coord_flip() + + scale_color_cb_friendly() + + ylim(0, 100) + + ggtitle("Mapping rate") + + geom_hline(yintercept=70, color = cb_friendly_cols('blue')) +``` + + +## Number of genes detected + +The number of genes represented in every sample is expected to be consistent and over 20K (blue line). + +```{r plot_genes_detected} +genes_detected <- colSums(assays(se)[["raw"]] > 0) %>% enframe() +sample_names <- metrics[,c("sample"), drop=F] +genes_detected <- left_join(genes_detected, sample_names, by = c("name" = "sample")) +genes_detected <- genes_detected %>% group_by(name) +genes_detected <- summarise(genes_detected, + n_genes = max(value)) + +metrics <- metrics %>% + left_join(genes_detected, by = c("sample" = "name")) +ggplot(metrics,aes(x = sample_type, + y = n_genes, color = sample_type)) + + geom_point() + + coord_flip() + + scale_color_cb_friendly() + + ggtitle("Number of genes") + + ylab("Number of genes") + + xlab("") + + geom_hline(yintercept=20000, color = cb_friendly_cols('blue')) +``` + + +## Gene detection saturation + +This plot shows how complex the samples are. We expect samples with more reads to detect more genes. + +```{r plot_gene_saturation} +metrics %>% + ggplot(aes(x = total_reads, + y = n_genes, + color = sample_type)) + + geom_point()+ + scale_x_log10() + + scale_color_cb_friendly() + + ggtitle("Gene saturation") + + ylab("Number of genes") +``` + +## Exonic mapping rate + +Here we are looking for consistency, and exonic mapping rates around 70% or 75% (blue and red lines, respectively). + +```{r plot_exonic_mapping_rate} +metrics %>% + ggplot(aes(x = sample_type, + y = exonic_rate * 100, + color = sample_type)) + + geom_point() + + ylab("Exonic rate %") + + ggtitle("Exonic mapping rate") + + scale_color_cb_friendly() + + coord_flip() + + xlab("") + + ylim(c(0,100)) + + geom_hline(yintercept=70, color = cb_friendly_cols('blue')) + + geom_hline(yintercept=75, color = cb_friendly_cols('brown')) +``` + +## Intronic mapping rate + +Here, we expect a low intronic mapping rate (≤ 15% - 20%) + +```{r plot_intronic_mapping_rate} +metrics %>% + ggplot(aes(x = sample_type, + y = intronic_rate * 100, + color = sample_type)) + + geom_point() + + ylab("Intronic rate %") + + ggtitle("Intronic mapping rate") + + scale_color_cb_friendly() + + coord_flip() + + xlab("") + + ylim(c(0,100)) + + geom_hline(yintercept=20, color = cb_friendly_cols('blue')) + + geom_hline(yintercept=15, color = cb_friendly_cols('brown')) +``` + +## Intergenic mapping rate + +Here, we expect a low intergenic mapping rate, which is true for all samples. + +```{r plot_intergenic_mapping_rate} +metrics %>% + ggplot(aes(x = sample_type, + y = intergenic_rate * 100, + color = sample_type)) + + geom_point() + + ylab("Intergenic rate %") + + ggtitle("Intergenic mapping rate") + + coord_flip() + + scale_color_cb_friendly() + + ylim(c(0, 100)) +``` + +## rRNA mapping rate + +Samples should have a ribosomal RNA (rRNA) "contamination" rate below 10% + +```{r plot_rrna_mapping_rate} +# for some bad samples it could be > 50% +rrna_ylim <- max(round(metrics$r_rna_rate*100, 2)) + 10 +metrics %>% + ggplot(aes(x = sample_type, + y = r_rna_rate * 100, + color = sample_type)) + + geom_point() + + ylab("rRNA rate, %")+ + ylim(0, rrna_ylim) + + ggtitle("rRNA mapping rate") + + coord_flip() + + scale_color_cb_friendly() +``` + +## 5'->3' bias + +There should be little bias, i.e. the values should be close to 1, or at least consistent among samples + +```{r plot_53_bias} +metrics %>% + ggplot(aes(x = sample_type, + y = x5_3_bias, + color = sample_type)) + + geom_point() + + ggtitle("5'-3' bias") + + coord_flip() + + ylim(c(0.5,1.5)) + + scale_color_cb_friendly()+ + geom_hline(yintercept=1, color = cb_friendly_cols('blue')) +``` + +## Counts per gene - all genes + +We expect consistency in the box plots here between the samples, i.e. the distribution of counts across the genes is similar + +```{r plot_counts_per_gene} +metrics_small <- metrics %>% dplyr::select(sample, sample_type) +metrics_small <- left_join(sample_names, metrics_small) + +counts <- + assays(se)[["raw"]] %>% + as_tibble() %>% + filter(rowSums(.)!=0) %>% + gather(name, counts) + +counts <- left_join(counts, metrics, by = c("name" = "sample")) + +ggplot(counts, aes(sample_type, + log2(counts+1), + fill = sample_type)) + + geom_boxplot() + + scale_fill_cb_friendly() + + ggtitle("Counts per gene, all non-zero genes") + + scale_color_cb_friendly() +``` + + +# Sample similarity analysis + +In this section, we look at how well the different groups in the dataset cluster with each other. Samples from the same group should ideally be clustering together. We use Principal Component Analysis (PCA). + +## Principal component analysis (PCA) {.tabset} + +Principal Component Analysis (PCA) is a statistical technique used to simplify high-dimensional data by identifying patterns and reducing the number of variables. In the context of gene expression, PCA helps analyze large datasets containing information about the expression levels of thousands of genes across different samples (e.g., tissues, cells). + + +```{r PCA1:5 summary, all, unlabeled, fig.width= 7, fig.height = 5} +raw_counts <- assays(se)[["raw"]] %>% + as_tibble() %>% + filter(rowSums(.)!=0) %>% + as.matrix() + +vst <- vst(raw_counts) + +#fix samples names +coldat_for_pca <- as.data.frame(metrics) +rownames(coldat_for_pca) <- coldat_for_pca$sample +coldat_for_pca <- coldat_for_pca[colnames(raw_counts),] +pca1 <- degPCA(vst, coldat_for_pca, + condition = "sample_type", data = T)[["plot"]] +pca2 <- degPCA(vst, coldat_for_pca, + condition = "sample_type", data = T, pc1="PC3", pc2="PC4")[["plot"]] + +pca1 + scale_color_cb_friendly() +pca2 + scale_color_cb_friendly() +``` + + +```{r, eval=FALSE} +variables=degCovariates(vst, coldat_for_pca) +``` + + +```{r clustering fig, fig.width = 10, fig.asp = .62} +## Hierarchical clustering + +vst_cor <- cor(vst) + +annotation_cols <- cb_friendly_pal('grey')(length(unique(coldat_for_pca$sample_type))) +names(annotation_cols) <- unique(coldat_for_pca$sample_type) + +p <- pheatmap(vst_cor, + annotation = coldat_for_pca %>% select(sample_type) %>% mutate(sample_type = as.factor(sample_type)), + show_rownames = T, + show_colnames = T, + color = cb_friendly_pal('heatmap')(15), + annotation_colors = list(sample_type = annotation_cols) +) +p + +``` + +# R session + +List and version of tools used for the QC report generation. + +```{r} +sessionInfo() +``` diff --git a/inst/rmarkdown/templates/rnaseq/skeleton/QC/QC_nf-core.Rmd b/inst/templates/rnaseq/qc/QC_nf-core.Rmd similarity index 100% rename from inst/rmarkdown/templates/rnaseq/skeleton/QC/QC_nf-core.Rmd rename to inst/templates/rnaseq/qc/QC_nf-core.Rmd diff --git a/inst/rmarkdown/templates/rnaseq/skeleton/QC/params_qc_nf-core-example.R b/inst/templates/rnaseq/qc/params_qc_nf-core-example.R similarity index 100% rename from inst/rmarkdown/templates/rnaseq/skeleton/QC/params_qc_nf-core-example.R rename to inst/templates/rnaseq/qc/params_qc_nf-core-example.R diff --git a/inst/rmarkdown/templates/rnaseq/skeleton/QC/placeholder b/inst/templates/rnaseq/qc/placeholder similarity index 100% rename from inst/rmarkdown/templates/rnaseq/skeleton/QC/placeholder rename to inst/templates/rnaseq/qc/placeholder diff --git a/inst/rmarkdown/templates/singlecell/skeleton/Integration/helpers.R b/inst/templates/singlecell/Integration/helpers.R similarity index 100% rename from inst/rmarkdown/templates/singlecell/skeleton/Integration/helpers.R rename to inst/templates/singlecell/Integration/helpers.R diff --git a/inst/rmarkdown/templates/singlecell/skeleton/README.md b/inst/templates/singlecell/README.md similarity index 100% rename from inst/rmarkdown/templates/singlecell/skeleton/README.md rename to inst/templates/singlecell/README.md diff --git a/inst/templates/singlecell/cellchat.Rmd b/inst/templates/singlecell/cellchat.Rmd new file mode 100644 index 0000000..d4cc6f4 --- /dev/null +++ b/inst/templates/singlecell/cellchat.Rmd @@ -0,0 +1,440 @@ +--- +title: "CellChat" +author: "Harvard Chan Bioinformatics Core" +date: "`r Sys.Date()`" +output: + html_document: + code_folding: hide + df_print: paged + highlights: pygments + number_sections: false + self_contained: true + theme: default + toc: true + toc_float: + collapsed: true + smooth_scroll: true +editor_options: + chunk_output_type: console +params: + project_file: information.R + seurat_fn: ../data/fDat_sn_RC.rds + cellchat_fn: ../data/snrna_cellchat.qs + cellchat_grade2_fn: ../data/snrna_cellchat_grade2.qs + cellchat_grade0_fn: ../data/snrna_cellchat_grade0.qs +--- + +```{r load_libraries, cache = FALSE, message = FALSE, warning=FALSE, echo=FALSE,} + +reticulate::use_virtualenv("/n/app/bcbio/R4.3.1_python_cellchat") +reticulate::py_config() # should show v3.9.14 +Sys.getenv("PYTHONPATH") # should be empty + +current_libs <- .libPaths() +.libPaths(c('/n/app/bcbio/R4.3.1_cellchat/', current_libs)) +library(CellChat) + +library(tidyverse) +library(Seurat) +library(bcbioR) +library(ggprism) +library(knitr) +library(tools) +library(qs) +library(patchwork) +library(ComplexHeatmap) + +options(stringsAsFactors = FALSE) + +colors=cb_friendly_cols(1:15) +ggplot2::theme_set(theme_prism(base_size = 14)) +opts_chunk[["set"]]( + cache = F, + cache.lazy = FALSE, + dev = c("png", "pdf"), + error = TRUE, + highlight = TRUE, + message = FALSE, + prompt = FALSE, + tidy = FALSE, + warning = FALSE, + echo = T, + fig.height = 4) + +# set seed for reproducibility +set.seed(1234567890L) + +cellchat_ran <- file.exists(params$cellchat_fn) +cellchat_rejection_ran <- file.exists(params$cellchat_grade2_fn) & file.exists(params$cellchat_grade0_fn) + +``` + +# Clustering + +```{r load_data } + +snrna <- readRDS(params$seurat_fn) + +# in this case, Chris_annot = cell_type +DimPlot(snrna, reduction = 'umap', group.by = 'Chris_annot') + +``` + +```{r prep cellchat inputs, eval = !cellchat_ran } + +# need to use normalized counts as input +data.input <- snrna[["SCT"]]@data +labels <- snrna$Chris_annot +meta <- data.frame(labels = labels, row.names = names(labels), samples = snrna$orig.ident) + +``` + +```{r create cellchat object, eval = !cellchat_ran } +cellchat <- createCellChat(object = data.input, meta = meta, group.by = "labels") + +``` + +```{r set cellchat db, eval = !cellchat_ran} +CellChatDB <- CellChatDB.human +CellChatDB.use <- subsetDB(CellChatDB) +cellchat@DB <- CellChatDB.use + +``` + +```{r subset and preprocess data, eval = !cellchat_ran } + +cellchat <- subsetData(cellchat) +cellchat <- updateCellChat(cellchat) +future::plan("multisession", workers = 8) # recommend running with at 8-16 cores +cellchat <- identifyOverExpressedGenes(cellchat) # may take a couple minutes +cellchat <- identifyOverExpressedInteractions(cellchat) # may take a couple minutes + +``` + +```{r compute communication prob, eval = !cellchat_ran} + +# Not recommended: project gene expression data onto protein-protein interaction network. +# Useful with shallow sequencing depth but introduces many weak communications. +# If used, must set raw.use = FALSE when running computeCommunProb +# cellchat <- projectData(cellchat, PPI.human) + + +# this next command takes 0.5-2+ hours +# can choose various methods for caculating average gene exp per group, +# 'triMean' allegedly produces fewer but stronger interactions +cellchat <- computeCommunProb(cellchat, type = "triMean") + +# filter out the cell-cell communication if < 50 cells per group +cellchat <- filterCommunication(cellchat, min.cells = 50) + +qsave(cellchat, '../data/snrna_cellchat.qs', preset = 'fast') + +``` + +# Overall Results + +```{r load cellchat, eval = cellchat_ran} +cellchat <- qread(params$cellchat_fn) + +df.net <- subsetCommunication(cellchat) %>% dplyr::arrange(pval) +df.net %>% sanitize_datatable() + +``` + +## Top interactions {.tabset} +```{r check pairs, results = 'asis', fig.width = 8, fig.height = 12} + +top_ints <- (df.net %>% pull(interaction_name) %>% unique)[1:10] +for (interaction in top_ints){ + cat('\n') + cat('### ', as.character(interaction), '\n') + interactors <- unlist(strsplit(as.character(interaction), '_')) + p1 <- VlnPlot(snrna, features = interactors, group.by = 'Chris_annot', + pt.size = 0.1, log = T, ncol = 1) + print(p1) + cat('\n') +} + +``` + +```{r compute pathway communication probs, eval = cellchat_ran} +cellchat <- computeCommunProbPathway(cellchat) +cellchat <- aggregateNet(cellchat) + +``` + +## Visualize Cell-Cell Communication Networks + +```{r chord plots, fig.width = 10, fig.height = 8, eval = cellchat_ran} + +groupSize <- as.numeric(table(cellchat@idents)) +par(mfrow = c(1,2), xpd=TRUE) +netVisual_circle(cellchat@net$count, vertex.weight = rowSums(cellchat@net$count), + weight.scale = T, label.edge= F, title.name = "Number of interactions") +netVisual_circle(cellchat@net$weight, vertex.weight = rowSums(cellchat@net$weight), + weight.scale = T, label.edge= F, title.name = "Interaction weights/strength") + +``` + +```{r heatmaps, eval = cellchat_ran} + +netVisual_heatmap(cellchat, measure = "count", color.heatmap = "Blues") +netVisual_heatmap(cellchat, measure = "weight", color.heatmap = "Blues") + +``` + +# Comparison Results + +Here we run the CellChat analysis twice, once on the Grade 2 rejection samples and once on the Grade 0 rejection samples. We compare the significant signaling interactions and investigate changes in them between rejection grades. + +```{r prep inputs rejection, eval=!cellchat_rejection_ran} + +grade2 <- subset(snrna, orig.ident %in% c('BRI-2396', 'BRI-2402')) +grade0 <- subset(snrna, orig.ident %in% c('BRI-2395', 'BRI-2411')) + +data.input_grade2 <- grade2[["SCT"]]@data +labels_grade2 <- grade2$Chris_annot +meta_grade2 <- data.frame(labels = labels_grade2, row.names = names(labels_grade2), samples = grade2$orig.ident) + +data.input_grade0 <- grade0[["SCT"]]@data +labels_grade0 <- grade0$Chris_annot +meta_grade0 <- data.frame(labels = labels_grade0, row.names = names(labels_grade0), samples = grade0$orig.ident) + +``` + +```{r create cellchat object rejection, eval=!cellchat_rejection_ran} +cellchat_grade2 <- createCellChat(object = data.input_grade2, meta = meta_grade2, group.by = "labels") +cellchat_grade0 <- createCellChat(object = data.input_grade0, meta = meta_grade0, group.by = "labels") + +``` + +```{r subset and preprocess data rejection, eval=!cellchat_rejection_ran} + +future::plan("multisession", workers = 8) # recommend running with at 8-16 cores + +cellchat_grade2@DB <- CellChatDB.use +cellchat_grade0@DB <- CellChatDB.use + +cellchat_grade2 <- subsetData(cellchat_grade2) +cellchat_grade2 <- updateCellChat(cellchat_grade2) +cellchat_grade2 <- identifyOverExpressedGenes(cellchat_grade2) # may take a couple minutes +cellchat_grade2 <- identifyOverExpressedInteractions(cellchat_grade2) # may take a couple minutes + +cellchat_grade0 <- subsetData(cellchat_grade0) +cellchat_grade0 <- updateCellChat(cellchat_grade0) +cellchat_grade0 <- identifyOverExpressedGenes(cellchat_grade0) # may take a couple minutes +cellchat_grade0 <- identifyOverExpressedInteractions(cellchat_grade0) # may take a couple minutes + +``` + +```{r compute communication prob rejection, eval=!cellchat_rejection_ran} +cellchat_grade2 <- computeCommunProb(cellchat_grade2, type = "triMean") # command takes 0.5-2+ hours +cellchat_grade2 <- filterCommunication(cellchat_grade2, min.cells = 50) +qsave(cellchat_grade2, params$cellchat_grade2_fn, preset = 'fast') + +cellchat_grade0 <- computeCommunProb(cellchat_grade0, type = "triMean") # command takes 0.5-2+ hours +cellchat_grade0 <- filterCommunication(cellchat_grade0, min.cells = 50) +qsave(cellchat_grade0, params$cellchat_grade0_fn, preset = 'fast') + +``` + +```{r load cellchat rejection, eval = cellchat_rejection_ran} + +cellchat_grade2 <- qread(params$cellchat_grade2_fn) +cellchat_grade0 <- qread(params$cellchat_grade0_fn) + +cellchat_grade2 <- filterCommunication(cellchat_grade2, min.cells = 50) +cellchat_grade0 <- filterCommunication(cellchat_grade0, min.cells = 50) + +df.net_grade2 <- subsetCommunication(cellchat_grade2)%>% dplyr::arrange(pval) +df.net_grade0 <- subsetCommunication(cellchat_grade0)%>% dplyr::arrange(pval) + +``` + +## Grade 2 + +```{r datatable grade 2, eval = cellchat_rejection_ran} +df.net_grade2 %>% sanitize_datatable() + +``` + +### Top interactions {.tabset} +```{r check pairs grade 2, results = 'asis', fig.width = 8, fig.height = 12} + +top_ints <- (df.net_grade2 %>% pull(interaction_name) %>% unique)[1:10] +for (interaction in top_ints){ + cat('\n') + cat('#### ', as.character(interaction), '\n') + interactors <- unlist(strsplit(as.character(interaction), '_')) + p1 <- VlnPlot(snrna, features = interactors, group.by = 'Chris_annot', pt.size = 0.1, log = T, ncol = 1) + print(p1) + cat('\n') +} + +``` + + +## Grade 0 + +```{r datatable grade 0, eval = cellchat_rejection_ran} +df.net_grade0 %>% sanitize_datatable() + +``` + +### Top interactions {.tabset} +```{r check pairs grade 0, results = 'asis', fig.width = 8, fig.height = 12} + +top_ints <- (df.net_grade0 %>% pull(interaction_name) %>% unique)[1:10] +for (interaction in top_ints){ + cat('\n') + cat('#### ', as.character(interaction), '\n') + interactors <- unlist(strsplit(as.character(interaction), '_')) + p1 <- VlnPlot(snrna, features = interactors, group.by = 'Chris_annot', pt.size = 0.1, log = T, ncol = 1) + print(p1) + cat('\n') +} + +``` + +```{r merge rejection objects, eval = cellchat_rejection_ran} + +cellchat_grade2 <- computeCommunProbPathway(cellchat_grade2) +cellchat_grade2 <- aggregateNet(cellchat_grade2) +cellchat_grade2 <- netAnalysis_computeCentrality(cellchat_grade2) +cellchat_grade0 <- computeCommunProbPathway(cellchat_grade0) +cellchat_grade0 <- aggregateNet(cellchat_grade0) +cellchat_grade0 <- netAnalysis_computeCentrality(cellchat_grade0) + +object.list <- list(grade0 = cellchat_grade0, grade2 = cellchat_grade2) +cellchat_merged <- mergeCellChat(object.list, add.names = names(object.list)) + +df.net_merged <- subsetCommunication(cellchat_merged) + +``` + +## Compare Interactions/Interaction Strength + +```{r compare interactions, eval = cellchat_rejection_ran} + +gg1 <- compareInteractions(cellchat_merged, show.legend = F, group = c(1,2)) +gg2 <- compareInteractions(cellchat_merged, show.legend = F, group = c(1,2), measure = "weight") +gg1 + gg2 + +``` + +```{r chord plots merged, eval = cellchat_rejection_ran, fig.width = 10, fig.height = 8} +par(mfrow = c(1,2), xpd=TRUE) +netVisual_diffInteraction(cellchat_merged, weight.scale = T) +netVisual_diffInteraction(cellchat_merged, weight.scale = T, measure = "weight") + +``` + +```{r heatmaps merged, eval = cellchat_rejection_ran, fig.width = 10, fig.height = 8} + +gg1 <- netVisual_heatmap(cellchat_merged) +gg2 <- netVisual_heatmap(cellchat_merged, measure = "weight") +gg1 + gg2 + +``` + +## Compare Major Pathway Sources and Targets + +From the CellChat documentation: "Comparing the outgoing and incoming interaction strength in a 2D space allows ready identification of the cell populations with significant changes in sending or receiving signals between different datasets." + +```{r compare send/receive changes, eval = cellchat_rejection_ran, fig.width = 10, fig.height = 6} + +num.link <- sapply(object.list, function(x) {rowSums(x@net$count) + colSums(x@net$count)-diag(x@net$count)}) +weight.MinMax <- c(min(num.link), max(num.link)) # control the dot size in the different datasets +gg <- list() +for (i in 1:length(object.list)) { + gg[[i]] <- netAnalysis_signalingRole_scatter(object.list[[i]], title = names(object.list)[i], weight.MinMax = weight.MinMax) +} +patchwork::wrap_plots(plots = gg) +``` + + +```{r identify signaling changes, eval = cellchat_rejection_ran, fig.width = 12, fig.height = 12} +gg1 <- netAnalysis_signalingChanges_scatter(cellchat_merged, idents.use = "Vascular_EC") +gg2 <- netAnalysis_signalingChanges_scatter(cellchat_merged, idents.use = "Lymphatic_EC") +gg3 <- netAnalysis_signalingChanges_scatter(cellchat_merged, idents.use = "Pericyte") +patchwork::wrap_plots(plots = list(gg1,gg2,gg3), nrow = 3, ncol = 1) + +``` + +## Cluster Altered Signaling Interactions + +From the CellChat documentation: "CellChat performs joint manifold learning and classification of the inferred communication networks based on their functional and topological similarity across different conditions. + +By quantifying the similarity between the cellular communication networks of signaling pathways across conditions, this analysis highlights the potentially altered signaling pathways. CellChat adopts the concept of network rewiring from network biology and hypothesized that the difference between different communication networks may affect biological processes across conditions. UMAP is used for visualizing signaling relationship and interpreting our signaling outputs in an intuitive way without involving the classification of conditions. + +Functional similarity: High degree of functional similarity indicates major senders and receivers are similar, and it can be interpreted as the two signaling pathways or two ligand-receptor pairs exhibit similar and/or redundant roles. + +Structural similarity: A structural similarity was used to compare their signaling network structure, without considering the similarity of senders and receivers." + + +### Based on Functional Similarity + +```{r identify signaling groups functional, eval = cellchat_rejection_ran} + +cellchat_merged <- computeNetSimilarityPairwise(cellchat_merged, type = "functional") +cellchat_merged <- netEmbedding(cellchat_merged, type = "functional") +cellchat_merged <- netClustering(cellchat_merged, type = "functional") +netVisual_embeddingPairwise(cellchat_merged, type = "functional", label.size = 3.5) + +``` + +### Based on Structural Similarity + +```{r identify signaling groups structural, eval = cellchat_rejection_ran} +cellchat_merged <- computeNetSimilarityPairwise(cellchat_merged, type = "structural") +cellchat_merged <- netEmbedding(cellchat_merged, type = "structural") +cellchat_merged <- netClustering(cellchat_merged, type = "structural") +netVisual_embeddingPairwise(cellchat_merged, type = "structural", label.size = 3.5) +``` + +## Compare Overall Signaling Information Flow + +"CellChat can identify the conserved and context-specific signaling pathways by simply comparing the information flow for each signaling pathway, which is defined by the sum of communication probability among all pairs of cell groups in the inferred network (i.e., the total weights in the network)." + +```{r info flow, fig.height = 9, eval = cellchat_rejection_ran} + +rankNet(cellchat_merged, mode = "comparison", measure = "weight", sources.use = NULL, targets.use = NULL, stacked = F, do.stat = TRUE) + +``` + +## Compare Signaling Patterns Across Cell Populations + +"In this heatmap, colobar represents the relative signaling strength of a signaling pathway across cell groups (Note that values are row-scaled). The top colored bar plot shows the total signaling strength of a cell group by summarizing all signaling pathways displayed in the heatmap. The right grey bar plot shows the total signaling strength of a signaling pathway by summarizing all cell groups displayed in the heatmap." + + +```{r outgoing signaling, fig.height = 9, eval = cellchat_rejection_ran} + +i = 1 +pathway.union <- union(object.list[[i]]@netP$pathways, object.list[[i+1]]@netP$pathways) +ht1 = netAnalysis_signalingRole_heatmap(object.list[[i]], pattern = "outgoing", signaling = pathway.union, title = names(object.list)[i], width = 5, height = 16, cluster.cols = T) +ht2 = netAnalysis_signalingRole_heatmap(object.list[[i+1]], pattern = "outgoing", signaling = pathway.union, title = names(object.list)[i+1], width = 5, height = 16, cluster.cols = T) +draw(ht1 + ht2, ht_gap = unit(0.5, "cm")) +``` + +```{r incoming signaling, fig.height = 9, eval = cellchat_rejection_ran} +ht1 = netAnalysis_signalingRole_heatmap(object.list[[i]], pattern = "incoming", signaling = pathway.union, title = names(object.list)[i], width = 5, height = 16, cluster.cols = T) +ht2 = netAnalysis_signalingRole_heatmap(object.list[[i+1]], pattern = "incoming", signaling = pathway.union, title = names(object.list)[i+1], width = 5, height = 16, cluster.cols = T) +draw(ht1 + ht2, ht_gap = unit(0.5, "cm")) +``` + +## Identify Dysfunctional Interaction Signaling Using Communication Probabilities + +"CellChat can identify the up-regulated (increased) and down-regulated (decreased) signaling ligand-receptor pairs in one dataset compared to the other dataset by comparing the communication probability between two datasets for each L-R pair and each pair of cell groups" + +```{r compare signaling, fig.height = 12, fig.width = 8, eval = cellchat_rejection_ran} + +gg1 <- netVisual_bubble(cellchat_merged, + # sources.use = c('Vascular_EC', 'Lymphatic_EC', 'Pericyte'), + # targets.use = c('Vascular_EC', 'Lymphatic_EC', 'Pericyte'), + comparison = c(1, 2), + max.dataset = 2, + title.name = "Increased signaling in Grade 2", + angle.x = 45, + remove.isolate = T) +gg1 +signaling.grade2_increased = gg1$data \ No newline at end of file diff --git a/inst/rmarkdown/templates/singlecell/skeleton/information.R b/inst/templates/singlecell/information.R similarity index 100% rename from inst/rmarkdown/templates/singlecell/skeleton/information.R rename to inst/templates/singlecell/information.R diff --git a/inst/templates/singlecell/scRNA_qc_template.rmd b/inst/templates/singlecell/scRNA_qc_template.rmd new file mode 100644 index 0000000..60bb9de --- /dev/null +++ b/inst/templates/singlecell/scRNA_qc_template.rmd @@ -0,0 +1,400 @@ +--- +title: "scRNA QC" +output: html_document +date: "`r Sys.Date()`" +params: + ## If you have Ribosomal ratio in your raw seurat object put this as TRUE otherwise leave as FALSE + ribosomal: FALSE +--- + +# Overview + +- Project: project +- PI: PI +- Analyst: analyst + + +```{r, eval=FALSE} +### READ ME FIRST + +# This is a template for scRNA QC to present to your client. The actual QC can be done using our rshiny app: + +# After you have decided on your QC metrics load your raw object (i.e. right after you first read data into seurat) and create your QC object by editing lines 49-67. + +# Edit text line 246 with your chosen QC cutoffs! +``` + + + +```{r setup, include=FALSE} +library(Seurat) +library(tidyverse) +library(ggplot2) + +knitr::opts_chunk[["set"]]( + cache = FALSE, + dev = c("png", "pdf"), + error = TRUE, + highlight = TRUE, + message = FALSE, + prompt = FALSE, + tidy = FALSE, + warning = FALSE, + fig.height = 4, + echo=FALSE) +``` + + + +```{r load and filter} +## Load data + +seurat_raw <- seurat_clust <- readRDS("seurat_pre-filtered.rds") + +## Creat QC object USE METRICS YOU CHOSE IN THE RSHINY APP + +seurat_qc <- subset(x = seurat_raw, + subset = (nCount_RNA >= 1500) + & (nFeature_RNA >= 2200) + & (mitoRatio < 0.1) + ## & (riboRatio < 0.4) + & (Log10GenesPerUMI > 0.80) + ) + + +## Save QC object + +saveRDS(seurat_qc, file = "seurat_post-QC.rds") + +``` + + + +```{r prep-info} + +## Prep information for plotting +metadata0 <- seurat_raw@meta.data + +metadata0 = metadata0 %>% dplyr::rename(nUMI = nCount_RNA, + nGene = nFeature_RNA) + +metadata1 <- seurat_qc@meta.data + + +metadata1 = metadata1 %>% dplyr::rename(nUMI = nCount_RNA, + nGene = nFeature_RNA) +``` + + +# QC metrics: raw data {.tabset} + +In this section, we review quality control (QC) metrics for the **raw feature matrices** generated by `Cellranger`. Only a low level filter excluding cells with <100 nUMIs (= number of unique molecular identifiers, or sequenced reads per cell) was applied when uploading the data into `R`. + + +## Cells per sample + +```{r cells raw} +table(metadata0$orig.ident) + +``` + + +## UMIs per cell + +Here, we look at the distribution of UMIs (unique molecular identifiers, or sequenced reads) per cell (droplet) in the dataset. Before QC, we expect a biomodal distribution with a first *small* peak at low numbers of UMIs (<250) corresponding to droplets that encapsulated background/dying cells, and a second higher peak centered at >1000. The line is at 250. + + +```{r raw_nUMIs} +metadata0 %>% + ggplot(aes(x = nUMI, color = orig.ident, fill = orig.ident)) + + geom_density(alpha = 0.2) + + theme_classic() + + ylab("Cell density") + scale_x_log10() + + geom_vline(xintercept = 250) + + facet_wrap(. ~ orig.ident) + + ggtitle("UMIs per cell in raw dataset") +``` + + + +```{r} +# Visualize the distribution of nUMIs per cell (boxplot) +metadata0 %>% + ggplot(aes(x=orig.ident, y=log10(nUMI), fill=orig.ident)) + + geom_violin() + geom_boxplot(width = 0.1, fill = alpha("white", 0.7)) + + theme_classic() + + geom_hline(yintercept = c(log10(1000), log10(50000))) + + theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) + + theme(plot.title = element_text(hjust = 0.5, face = "bold")) +``` + +## Genes per cell + +Here, we look at the number of different genes that were detected in each cell. By "detected", we mean genes with a non-zero read count measurement. Gene detection in the range of 500 to 5000 is normal for most single-cell experiments. The line is at 750. + +```{r raw_nGene} +# Visualize the distribution of genes detected per cell (histogram) +metadata0 %>% + ggplot(aes(x = nGene, color = orig.ident, fill = orig.ident)) + + geom_density(alpha = 0.2) + + theme_classic() + + scale_x_log10() + + geom_vline(xintercept = c(700)) + + facet_wrap(. ~ orig.ident) + + ggtitle("Detected genes per cell in raw dataset") +``` + + +```{r} +# Visualize the distribution of nUMIs per cell (boxplot) +metadata0 %>% + ggplot(aes(x=orig.ident, y=log10(nGene), fill=orig.ident)) + + geom_violin() + geom_boxplot(width = 0.1, fill = alpha("white", 0.7)) + + theme_classic() + + geom_hline(yintercept = c(log10(700))) + + theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) + + theme(plot.title = element_text(hjust = 0.5, face = "bold")) +``` + + +## Mitochondrial ratio + +We evaluate overall mitochondrial gene expression as a biomarker of cellular stress during sample preparation. Typically, we expect mitochondrial genes to account for <20% of overall transcripts in each cell. The line indicates 10%. + +```{r raw_mito, warning=FALSE} +# Visualize the distribution of mitochondrial gene expression detected per cell +metadata0 %>% + ggplot(aes(color = orig.ident, x = mitoRatio, fill = orig.ident)) + + geom_density(alpha = 0.2) + + scale_x_log10() + + theme_classic() + + geom_vline(xintercept = c(0.1)) + + facet_wrap(. ~ surgery) + + ggtitle("Percentage of mitochondrial gene expression per cell in raw dataset") +``` + + + +```{r raw_ribo, eval=ribosomal, warning=FALSE, results='asis'} + +cat("## Ribosomal ratio \n") + +cat("We evaluate overall ribosomal gene expression. The line indicates 5%. \n" +) +# Visualize the distribution of mitochondrial gene expression detected per cell +metadata0 %>% + ggplot(aes(color = orig.ident, x = riboRatio, fill = orig.ident)) + + geom_density(alpha = 0.2) + + scale_x_log10() + + theme_classic() + + geom_vline(xintercept = c(0.05)) + + facet_wrap(. ~ orig.ident) + + ggtitle("Percentage of ribosomal gene expression per cell in raw dataset") +``` + + +## UMIs vs. Genes + +By plotting the number of UMIs per cell (x-axis) vs. the number of genes per cell (y-axis), we can visually assess whether there is a large proportion of low quality cells with low read counts and/or gene detection (bottom left quadrant of the plot). In the following representation, cells are further color-coded based on the percentage of mitochondrial genes found among total detected genes. The line for nUMI is at 1000 and the line for nGene is at 700. + +```{r raw_gene_by_umi, fig.height=12, fig.width=15, warning=FALSE} +# Visualize the correlation between genes detected and number of UMIs and determine whether strong presence of cells with low numbers of genes/UMIs +metadata0 %>% + ggplot(aes(x=nUMI, y=nGene, color=mitoRatio)) + + geom_point() + + stat_smooth(method=lm) + + scale_x_log10() + + scale_y_log10() + + theme_classic() + + geom_vline(xintercept = 1000) + + geom_hline(yintercept = 700) + + ggtitle("Genes vs. nUMIs in raw dataset") + + facet_wrap(~orig.ident) +``` + + +## Complexity + +Another way to assess the quality and purity of a single-cell dataset is to look for cells that have fewer detected genes per UMI than others. Typical values for this metric are >0.8 for most cells. Cells with lower diversity in the genes they express may be low-complexity cell types such as red blood cells. With sorted populations, we expect high purity and a very similar complexity distribution across samples. + +```{r raw_novelty} +# Visualize the overall novelty of the gene expression by visualizing the genes detected per UMI +metadata0 %>% + ggplot(aes(x = Log10GenesPerUMI, color = orig.ident, fill = orig.ident)) + + geom_density(alpha = 0.2) + + theme_classic() + + geom_vline(xintercept = c(0.85)) + + facet_wrap(. ~ orig.ident) + + ggtitle("log10(Genes per UMI) in raw dataset") +``` + + +```{r} +# Visualize the distribution of nUMIs per cell (boxplot) +metadata0 %>% + ggplot(aes(x=orig.ident, Log10GenesPerUMI, fill=orig.ident)) + + geom_violin() + geom_boxplot(width = 0.1, fill = alpha("white", 0.7)) + + theme_classic() + + geom_hline(yintercept = c(0.8, 0.85)) + + theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) + + theme(plot.title = element_text(hjust = 0.5, face = "bold")) +``` + + +# QC metrics: Filtered data {.tabset} + +Based on the above QC metrics, we filtered the dataset to isolate cells passing the following thresholds: **>250 UMIs, >250 genes, <0.2 mitochondrial gene ratio, and >0.8 complexity**. + +In this section, we review QC metrics for our filtered dataset. + +## Cells per sample + +```{r cells filtered} +table(metadata1$orig.ident) + +``` + + +## UMIs per cell + +The line is at 1000 + +```{r qc1_nUMIs} +metadata1 %>% + ggplot(aes(color = orig.ident, x = nUMI, fill = orig.ident)) + + geom_density(alpha = 0.2) + + scale_x_log10() + + theme_classic() + + ylab("Cell density") + xlab("nUMI") + + geom_vline(xintercept = c(1000)) + + facet_wrap(. ~ orig.ident) +``` + + +```{r} +# Visualize the distribution of nUMIs per cell (boxplot) +metadata1 %>% + ggplot(aes(x=orig.ident, y=log10(nUMI), fill=orig.ident)) + + geom_violin() + geom_boxplot(width = 0.1, fill = alpha("white", 0.7)) + + theme_classic() + + geom_hline(yintercept = c(log10(1000))) + + theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) + + theme(plot.title = element_text(hjust = 0.5, face = "bold")) +``` + + +## Genes detected + +The line is at 750 + +```{r qc1_genes} +# Visualize the distribution of genes detected per cell via histogram +metadata1 %>% + ggplot(aes(color = orig.ident, x = nGene, fill= orig.ident)) + + geom_density(alpha = 0.2) + + theme_classic() + + scale_x_log10() + xlab("nGene") + + facet_wrap(. ~ orig.ident) + + geom_vline(xintercept = c(750)) +``` + + +```{r} +# Visualize the distribution of nUMIs per cell (boxplot) +metadata1 %>% + ggplot(aes(x=orig.ident, y=log10(nGene), fill=orig.ident)) + + geom_violin() + geom_boxplot(width = 0.1, fill = alpha("white", 0.7)) + + theme_classic() + + geom_hline(yintercept = c(log10(7500))) + + theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) + + theme(plot.title = element_text(hjust = 0.5, face = "bold")) + +``` + + +## Mitochondrial ratio + +The line is at 10%. + +```{r qc1_mitoratio, message=FALSE, warning=FALSE} +# Visualize the distribution of mitochondrial gene expression detected per cell +metadata1 %>% + ggplot(aes(color = orig.ident, x = mitoRatio, fill = orig.ident)) + + geom_density(alpha = 0.2) + + scale_x_log10() + + theme_classic() + + geom_vline(xintercept = 0.1) + + facet_wrap(. ~ surgery) +``` + + +```{r qc1_ribo, eval=ribosomal, warning=FALSE, results='asis'} + +cat("## Ribosomal ratio \n") + +cat("We evaluate overall ribosomal gene expression. The line indicates 10%. \n" +) +# Visualize the distribution of mitochondrial gene expression detected per cell +metadata0 %>% + ggplot(aes(color = orig.ident, x = riboRatio, fill = orig.ident)) + + geom_density(alpha = 0.2) + + scale_x_log10() + + theme_classic() + + geom_vline(xintercept = c(0.1)) + + facet_wrap(. ~ orig.ident) + + ggtitle("Percentage of ribosomal gene expression per cell in raw dataset") +``` + + +## UMIs vs. Genes + +Both the horizontal and vertical lines are at 1000. + +```{r qc1_genes_per_UMI, fig.height=12, fig.width=15, warning=FALSE} +# Visualize the correlation between genes detected and number of UMIs and determine whether strong presence of cells with low numbers of genes/UMIs +metadata1 %>% + ggplot(aes(x = nUMI, y = nGene, color = mitoRatio)) + + geom_point() + + stat_smooth(method=lm) + + scale_x_log10() + + scale_y_log10() + + theme_classic() + + geom_vline(xintercept = c(1000)) + + geom_hline(yintercept = c(1000)) + + ggtitle("Genes vs. nUMIs in raw dataset") + + xlab("nUMI") + ylab("nGene") + + facet_wrap(~orig.ident) +``` + + +## Complexity + +```{r qc1_complexity} +# Visualize the overall novelty of the gene expression by visualizing the genes detected per UMI +metadata1 %>% + ggplot(aes(x = Log10GenesPerUMI, color = orig.ident, fill = orig.ident)) + + geom_density(alpha = 0.2) + + theme_classic() + + #geom_vline(xintercept = c(0.85)) + + facet_wrap(. ~ orig.ident) +``` + +```{r} +# Visualize the distribution of nUMIs per cell (boxplot) +metadata1 %>% + ggplot(aes(x=orig.ident, Log10GenesPerUMI, fill=orig.ident)) + + geom_violin() + geom_boxplot(width = 0.1, fill = alpha("white", 0.7)) + + theme_classic() + + #geom_hline(yintercept = c(0.85)) + + theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) + + theme(plot.title = element_text(hjust = 0.5, face = "bold")) +``` + + + +# R session + +```{r} +sessionInfo() +``` + diff --git a/inst/rmarkdown/templates/singlecell/skeleton/skeleton.Rmd b/inst/templates/singlecell/skeleton.Rmd similarity index 100% rename from inst/rmarkdown/templates/singlecell/skeleton/skeleton.Rmd rename to inst/templates/singlecell/skeleton.Rmd diff --git a/inst/templates/singlecell/starting_steps.md b/inst/templates/singlecell/starting_steps.md new file mode 100644 index 0000000..1e10850 --- /dev/null +++ b/inst/templates/singlecell/starting_steps.md @@ -0,0 +1,354 @@ +--- +title: "From raw data to Seurat" +--- + + +# Overview + +This tutorial assumes that you are starting with 10x genomic data that has not yet been run through cellranger. If you have output files from cellranger (raw_feature_bc_matrix.h5) files skip to step 2. + +# Step 1 running cellranger + +## Set up + +Here are the steps that need to be completed prior to running cellranger. + +### Locate or create your genome + +#### I have mouse or human + +We have prebuilt references for mouse and human located here: + +#### I have another genome + +It is easy to generate a cellranger reference for any genome. All you need as input are a fasta file and a gtf file. Here is some [information](https://kb.10xgenomics.com/hc/en-us/articles/115003327112-How-can-we-add-genes-to-a-reference-package-for-Cell-Ranger) about what is required for the gtf file. + +**Note: what is listed as "gene_id" (required in gtf) or "gene_name" (if used will be preferred) will be your row names (i.e. gene names). Make sure this is something useful or can be connected to information on what these genes are.** + +Below is an example script for a non-model reference + +``` +#!/bin/sh +#SBATCH --partition=short +#SBATCH -o run.o +#SBATCH -e run.e +#SBATCH -t 0-2:30 +#SBATCH -c 1 +#SBATCH --mem=48G + +module load cellranger/7.1.0 + +cellranger mkref \ + --genome=my_nonmodel_genome \ + --fasta=/path/to/my/genome/fasta/file/my_nonmodel_genome.fasta \ + --genes=/path/to/my/genome/annotation/file/my_nonmodel_genome.gtf + +``` + + +### Fastq data + +You should have a number of output files from each sample. These should look like those below: + +``` +sample1_I1_001.fastq.gz +sample1_I2_001.fastq.gz +sample1_R1_001.fastq.gz +sample1_R2_001.fastq.gz +``` + +Cellranger will be looking for both the I and R files. If you do not have both you may have to run demultiplexing, [See here](https://www.10xgenomics.com/support/software/cell-ranger/latest/analysis/inputs/cr-mkfastq). + +**NOTE: you may have multiple lanes per sample. there is no need to concatentate these prior to running cellranger.** + + +It is best to create one folder per sample with that sample name and put all of the files there. + +**Cellranger expects 1 folder per sample** + +Here is an example file structure + +``` +fastq_files +├── sample1 +│   ├── sample1_I1_001.fastq.gz +│   ├── sample1_I2_001.fastq.gz +│   ├── sample1_R1_001.fastq.gz +│   ├── sample1_R2_001.fastq.gz +├── sample2 +│   ├── sample2_I1_001.fastq.gz +``` + +## Run Cellranger + +The easiest way to run cellranger is using the array feature on O2. [Here](https://github.com/hbc/knowledgebase/blob/master/rc/arrays_in_slurm.md) is a tutorial on arrays. + +To run cellranger as an array you will need one extra file. This file called `samples.txt` will have the name of each sample on its own line. + +``` +sample1 +sample2 +sample3 +... +sampleN +``` + +for ease `samples.txt` should be in the same directory as your sbatch script. + +Here is an example sbatch script for running cellranger as an array + +```(bash) +#!/bin/bash + +#SBATCH --job-name=CellRangerCount3 # Job name +#SBATCH --partition=short # Partition name +#SBATCH --time=0-05:00 # Runtime in D-HH:MM format +#SBATCH --nodes=1 # Number of nodes (keep at 1) +#SBATCH --ntasks=1 # Number of tasks per node (keep at 1) +#SBATCH --cpus-per-task=16 # CPU cores requested per task (change for threaded jobs) +#SBATCH --mem=128G # Memory needed per node (total) +#SBATCH --error=jobid_%j.err # File to which STDERR will be written, including job ID +#SBATCH --output=jobid_%j.out # File to which STDOUT will be written, including job ID + + +samp=$(awk -v awkvar="${SLURM_ARRAY_TASK_ID}" 'NR==awkvar' samples.txt) ### This line will take the numeric slurm array task id and find the corresponding line number in samples.txt. The sample name is made into a variable called samp. + +module load cellranger/7.1.0 + +cellranger count \ + --id=${samp} \ ## This is what your output folders will be named + --fastqs=/path/to/your/folder/of/fastq/folders/${samp} \ + --transcriptome=/path/to/your/genome \ + --localcores=16 \ + --localmem=128 + +``` + +This script can be run depending on the number of samples you have. Here we will call it N: + +``` +sbatch --array=1-N run_cellranger.sh +``` + +For example if you have 9 samples to run you can use: + +``` +sbatch --array=1-9 run_cellranger.sh +``` + +Arrays are also handy if you need to re-run just a single sample. Let's say you need to re-run the 1st and 9th sample in samples.txt + +``` +sbatch --array=1,9 run_cellranger.sh +``` + +# Step 2 - going from cellranger output to Seurat + +All of your output is now in the output folders you denoted in your cellranger script. These files are almost identical inside. The bit we want to keep is going to always be the same + +``` +sample1/out/raw_feature_bc_matrix.h5 +``` + +**Note: Always use the raw matrix and not filtered_feature_bc_matrix.h5. We will do our own QC downstream.** + +In this part of the tutorial we will go through the parts of creating the seurat object piece by piece. At the bottom is the entire script that you can copy and paste and edit. + +## Part 1 - reading all the files in and creating initial seurat object + +This should all be done in an interactive session on O2 with at least 96G of memory. + + +```(R) + +library(Seurat) +library(data.table) +library(hdf5r) + +### Set up run information +data_dir <- "/path/to/cellranger/output/folders/" + +samples <- c("sample1", "sample2", "sample3") + +### Make individual seurat objects for each sample + +for (i in 1:length(samples)){ + seurat_data <- Read10X_h5(paste(c(data_dir,samples[i],"/outs/raw_feature_bc_matrix.h5"),sep="",collapse = "")) + seurat_obj <- CreateSeuratObject(counts = seurat_data, + min.features = 100, ## only keep cells with at least 100 genes + project = samples[i]) + assign(paste0(samples[i], "_seurat"), + seurat_obj) # stores Seurat object in variable of corresponding sample name +} + +### Merge all seurat objects + +seurat_ID <- paste0(samples, "_seurat") # get names of all objects + + +u <- get(seurat_ID[2]) +for (i in 3:length(seurat_ID)) { + u <- c(u, get(seurat_ID[i])) +} ## makes a list of all seurat objects + +seurat_merge <- merge(x = get(seurat_ID[1]), + y = u, + add.cell.ids = all_samples, + project = "my_scRNA_project") + +``` + +## Part 2 - Add mitochondrial information + +This code is for using mouse mitochondrial genes: + + +```(R) +# Mitochondrial genes for mouse genome +idx <- grep("^mt-", rownames(GetAssay(seurat_merge, "RNA"))) +rownames(GetAssay(seurat_merge, "RNA"))[idx] +# Mitochondrial genes vs. nuclear genes ratio +seurat_merge$mitoRatio <- PercentageFeatureSet(object = seurat_merge, pattern = "^mt-") +seurat_merge$mitoRatio <- seurat_merge@meta.data$mitoRatio/100 # Divide by 100 for Ratio instead of Percentage +``` + +This code is for using human mitochondrial genes: + +```(R) +# Mitochondrial genes for human genome +idx <- grep("^MT-", rownames(GetAssay(seurat_obj, "RNA"))) +rownames(GetAssay(seurat_merge, "RNA"))[idx] +# Mitochondrial genes vs. nuclear genes ratio +seurat_merge$mitoRatio <- PercentageFeatureSet(object = seurat_merge, pattern = "^mt-") +seurat_merge$mitoRatio <- seurat_merge@meta.data$mitoRatio/100 # Divide by 100 for Ratio instead of Percentage +``` + +Below is an example code from the siberian hamster. Here I all of the mitochondrial genes were found manually a list was created. + +```(R) +mito_genes <- c("ND1", "ND2","COX1","COX2","ATP8","ATP6","COX3","ND3","ND4L","ND4","ND5","ND6","CYTB") +idx <- (rownames(GetAssay(seurat_merge, "RNA")) %in% mito_genes) +rownames(GetAssay(seurat_merge, "RNA"))[idx] +# Mitochondrial genes vs. nuclear genes ratio +seurat_merge$mitoRatio <- PercentageFeatureSet(object = seurat_merge, features = mito_genes) +seurat_merge$mitoRatio <- seurat_merge@meta.data$mitoRatio/100 # +``` + +## Part 3 - Add additional metadata + +Here we add some additonal metrics and sample metadata from the client. + +```(R) +# Number of genes per UMI for each cell +seurat_merge$Log10GenesPerUMI <- log10(seurat_merge$nFeature_RNA) / log10(seurat_merge$nCount_RNA) + +# Import experimental metadata +metaexp <- read.csv("/path/to/experimental/metadata/meta.csv") + +# Check matching of IDs +all(metaexp$sample %in% metadata$orig.ident) +all(metadata$orig.ident %in% metaexp$sample) + +#change headings to match +colnames(metaexp)[1] <- "orig.ident" + +metafull <- plyr::join(metadata, metaexp, + by = c("orig.ident")) + +# Replace seurat object metadata +if(all(metafull$barcode == rownames(seurat_merge@meta.data))) { + rownames(metafull) <- metafull$barcode + seurat_merge@meta.data <- metafull +} +``` + +## Part 4 - Save object + +```(R) +## Join layers (each sample is a separate layer) +seurat_merge[["RNA"]] <- JoinLayers(seurat_merge[["RNA"]]) + +### Save Seurat object for future processing +saveRDS(seurat_merge, file = "seurat_pre-filtered.rds") +write.csv(seurat_merge@meta.data, file = "metadata_pre-filtered.csv") +``` + +## Full script + +Below find all pieces to copy and paste. We are assuming a mouse genome. + +``` + +library(Seurat) +library(data.table) +library(hdf5r) + +### Set up run information +data_dir <- "/path/to/cellranger/output/folders/" + +samples <- c("sample1", "sample2", "sample3") + +### Make individual seurat objects for each sample + +for (i in 1:length(samples)){ + seurat_data <- Read10X_h5(paste(c(data_dir,samples[i],"/outs/raw_feature_bc_matrix.h5"),sep="",collapse = "")) + seurat_obj <- CreateSeuratObject(counts = seurat_data, + min.features = 100, ## only keep cells with at least 100 genes + project = samples[i]) + assign(paste0(samples[i], "_seurat"), + seurat_obj) # stores Seurat object in variable of corresponding sample name +} + +### Merge all seurat objects + +seurat_ID <- paste0(samples, "_seurat") # get names of all objects + + +u <- get(seurat_ID[2]) +for (i in 3:length(seurat_ID)) { + u <- c(u, get(seurat_ID[i])) +} ## makes a list of all seurat objects + +seurat_merge <- merge(x = get(seurat_ID[1]), + y = u, + add.cell.ids = all_samples, + project = "my_scRNA_project") + + +# Mitochondrial genes for mouse genome +idx <- grep("^mt-", rownames(GetAssay(seurat_merge, "RNA"))) +rownames(GetAssay(seurat_merge, "RNA"))[idx] +# Mitochondrial genes vs. nuclear genes ratio +seurat_merge$mitoRatio <- PercentageFeatureSet(object = seurat_merge, pattern = "^mt-") +seurat_merge$mitoRatio <- seurat_merge@meta.data$mitoRatio/100 # Divide by 100 for Ratio instead of Percentage + +# Number of genes per UMI for each cell +seurat_merge$Log10GenesPerUMI <- log10(seurat_merge$nFeature_RNA) / log10(seurat_merge$nCount_RNA) + +# Import experimental metadata +metaexp <- read.csv("/path/to/experimental/metadata/meta.csv") + +# Check matching of IDs +all(metaexp$sample %in% metadata$orig.ident) +all(metadata$orig.ident %in% metaexp$sample) + +#change headings to match +colnames(metaexp)[1] <- "orig.ident" + +metafull <- plyr::join(metadata, metaexp, + by = c("orig.ident")) + +# Replace seurat object metadata +if(all(metafull$barcode == rownames(seurat_merge@meta.data))) { + rownames(metafull) <- metafull$barcode + seurat_merge@meta.data <- metafull +} + + +## Join layers (each sample is a separate layer) +seurat_merge[["RNA"]] <- JoinLayers(seurat_merge[["RNA"]]) + +### Save Seurat object for future processing +saveRDS(seurat_merge, file = "seurat_pre-filtered.rds") +write.csv(seurat_merge@meta.data, file = "metadata_pre-filtered.csv") +``` \ No newline at end of file diff --git a/inst/rmarkdown/templates/teaseq/skeleton/QC/QC-01-load_data.R b/inst/templates/teaseq/QC/QC-01-load_data.R similarity index 100% rename from inst/rmarkdown/templates/teaseq/skeleton/QC/QC-01-load_data.R rename to inst/templates/teaseq/QC/QC-01-load_data.R diff --git a/inst/rmarkdown/templates/teaseq/skeleton/QC/QC-02-run_analysis.R b/inst/templates/teaseq/QC/QC-02-run_analysis.R similarity index 100% rename from inst/rmarkdown/templates/teaseq/skeleton/QC/QC-02-run_analysis.R rename to inst/templates/teaseq/QC/QC-02-run_analysis.R diff --git a/inst/rmarkdown/templates/teaseq/skeleton/QC/QC.Rmd b/inst/templates/teaseq/QC/QC.Rmd similarity index 100% rename from inst/rmarkdown/templates/teaseq/skeleton/QC/QC.Rmd rename to inst/templates/teaseq/QC/QC.Rmd diff --git a/inst/rmarkdown/templates/teaseq/skeleton/README.md b/inst/templates/teaseq/README.md similarity index 100% rename from inst/rmarkdown/templates/teaseq/skeleton/README.md rename to inst/templates/teaseq/README.md diff --git a/inst/rmarkdown/templates/teaseq/skeleton/information.R b/inst/templates/teaseq/information.R similarity index 100% rename from inst/rmarkdown/templates/teaseq/skeleton/information.R rename to inst/templates/teaseq/information.R diff --git a/inst/rmarkdown/templates/teaseq/skeleton/scripts/fix_filenames.R b/inst/templates/teaseq/scripts/fix_filenames.R similarity index 100% rename from inst/rmarkdown/templates/teaseq/skeleton/scripts/fix_filenames.R rename to inst/templates/teaseq/scripts/fix_filenames.R diff --git a/inst/rmarkdown/templates/teaseq/skeleton/scripts/gex_adt_hto.sbatch b/inst/templates/teaseq/scripts/gex_adt_hto.sbatch similarity index 100% rename from inst/rmarkdown/templates/teaseq/skeleton/scripts/gex_adt_hto.sbatch rename to inst/templates/teaseq/scripts/gex_adt_hto.sbatch diff --git a/inst/rmarkdown/templates/teaseq/skeleton/scripts/gex_atac.sbatch b/inst/templates/teaseq/scripts/gex_atac.sbatch similarity index 100% rename from inst/rmarkdown/templates/teaseq/skeleton/scripts/gex_atac.sbatch rename to inst/templates/teaseq/scripts/gex_atac.sbatch diff --git a/man/bcbio_nfcore_check.Rd b/man/bcbio_nfcore_check.Rd index 76c954f..022c903 100644 --- a/man/bcbio_nfcore_check.Rd +++ b/man/bcbio_nfcore_check.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/hello.R +% Please edit documentation in R/helpers.R \name{bcbio_nfcore_check} \alias{bcbio_nfcore_check} \title{Function to check samplesheet for nf-core} diff --git a/man/bcbio_set_project.Rd b/man/bcbio_set_project.Rd deleted file mode 100644 index b3c67ef..0000000 --- a/man/bcbio_set_project.Rd +++ /dev/null @@ -1,26 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/hello.R -\name{bcbio_set_project} -\alias{bcbio_set_project} -\title{Function to help with project name used for parent folder} -\usage{ -bcbio_set_project() -} -\value{ -A string list with hbc_code, and project folder name -} -\description{ -This function will ask for user input about: -\itemize{ -\item numeric code -\item PI full name -\item technology -\item tissue -\item organism -\item project description -} -} -\details{ -It removes special character with \verb{_}. The output is a guideline to -what the folder used can be. -} diff --git a/man/bcbio_templates.Rd b/man/bcbio_templates.Rd index 45c9ad9..217ea0c 100644 --- a/man/bcbio_templates.Rd +++ b/man/bcbio_templates.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/hello.R +% Please edit documentation in R/helpers.R \name{bcbio_templates} \alias{bcbio_templates} \title{Function to help deploy analysis folder inside a project folder} @@ -7,13 +7,7 @@ bcbio_templates(type = "rnaseq", outpath) } \arguments{ -\item{type}{string indicating the type of analysis, supported: -\itemize{ -\item base -\item rnaseq, scrnaseq, -\item teaseq -\item cosmx -}} +\item{type}{string indicating the type of analysis, supported: rnaseq.} \item{outpath}{string path indicating where to copy all the files to} } diff --git a/tests/testthat/rnaseq.R b/tests/testthat/rnaseq.R index e30ebea..4b1967a 100644 --- a/tests/testthat/rnaseq.R +++ b/tests/testthat/rnaseq.R @@ -1,7 +1,6 @@ library(bcbioR) - -test_that("rnaseq testing", { +test_that("rnaseq copy",{ path <- withr::local_tempdir() print(path) bcbio_templates(type="rnaseq", outpath=path) @@ -26,4 +25,32 @@ test_that("rnaseq testing", { functions_file = file.path(path,'DE/load_data.R') ) ) + # use_bcbio_projects(path, nfcore="nf-core/rnaseq", copy=TRUE, git=FALSE) }) + +# test_that("rnaseq testing", { +# path <- withr::local_tempdir() +# print(path) +# bcbio_templates(type="rnaseq", outpath=path) +# numerator="tumor" +# denominator="normal" +# subset_value=NA +# rmarkdown::render(input = file.path(path,"DE/DEG.Rmd"), +# output_dir = file.path(path,"DE"), +# output_format = "html_document", +# output_file = ifelse(!is.na(subset_value), +# paste0('DE_', subset_value, '_', numerator, '_vs_', denominator, '.html'), +# paste0('DE_', numerator, '_vs_', denominator, '.html') +# ), +# clean = TRUE, +# envir = new.env(), +# params = list( +# subset_value = subset_value, +# numerator = numerator, +# denominator = denominator, +# params_file = file.path(path,'DE/params_de.R'), +# project_file = file.path(path,'information.R'), +# functions_file = file.path(path,'DE/load_data.R') +# ) +# ) +# }) diff --git a/inst/rmarkdown/templates/rnaseq/skeleton/DE/PCA_variance_analysis.Rmd b/vignettes/PCA_variance_analysis.Rmd similarity index 100% rename from inst/rmarkdown/templates/rnaseq/skeleton/DE/PCA_variance_analysis.Rmd rename to vignettes/PCA_variance_analysis.Rmd