Built on 2023-09-15, v0.1.35 update, dev

neurobioinfo · Sep 19, 2023 · 0ffed85 · 0ffed85
1 parent 54f4d9f
commit 0ffed85
Show file tree

Hide file tree

Showing 25 changed files with 1,064 additions and 192 deletions.
diff --git a/.DS_Store b/.DS_Store
diff --git a/docs/Dataset1.md b/docs/Dataset1.md
diff --git a/docs/FAQ.md b/docs/FAQ.md
@@ -9,12 +9,13 @@ tags: scRNA FAQ
 categories: 
 comments: false
 ---
-## Frequently asked questions
+#### Frequently asked questions
 
 - [Possibility to consider multiple control](#possibility-to-consider-multiple-control)
 - [How to skip the integration step](#how-to-skip-the-integration-step)
 - [Can we run without cellraanger](#can-we-run-without-cellraanger)
-## Possibility to consider multiple control
+- [error: Batch job submission failed: Requested node configuration is not available](#error-batch-job-submission-failed-requested-node-configuration-is-not-available)
+#### Possibility to consider multiple control
 Users can redefine the genotype: for step 8, open `/job_info/parameters/step8_par.txt`, add `new_genotype='YES'`, and redefine the labels: 
 
 ```
@@ -23,8 +24,38 @@ old_antibody_label=c('B0251-TotalSeqB','B0252-TotalSeqB','B0253-TotalSeqB','B025
 new_antibody_label=c('AIW002','SNCA-A53T','GBA-KO','Parkin-KO','PINK1-KO','SNCA-KO')
 ```
 
-## How to skip the integration step
+#### How to skip the integration step
 If you have a sole sample, there is no need for the integration step; once you run step 4, go to step 6 and add `par_skip_step5='YES'` to the step 6 parameter, `/job_info/parameters/step6_par.txt`.
 
-## Can we run without cellraanger. 
+#### Can we run without cellraanger. 
 if you have the matrix file, .......
+
+#### error: Batch job submission failed: Requested node configuration is not available
+If you get the above error, navigate to the `scrnabox_config.ini` file in `~/working_directory/job_info/configs` and adjust the Job parameters for the Analytical Step that produced the error. Make sure to uncomment the corresponding lines in the configuration file. For example, if you get the error in Step 2, navigate to the configuration file and you will see the following.
+```
+############# [step2]
+# THREADS_ARRAY["step_2"]=10
+# MEM_ARRAY["step_2"]=16g
+# WALLTIME_ARRAY["step_2"]=00-05:00
+```
+To resolve the error, uncomment the lines and change the values. For instance:
+```
+############# [step2]
+THREADS_ARRAY["step_2"]=4
+MEM_ARRAY["step_2"]=10g
+WALLTIME_ARRAY["step_2"]=00-05:00
+```
+
+#### Step 3: Error in { : task 1 failed - "No cells found"
+
+This error means that none of the cells in the user's experiment pass all of the filtering thresholds. To resolve it, adjust the filtering parameters so that they are less stringent.
+
+
+#### how to edit text files in the terminal
+
+
+
+
+
+
+
diff --git a/docs/HTO.md b/docs/HTO.md
@@ -115,8 +115,8 @@ Finally, in preparation for Step 1 (FASTQ pre-processing with CellRanger) users
 The `library.csv` file defines the necessary information of the FASTQ files for the experiment, including the gene expression and antibody assays. The structure of the `library.csv` file should be: <br />
 ```
 fastqs,sample,library_type
-~/fastqs/,CTRL1_GEX,Gene Expression
-~/fastqs/,CTRL1_HTO,Antibody Capture
+~/fastqs/,RUN1GEX,Gene Expression
+~/fastqs/,RUN1HTO,Antibody Capture
 ```
 - The `fastqs` column defines the path to the directory that contains the FASTQ files for the experiment. <br /> 
 - The `sample` column defines the sample name of the corresponding FASTQ file. Please note that FASTQ files must be named according to standard CellRanger nomenclature. For example, "CTRL1_S1_L001_R1_001.fastq". For more information please visit CellRanger's [documentation](https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/fastq-input). <br />

diff --git a/scrnabox.slurm/.DS_Store b/scrnabox.slurm/.DS_Store
diff --git a/scrnabox.slurm/general_codes/.DS_Store b/scrnabox.slurm/general_codes/.DS_Store
diff --git a/scrnabox.slurm/general_codes/HTO_automated_library_prep.R b/scrnabox.slurm/general_codes/HTO_automated_library_prep.R
@@ -0,0 +1,49 @@
+##########################################
+# Cell Hashtag Analysis Track library prep (Step 1)
+##########################################
+# For every sequencing run listed in par_seq_run_names, writes a library.csv
+# and a feature_ref.csv into <output_dir>/samples_info/<run name>/, using the
+# values defined in <output_dir>/job_info/parameters/step1_par.txt.
+
+## load parameters
+## positional command-line arguments: output directory, R library path, pipeline home
+args <- commandArgs(trailingOnly = TRUE)
+output_dir <- args[1]
+r_lib_path <- args[2]
+pipeline_home <- args[3]
+
+## load libraries
+.libPaths(r_lib_path)
+
+## load parameters
+source(paste0(output_dir, '/job_info/parameters/step1_par.txt'))
+
+## automated library prep
+if (tolower(par_automated_library_prep) == 'yes') {
+    new_dir <- paste0(output_dir, "/samples_info/")
+    dir.create(paste0(output_dir, "/samples_info"))
+
+    ## the i-th RNA run, HTO run and sequencing run name are paired by position,
+    ## so the three vectors must line up; otherwise NA sample names would be written
+    n_runs <- length(par_seq_run_names)
+    if (length(par_RNA_run_names) != n_runs || length(par_HTO_run_names) != n_runs) {
+        stop("par_RNA_run_names, par_HTO_run_names and par_seq_run_names must have the same length", call. = FALSE)
+    }
+
+    ## feature_ref.csv content is the same for every sequencing run, so build it once.
+    ## The repeated values go into new columns instead of overwriting `read`, `pattern`
+    ## or base::length, which would corrupt the values on a second loop iteration.
+    n_barcodes <- length(id)
+    feature_ref <- data.frame(
+        id = id,
+        name = name,
+        read = rep_len(read, n_barcodes),
+        pattern = rep_len(pattern, n_barcodes),
+        sequence = sequence,
+        feature_type = rep_len("Antibody Capture", n_barcodes)
+    )
+
+    ## seq_along() visits every run; `i in length(x)` only visited the last one
+    for (i in seq_along(par_seq_run_names)) {
+        #create directory
+        run_dir <- paste0(new_dir, par_seq_run_names[i])
+        dir.create(run_dir)
+
+        #write the library.csv file
+        library_df <- data.frame(
+            fastqs = c(par_fastq_directory, par_fastq_directory),
+            sample = c(par_RNA_run_names[i], par_HTO_run_names[i]),
+            library_type = c("Gene Expression", "Antibody Capture")
+        )
+        write.table(library_df, file = paste0(run_dir, "/library.csv"), sep = ",", row.names = FALSE, quote = FALSE)
+
+        #write feature_ref.csv file
+        write.table(feature_ref, file = paste0(run_dir, "/feature_ref.csv"), sep = ",", row.names = FALSE, quote = FALSE)
+    }
+}
+
+## do not perform automated library prep
+if (tolower(par_automated_library_prep) == 'no') {
+    print("Skipping automated library prep for CellRanger")
+}
+
+
+
diff --git a/scrnabox.slurm/general_codes/scrna_automated_library_prep.R b/scrnabox.slurm/general_codes/scrna_automated_library_prep.R
@@ -0,0 +1,65 @@
+##########################################
+# Standard Analysis Track library prep (Step 1)
+##########################################
+# For every sample listed in par_sample_names, writes a library.csv into
+# <output_dir>/samples_info/<sample directory>/, using the values defined in
+# <output_dir>/job_info/parameters/step1_par.txt. When par_rename_samples is
+# "yes", the directory is named after the matching entry of par_new_sample_names.
+
+## load parameters
+## positional command-line arguments: output directory, R library path, pipeline home
+args <- commandArgs(trailingOnly = TRUE)
+output_dir <- args[1]
+r_lib_path <- args[2]
+pipeline_home <- args[3]
+
+## load libraries
+.libPaths(r_lib_path)
+
+## load parameters
+source(paste0(output_dir, '/job_info/parameters/step1_par.txt'))
+
+## Create `sample_dir` and write the single-row library.csv for `sample_name` into it.
+write_library_csv <- function(sample_dir, sample_name) {
+    dir.create(sample_dir)
+    library_df <- data.frame(
+        fastqs = par_fastq_directory,
+        sample = sample_name,
+        library_type = "Gene Expression"
+    )
+    write.table(library_df, file = paste0(sample_dir, "/library.csv"), sep = ",", row.names = FALSE, quote = FALSE)
+}
+
+prep_mode <- tolower(par_automated_library_prep)
+
+## automated library prep and do not rename samples
+## (&& short-circuits, so par_rename_samples is only read when prep is requested)
+if (prep_mode == 'yes' && tolower(par_rename_samples) == 'no') {
+    new_dir <- paste0(output_dir, "/samples_info/")
+    dir.create(paste0(output_dir, "/samples_info"))
+
+    for (sample_name in par_sample_names) {
+        write_library_csv(paste0(new_dir, sample_name), sample_name)
+    }
+}
+
+## automated library prep and rename samples
+if (prep_mode == 'yes' && tolower(par_rename_samples) == 'yes') {
+    ## old and new names are paired by position, so both vectors must line up
+    if (length(par_new_sample_names) != length(par_sample_names)) {
+        stop("par_sample_names and par_new_sample_names must have the same length", call. = FALSE)
+    }
+
+    new_dir <- paste0(output_dir, "/samples_info/")
+    dir.create(paste0(output_dir, "/samples_info"))
+
+    ## pairing by index keeps each directory name a single string even if a
+    ## sample name appears more than once in par_sample_names
+    for (k in seq_along(par_sample_names)) {
+        ## the directory uses the new name; library.csv keeps the original FASTQ sample name
+        write_library_csv(paste0(new_dir, par_new_sample_names[k]), par_sample_names[k])
+    }
+}
+
+## do not perform automated library prep
+if (prep_mode == 'no') {
+    print("Skipping automated library prep for CellRanger")
+}
+
+
+
diff --git a/scrnabox.slurm/hto/pars/step1_par.txt b/scrnabox.slurm/hto/pars/step1_par.txt
@@ -1,5 +1,48 @@
-###### REF_DIR_GRCH, path of grch
+############################################################################
+# If you want to automate the library preparation process,
+# set par_automated_library_prep to "yes" and address the remaining parameters.
+############################################################################
+## Do you want to perform automated library prep?
+par_automated_library_prep <- "yes"
+
+###### library.csv parameters
+## Path to the directory containing the FASTQ files for the RNA and Antibody assays.
+## This folder should only contain the FASTQ files for the experiment. 
+par_fastq_directory <- "/home/fiorini9/scratch/scrna_pipeline/stoeckius_markdown/raw_data/fastqs"
+## list the sequencing run name(s) used in the FASTQ nomenclature for the RNA assay
+par_RNA_run_names <- c("run1GEX")
+## list the sequencing run name(s) used in the FASTQ nomenclature for the Antibody assay
+par_HTO_run_names <- c("run1HTO")
+## Define the name(s) of your sequencing run(s). This can be whatever you would like, but make sure you list the names of the sequencing runs in the same order as the names of the RNA and HTO assays are listed.
+par_seq_run_names <- c("run1")
+## If your sequencing is paired-end, set the following to TRUE. Otherwise set it as FALSE.
+par_paired_end_seq <- TRUE
+
+###### feature_ref.csv parameters
+## please note that if you used different sample-specific barcodes for each sequencing run, you should manually prepare the feature_ref.csv files
+## list the IDs of the sample-specific barcodes
+id<-c('Hash1', 'Hash2', 'Hash3', 'Hash4', 'Hash5', 'Hash6', 'Hash7', 'Hash8')
+## list the names of the sample-specific barcodes
+name <-c('A_TotalSeqA', 'B_TotalSeqA', 'C_TotalSeqA', 'D_TotalSeqA', 'E_TotalSeqA', 'F_TotalSeqA', 'G_TotalSeqA', 'H_TotalSeqA')
+## which read is the sample-specific barcode on? The majority of cases will be R2.
+read <- "R2"
+## define the pattern of the sample-specific barcodes.
+pattern <- "5P(BC)"
+## list the base-pair sequences for each of the sample-specific barcodes 
+sequence <- c('AGGACCATCCAA','ACATGTTACCGT', 'AGCTTACTATCC', 'TCGATAATGCGA', 'GAGGCTGAGCTA', 'GTGTGACGTATT', 'ACTGTCTAACGG','TATCACATCGGT')
+
+
+############################################################################
+# CellRanger counts pipeline parameters.
+############################################################################
+## path to reference genome
 REF_DIR_GRCH=/cvmfs/soft.mugqic/CentOS6/genomes/species/Homo_sapiens.GRCh38/genome/10xGenomics/refdata-cellranger-GRCh38-3.0.0
-###### R1LENGTH=20, Limit the length of the input Read 1 sequence of Gene Expression (and any Feature Barcode) library to the first N bases, where N is a user-supplied value. 
-###### mempercore, For clusters whose job managers do not support memory requests, it is possible to request memory in the form of cores via the --mempercore command-line option. This option will scale up the number of threads requested via the __MRO_THREADS__ variable according to how much memory a stage requires when given to the ratio of memory on your nodes.
-MEMPERCORE=30 
+## Minimum number of bases to retain for R1 sequence of gene expression assay. If you want to use this parameter uncomment the line below and define your R1LENGTH.
+# R1LENGTH=20
+## For clusters whose job managers do not support memory requests, it is possible to request memory in the form of cores. This option will scale up the number of threads requested via the MRO_THREADS variable according to how much memory a stage requires when given to the ratio of memory on your nodes.
+MEMPERCORE=30 
+
+
+
+
+
diff --git a/scrnabox.slurm/hto/pars/step2_par.txt b/scrnabox.slurm/hto/pars/step2_par.txt
@@ -1,8 +1,28 @@
-Save_RNA="NO"
-Save_metadata="NO"
-###### min.cells sets the threshold for genes to only take the genes that are present in at least a specified number of cells.
-min.cells_L=0
-###### min.features sets the thresholds for cells that express at least a specified number of genes.
-min.features_L=0
-###### count_matrices="", if you have the count matrix and do not want to run cellranger, use this add the path to this object. 
-#count_matrices='/lustre03/project/6070393/COMMON/Dark_Genome/samamiri/test_scrnabox/tutorial_scrna/download/course_data/count_matrices'
+############################################################################
+# If you want to save an RNA expression matrix and metadata dataframe set the following to "yes"
+############################################################################
+par_save_RNA="NO"
+par_save_metadata="NO"
+
+############################################################################
+# Ambient RNA removal
+############################################################################
+## If you want to remove the ambient RNA, set the following to "yes"; otherwise set it to "no".
+par_ambient_rna="yes"
+
+############################################################################
+# Existing feature-barcode matrices
+############################################################################
+## If you already have feature-barcode matrices and want to initiate scRNAbox at step 2, without running step 1 (cell ranger),
+## uncomment the line below and add the path to the directory containing the feature-barcode matrices. 
+#par_count_matrices='/lustre03/project/6070393/COMMON/Dark_Genome/samamiri/test_scrnabox/tutorial_scrna/download/course_data/count_matrices'
+
+############################################################################
+# Filtering parameters
+############################################################################
+## Only retain the genes that are present in at least a specified number of cells.
+par_min.cells_L=0
+## Only retain the cells that express at least a specified number of genes.
+par_min.features_L=0
+
+
diff --git a/scrnabox.slurm/hto/scripts/.DS_Store b/scrnabox.slurm/hto/scripts/.DS_Store