-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Built on 2023-09-15, v0.1.35 update, dev
- Loading branch information
Showing
25 changed files
with
1,064 additions
and
192 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
##########################################
# Cell Hashtag Analysis Track library prep (Step 1)
##########################################
# For every sequencing run in par_seq_run_names this script creates
# <output_dir>/samples_info/<run>/ and writes two files into it:
#   library.csv     - one "Gene Expression" row and one "Antibody Capture" row
#   feature_ref.csv - one row per sample-specific barcode
#
# Positional command-line arguments:
#   1. output_dir    - directory holding job_info/parameters/step1_par.txt;
#                      samples_info/ is created inside it
#   2. r_lib_path    - passed to .libPaths()
#   3. pipeline_home - read from the command line but not used below

## load parameters
args <- commandArgs(trailingOnly = TRUE)
output_dir <- args[1]
r_lib_path <- args[2]
pipeline_home <- args[3]

## load libraries
.libPaths(r_lib_path)

## load parameters
source(paste0(output_dir, "/job_info/parameters/step1_par.txt"))

## automated library prep
if (tolower(par_automated_library_prep) == "yes") {
  new_dir <- paste0(output_dir, "/samples_info/")
  if (!dir.exists(new_dir)) {
    dir.create(new_dir)
  }

  # The i-th RNA name, HTO name and run name describe the same run, so the
  # three vectors must line up.
  n_runs <- length(par_seq_run_names)
  if (length(par_RNA_run_names) != n_runs ||
        length(par_HTO_run_names) != n_runs) {
    stop(
      "par_RNA_run_names, par_HTO_run_names and par_seq_run_names ",
      "must have the same length",
      call. = FALSE
    )
  }

  # The feature reference is the same for every run: build it once, outside
  # the loop, so the scalar parameters are expanded exactly one time.
  n_barcodes <- length(id)
  if (length(name) != n_barcodes || length(sequence) != n_barcodes) {
    stop("id, name and sequence must have the same length", call. = FALSE)
  }
  feature_ref <- data.frame(
    id = id,
    name = name,
    read = rep_len(read, n_barcodes),
    pattern = rep_len(pattern, n_barcodes),
    sequence = sequence,
    feature_type = rep_len("Antibody Capture", n_barcodes)
  )

  # seq_along() visits every run; `i in length(x)` only visited the last one.
  for (i in seq_along(par_seq_run_names)) {
    # create directory
    run_dir <- paste0(new_dir, par_seq_run_names[i])
    if (!dir.exists(run_dir)) {
      dir.create(run_dir)
    }

    # write the library.csv file
    library_df <- data.frame(
      fastqs = c(par_fastq_directory, par_fastq_directory),
      sample = c(par_RNA_run_names[i], par_HTO_run_names[i]),
      library_type = c("Gene Expression", "Antibody Capture")
    )
    write.table(
      library_df,
      file = paste0(run_dir, "/library.csv"),
      sep = ",", row.names = FALSE, quote = FALSE
    )

    # write the feature_ref.csv file
    write.table(
      feature_ref,
      file = paste0(run_dir, "/feature_ref.csv"),
      sep = ",", row.names = FALSE, quote = FALSE
    )
  }
} else if (tolower(par_automated_library_prep) == "no") {
  print("Skipping automated library prep for CellRanger")
}
|
||
|
||
|
65 changes: 65 additions & 0 deletions
65
scrnabox.slurm/general_codes/scrna_automated_library_prep.R
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
##########################################
# Standard Analysis Track library prep (Step 1)
##########################################
# For every sample in par_sample_names this script creates
# <output_dir>/samples_info/<dir>/ and writes a one-row "Gene Expression"
# library.csv into it. <dir> is the sample name itself, or the matching entry
# of par_new_sample_names when par_rename_samples is "yes". The `sample`
# column always holds the original name from par_sample_names.
#
# Positional command-line arguments:
#   1. output_dir    - directory holding job_info/parameters/step1_par.txt;
#                      samples_info/ is created inside it
#   2. r_lib_path    - passed to .libPaths()
#   3. pipeline_home - read from the command line but not used below

## load parameters
args <- commandArgs(trailingOnly = TRUE)
output_dir <- args[1]
r_lib_path <- args[2]
pipeline_home <- args[3]

## load libraries
.libPaths(r_lib_path)

## load parameters
source(paste0(output_dir, "/job_info/parameters/step1_par.txt"))

if (tolower(par_automated_library_prep) == "yes") {
  ## automated library prep
  # par_rename_samples is only consulted once automated prep is requested.
  rename_samples <- tolower(par_rename_samples)

  if (rename_samples %in% c("yes", "no")) {
    if (rename_samples == "yes") {
      # Old and new names are paired by position.
      if (length(par_new_sample_names) != length(par_sample_names)) {
        stop(
          "par_new_sample_names and par_sample_names must have the same length",
          call. = FALSE
        )
      }
      dir_names <- par_new_sample_names
    } else {
      dir_names <- par_sample_names
    }

    new_dir <- paste0(output_dir, "/samples_info/")
    if (!dir.exists(new_dir)) {
      dir.create(new_dir)
    }

    for (k in seq_along(par_sample_names)) {
      # create directory (named after the new sample name when renaming)
      sample_dir <- paste0(new_dir, dir_names[k])
      if (!dir.exists(sample_dir)) {
        dir.create(sample_dir)
      }

      # write the csv file; `sample` keeps the original sample name
      library_df <- data.frame(
        fastqs = par_fastq_directory,
        sample = par_sample_names[k],
        library_type = "Gene Expression"
      )
      write.table(
        library_df,
        file = paste0(sample_dir, "/library.csv"),
        sep = ",", row.names = FALSE, quote = FALSE
      )
    }
  }
} else if (tolower(par_automated_library_prep) == "no") {
  ## do not perform automated library prep
  print("Skipping automated library prep for CellRanger")
}
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,48 @@ | ||
###### REF_DIR_GRCH, path of grch | ||
############################################################################ | ||
# If you want to automate the library preparation process, | ||
# set par_automated_library_prep to "yes" and address the remaining parameters. | ||
############################################################################ | ||
## Do you want to perform automated library prep? | ||
par_automated_library_prep <- "yes" | ||
|
||
###### library.csv parameters | ||
## Path to the directory containing the FASTQ files for the RNA and Antibody assays. | ||
## This folder should only contain the FASTQ files for the experiment. | ||
par_fastq_directory <- "/home/fiorini9/scratch/scrna_pipeline/stoeckius_markdown/raw_data/fastqs" | ||
## list the sequencing run name(s) used in the FASTQ nomenclature for the RNA assay | ||
par_RNA_run_names <- c("run1GEX") | ||
## list the sequencing run name(s) used in the FASTQ nomenclature for the Antibody assay | ||
par_HTO_run_names <- c("run1HTO") | ||
## Define the name(s) of your sequencing run(s). This can be whatever you would like, but make sure you list the names of the sequencing runs in the same order as the names of the RNA and HTO assays are listed. | ||
par_seq_run_names <- c("run1") | ||
## If your sequencing is paired-end, set the following to TRUE. Otherwise set it as FALSE. | ||
par_paired_end_seq <- TRUE | ||
|
||
###### feature_ref.csv parameters | ||
## please note that if you used different sample-specific barcodes for each sequencing run, you should manually prepare the feature_ref.csv files | ||
## list the IDs of the sample-specific barcodes | ||
id<-c('Hash1', 'Hash2', 'Hash3', 'Hash4', 'Hash5', 'Hash6', 'Hash7', 'Hash8') | ||
## list the names of the sample-specific barcodes | ||
name <-c('A_TotalSeqA', 'B_TotalSeqA', 'C_TotalSeqA', 'D_TotalSeqA', 'E_TotalSeqA', 'F_TotalSeqA', 'G_TotalSeqA', 'H_TotalSeqA') | ||
## which read is the sample-specific barcode on? The majority of cases will be R2. | ||
read <- "R2" | ||
## define the pattern of the sample-specific barcodes. | ||
pattern <- "5P(BC)" | ||
## list the base-pair sequences for each of the sample-specific barcodes | ||
sequence <- c('AGGACCATCCAA','ACATGTTACCGT', 'AGCTTACTATCC', 'TCGATAATGCGA', 'GAGGCTGAGCTA', 'GTGTGACGTATT', 'ACTGTCTAACGG','TATCACATCGGT') | ||
|
||
|
||
############################################################################ | ||
# CellRanger counts pipeline parameters. | ||
############################################################################ | ||
## path to reference genome | ||
REF_DIR_GRCH=/cvmfs/soft.mugqic/CentOS6/genomes/species/Homo_sapiens.GRCh38/genome/10xGenomics/refdata-cellranger-GRCh38-3.0.0 | ||
###### R1LENGTH=20, Limit the length of the input Read 1 sequence of Gene Expression (and any Feature Barcode) library to the first N bases, where N is a user-supplied value. | ||
###### mempercore, For clusters whose job managers do not support memory requests, it is possible to request memory in the form of cores via the --mempercore command-line option. This option will scale up the number of threads requested via the __MRO_THREADS__ variable according to how much memory a stage requires when given to the ratio of memory on your nodes. | ||
MEMPERCORE=30 | ||
## Minimum number of bases to retain for R1 sequence of gene expression assay. If you want to use this parameter uncomment the line below and define your R1LENGTH. | ||
# R1LENGTH=20 | ||
## For clusters whose job managers do not support memory requests, it is possible to request memory in the form of cores. This option will scale up the number of threads requested via the MRO_THREADS variable according to how much memory a stage requires when given to the ratio of memory on your nodes. | ||
MEMPERCORE=30 | ||
|
||
|
||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,8 +1,28 @@ | ||
Save_RNA="NO" | ||
Save_metadata="NO" | ||
###### min.cells sets the threshold for genes to only take the genes that are present in at least a specified number of cells. | ||
min.cells_L=0 | ||
###### min.features sets the thresholds for cells that express at least a specified number of genes. | ||
min.features_L=0 | ||
###### count_matrices="", if you have the count matrix and do not want to run cellranger, use this add the path to this object. | ||
#count_matrices='/lustre03/project/6070393/COMMON/Dark_Genome/samamiri/test_scrnabox/tutorial_scrna/download/course_data/count_matrices' | ||
############################################################################ | ||
# If you want to save an RNA expression matrix and metadata dataframe set the following to "yes" | ||
############################################################################ | ||
par_save_RNA="NO" | ||
par_save_metadata="NO" | ||
|
||
############################################################################ | ||
# Ambient RNA removal | ||
############################################################################ | ||
## If you want to remove the ambient RNA, change the default to "yes". | ||
par_ambient_rna="yes" | ||
|
||
############################################################################ | ||
# Existing feature-barcode matrices | ||
############################################################################ | ||
## If you already have feature-barcode matrices and want to initiate scRNAbox at step 2, without running step 1 (cell ranger), | ||
## uncomment the line below and add the path to the directory containing the feature-barcode matrices. | ||
#par_count_matrices='/lustre03/project/6070393/COMMON/Dark_Genome/samamiri/test_scrnabox/tutorial_scrna/download/course_data/count_matrices' | ||
|
||
############################################################################ | ||
# Filtering parameters | ||
############################################################################ | ||
## Only retain the genes that are present in at least a specified number of cells. | ||
par_min.cells_L=0 | ||
## Only retain the cells that express at least a specified number of genes. | ||
par_min.features_L=0 | ||
|
||
|
Binary file not shown.
Oops, something went wrong.