adds link to tradeseq tutorial

cmulittlechild · Jan 28, 2021 · 66f46c5 · 66f46c5
1 parent 243c8a8
commit 66f46c5
Show file tree

Hide file tree

Showing 18 changed files with 6,472 additions and 4,714 deletions.
diff --git a/exercises.md b/exercises.md
@@ -31,7 +31,7 @@ During this workshop, you will use conda environments to run the exercises. This
 | <img border="0" src="https://static.thenounproject.com/png/1517975-200.png" width="30" height="30"> Differential expression | [Seurat_dge](labs/compiled/seurat/seurat_05_dge.html) ([.Rmd](https://raw.githubusercontent.com/NBISweden/workshop-scRNAseq/master/labs/compiled/seurat/seurat_05_dge.Rmd)) | [Scater_dge](labs/compiled/scater/scater_05_dge.html) ([.Rmd](https://raw.githubusercontent.com/NBISweden/workshop-scRNAseq/master/labs/compiled/scater/scater_05_dge.Rmd)) | [Scanpy_dge](labs/compiled/scanpy/scanpy_05_dge.html) ([.ipynb](https://raw.githubusercontent.com/NBISweden/workshop-scRNAseq/master/labs/compiled/scanpy/scanpy_05_dge.ipynb)) |
 | <img border="0" src="files/icon_celltype.png" width="30" height="30"> Celltype prediction | [Seurat_ct](labs/compiled/seurat/seurat_06_celltype.html) ([.Rmd](https://raw.githubusercontent.com/NBISweden/workshop-scRNAseq/master/labs/compiled/seurat/seurat_06_celltype.Rmd)) | [Scater_ct](labs/compiled/scater/scater_06_celltype.html) ([.Rmd](https://raw.githubusercontent.com/NBISweden/workshop-scRNAseq/master/labs/compiled/scater/scater_06_celltype.Rmd)) | [Scanpy_ct](labs/compiled/scanpy/scanpy_06_celltype.html) ([.ipynb](https://raw.githubusercontent.com/NBISweden/workshop-scRNAseq/master/labs/compiled/scanpy/scanpy_06_celltype.ipynb)) |
 | <img border="0" src="files/ST_icon.png" width="30" height="30"> Spatial transcriptomics | [Seurat_ST](labs/compiled/seurat/seurat_07_spatial.html) ([.Rmd](https://raw.githubusercontent.com/NBISweden/workshop-scRNAseq/master/labs/compiled/seurat/seurat_07_spatial.Rmd)) | [Scater_ST](labs/compiled/scater/scater_07_spatial.html) ([.Rmd](https://raw.githubusercontent.com/NBISweden/workshop-scRNAseq/master/labs/compiled/scater/scater_07_spatial.Rmd)) | [Scanpy_ST](labs/compiled/scanpy/scanpy_07_spatial.html) ([.ipynb](https://raw.githubusercontent.com/NBISweden/workshop-scRNAseq/master/labs/compiled/scanpy/scanpy_07_spatail.ipynb)) |
-| <img border="0" src="https://cdn2.vectorstock.com/i/1000x1000/49/51/route-location-icon-vector-16394951.jpg" width="30" height="30"> Trajectory inference | [Slingshot_ti](labs/compiled/slingshot/slingshot.html) ([.Rmd](https://raw.githubusercontent.com/NBISweden/workshop-scRNAseq/master/labs/compiled/slingshot/slingshot.Rmd)) | [Slingshot_ti](labs/compiled/slingshot/slingshot.html) | [PAGA_ti](https://scanpy-tutorials.readthedocs.io/en/latest/paga-paul15.html) |
+| <img border="0" src="https://cdn2.vectorstock.com/i/1000x1000/49/51/route-location-icon-vector-16394951.jpg" width="30" height="30"> Trajectory inference | [Slingshot_ti](https://bioconductor.org/packages/devel/bioc/vignettes/tradeSeq/inst/doc/tradeSeq.html)  | [Slingshot_ti](https://bioconductor.org/packages/devel/bioc/vignettes/tradeSeq/inst/doc/tradeSeq.html) | [PAGA_ti](https://scanpy-tutorials.readthedocs.io/en/latest/paga-paul15.html) |
 
 <br/>
 

diff --git a/files/icon_trajectory.png b/files/icon_trajectory.png
diff --git a/labs/compiled/seurat/seurat_05_dge.Rmd b/labs/compiled/seurat/seurat_05_dge.Rmd
@@ -230,6 +230,8 @@ Gene Set Enrichment Analysis (GSEA)
 Besides the enrichment using hypergeometric test, we can also perform gene set enrichment analysis (GSEA), which scores ranked genes list (usually based on fold changes) and computes permutation test to check if a particular gene set is more present in the Up-regulated genes, amongthe DOWN_regulated genes or not differentially regulated.
 
 ```{r,fig.height=10,fig.width=10}
+DGE_cell_selection <- FindMarkers(cell_selection, ident.1 = "Covid", ident.2 = "Ctrl", logfc.threshold = -Inf, test.use = "wilcox",      min.pct = 0, min.diff.pct = 0, only.pos = FALSE, max.cells.per.ident = 50,     assay = "RNA")
+
 # Create a gene rank based on the gene expression fold change
 gene_rank <- setNames( DGE_cell_selection$avg_logFC, casefold(rownames(DGE_cell_selection),upper=T) )
 ```
@@ -259,7 +261,7 @@ names(gmt) <- unique(paste0(msigdbgmt_subset$gs_name,"_",msigdbgmt_subset$gs_exa
 library(fgsea)
 
 # Perform enrichemnt analysis
-fgseaRes <- fgsea( pathways=gmt, stats=gene_rank, minSize=15, maxSize=500,nperm = 10000)
+fgseaRes <- fgsea( pathways=gmt, stats=gene_rank, minSize=15, maxSize=500)
 fgseaRes <- fgseaRes[ order(fgseaRes$pval) ,]
 
 # Filter the results table to show only the top 10 UP or DOWN regulated processes (optional)
@@ -285,7 +287,6 @@ Finally, lets save the integrated data for further analysis.
 
 ```{r}
 saveRDS(alldata,"data/3pbmc_qc_dr_int_cl_dge.rds")
-write.csv(markers_genes)
 ```
 
 

diff --git a/labs/compiled/seurat/seurat_05_dge.html b/labs/compiled/seurat/seurat_05_dge.html
diff --git a/labs/compiled/seurat/seurat_05_dge.md b/labs/compiled/seurat/seurat_05_dge.md
diff --git a/labs/compiled/seurat/seurat_05_dge.nb.html b/labs/compiled/seurat/seurat_05_dge.nb.html
diff --git a/labs/compiled/seurat/seurat_05_dge_files/figure-html/unnamed-chunk-10-1.png b/labs/compiled/seurat/seurat_05_dge_files/figure-html/unnamed-chunk-10-1.png
diff --git a/labs/compiled/seurat/seurat_05_dge_files/figure-html/unnamed-chunk-11-1.png b/labs/compiled/seurat/seurat_05_dge_files/figure-html/unnamed-chunk-11-1.png
diff --git a/labs/compiled/seurat/seurat_05_dge_files/figure-html/unnamed-chunk-13-1.png b/labs/compiled/seurat/seurat_05_dge_files/figure-html/unnamed-chunk-13-1.png
diff --git a/labs/compiled/seurat/seurat_05_dge_files/figure-html/unnamed-chunk-2-1.png b/labs/compiled/seurat/seurat_05_dge_files/figure-html/unnamed-chunk-2-1.png
diff --git a/labs/compiled/seurat/seurat_05_dge_files/figure-html/unnamed-chunk-5-1.png b/labs/compiled/seurat/seurat_05_dge_files/figure-html/unnamed-chunk-5-1.png
diff --git a/labs/compiled/seurat/seurat_05_dge_files/figure-html/unnamed-chunk-5-2.png b/labs/compiled/seurat/seurat_05_dge_files/figure-html/unnamed-chunk-5-2.png
diff --git a/labs/compiled/seurat/seurat_05_dge_files/figure-html/unnamed-chunk-6-1.png b/labs/compiled/seurat/seurat_05_dge_files/figure-html/unnamed-chunk-6-1.png
diff --git a/labs/compiled/seurat/seurat_05_dge_files/figure-html/unnamed-chunk-7-1.png b/labs/compiled/seurat/seurat_05_dge_files/figure-html/unnamed-chunk-7-1.png
diff --git a/labs/compiled/seurat/seurat_05_dge_files/figure-html/unnamed-chunk-8-1.png b/labs/compiled/seurat/seurat_05_dge_files/figure-html/unnamed-chunk-8-1.png
diff --git a/labs/seurat/seurat_05_dge.Rmd b/labs/seurat/seurat_05_dge.Rmd
@@ -180,6 +180,15 @@ abline(v = 0, lty = 1)
 #DGE_ALL7.2:
 
 ```{r,fig.height=10,fig.width=10}
+DGE_cell_selection <- FindMarkers(cell_selection,
+                               logfc.threshold = -Inf,
+                               test.use = "wilcox",
+                               min.pct = 0,
+                               min.diff.pct = 0,
+                               only.pos = FALSE,
+                               max.cells.per.ident = 50,
+                               assay = "RNA")
+
 # Create a gene rank based on the gene expression fold change
 gene_rank <- setNames( DGE_cell_selection$avg_logFC, casefold(rownames(DGE_cell_selection),upper=T) )
 ```

diff --git a/labs/trajectory/slingshot.Rmd b/labs/trajectory/slingshot.Rmd
@@ -9,22 +9,6 @@ editor_options:
 # Trajectory inference analysis: Slingshot
 
 
-### Downloading dataset
-
-```{bash, eval=F}
-#Create data folder
-mkdir data
-
-#Download file from NCBI server into data folder
-cd data
-#curl ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE72nnn/GSE72857/suppl/GSE72857%5Fumitab%2Etxt%2Egz -o GSE72857_umitab.txt.gz
-
-curl -o barcodes.tsv.gz 'https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSM3396161&format=file&file=GSM3396161%5Fbarcodes%5FA%2Etsv%2Egz'
-curl -o features.tsv.gz 'https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSM3396161&format=file&file=GSM3396161%5Fgenes%5FA%2Etsv%2Egz'
-curl -o matrix.mtx.gz 'https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSM3396161&format=file&file=GSM3396161%5Fmatrix%5FA%2Emtx%2Egz'
-```
-
-
 ### Loading matrix into R
 
 ```{r, eval=FALSE}
@@ -88,11 +72,13 @@ sce <- runUMAP(sce, dimred = "PCA", n_dimred = 50,   ncomponents = 2, spread=1,
 
 
 #Plot the clusters
-plotReducedDim(sce, dimred = "UMAP",colour_by = "louvain_SNNk5",text_by = "louvain_SNNk5")
+set.seed(1)
+sce$kmeans <- kmeans(reducedDim(sce,"UMAP"),centers = 25,nstart = 20)$cluster
+plotReducedDim(sce, dimred = "UMAP",colour_by = "kmeans",text_by = "kmeans")
 
 #Save the objects as separate matrices for input in slingshot
 dimred <- reducedDim(sce, type = "UMAP")
-clustering <- sce$louvain_SNNk5
+clustering <- sce$kmeans
 counts <- as.matrix( counts(sce)[ top.hvgs , ] )
 ```
 
@@ -120,18 +106,20 @@ data <- FindVariableFeatures(data, nfeatures = 5000)
 data <- ScaleData(data)
 data <- RunPCA(data,npcs = 50)
 
-data <- FindNeighbors(data,k.param = 20)
+data <- FindNeighbors(data,k.param = 10)
 data <- FindClusters(data,resolution = 0.8)
 
-data <- RunUMAP(data, n.neighbors = 15, dims = 1:50,spread = 1.5,min.dist = .5,
-                repulsion.strength = .001,metric="euclidean",n.epochs = 100,negative.sample.rate = 5 )
+data <- RunUMAP(data, n.neighbors = 10, dims = 1:50,spread = 1,min.dist = .2,
+                repulsion.strength = .01,metric="euclidean",n.epochs = 100,negative.sample.rate = 10 )
 
 #Plot the clusters
-DimPlot(data, group.by = "RNA_snn_res.0.8",label = T)
+set.seed(1)
+data$kmeans <- kmeans(data@[email protected],centers = 25,nstart = 20)$cluster
+DimPlot(data, group.by = "kmeans",label = T)
 
 #Save the objects as separate matrices for input in slingshot
 dimred <- data@[email protected]
-clustering <- data$RNA_snn_res.0.8
+clustering <- data$kmeans
 counts <- as.matrix( data@assays$RNA@counts[ data@[email protected] , ] )
 ```
 
@@ -159,7 +147,7 @@ lineages
 #Plot the lineages
 par(mfrow=c(1,2))
 plot(dimred[,1:2], col = pal[clustering],  cex=.5,pch = 16)
-for(i in levels(clustering)){ 
+for(i in levels(factor(clustering))){ 
   text( mean(dimred[clustering==i,1]),
         mean(dimred[clustering==i,2]), labels = i,font = 2) }
 plot(dimred[,1:2], col = pal[clustering],cex=.5, pch = 16)
@@ -183,22 +171,13 @@ First, we need to make sure to identify which cluster is the progenitor cell. In
 
 ```{r}
 #Seurat
-DimPlot(data, group.by = "RNA_snn_res.0.8",label = T)
+DimPlot(data, group.by = "kmeans",label = T)
 FeaturePlot(data,features = "CD34",order = T)
-FeaturePlot(data,features = "CD19",order = T)
-FeaturePlot(data,features = "G0S2",order = T)
-FeaturePlot(data,features = "CD3E",order = T)
-FeaturePlot(data,features = "CST3",order = T)
-FeaturePlot(data,features = "NKG7",order = T)
 
 
 #Scran/Scater
-plotReducedDim(sce, dimred = "UMAP",colour_by = "louvain_SNNk5",text_by = "louvain_SNNk5")
+plotReducedDim(sce, dimred = "UMAP",colour_by = "kmeans",text_by = "kmeans")
 plotReducedDim(sce, dimred = "UMAP",colour_by = "CD34",by_exprs_values = "logcounts")
-plotReducedDim(sce, dimred = "UMAP",colour_by = "NKG7",by_exprs_values = "logcounts")
-plotReducedDim(sce, dimred = "UMAP",colour_by = "CD3E",by_exprs_values = "logcounts")
-plotReducedDim(sce, dimred = "UMAP",colour_by = "MS4A1",by_exprs_values = "logcounts")
-plotReducedDim(sce, dimred = "UMAP",colour_by = "CST3",by_exprs_values = "logcounts")
 ```
 
 Then, we can insert that information on where the trajectory starts on the `getLineages` function.
@@ -208,15 +187,15 @@ Then, we can insert that information on where the trajectory starts on the `getL
 set.seed(1)
 lineages <- getLineages(data = dimred,
                         clusterLabels = clustering,
-                        end.clus = c("8","12","9","11","1","12"), #define how many branches/lineages to consider
-                        start.clus = "7") #define where to start the trajectories
+                        #end.clus = c("4","3","13","9"), #define how many branches/lineages to consider
+                        start.clus = "11") #define where to start the trajectories
 
 lineages
 
 #Plot the lineages
 par(mfrow=c(1,2))
 plot(dimred[,1:2], col = pal[clustering],  cex=.5,pch = 16)
-for(i in levels(clustering)){ 
+for(i in levels(factor(clustering))){ 
   text( mean(dimred[clustering==i,1]),
         mean(dimred[clustering==i,2]), labels = i,font = 2) }
 plot(dimred, col = pal[clustering],  pch = 16)
@@ -235,7 +214,7 @@ Once the clusters are connected, Slingshot allows you to transform them to a smo
 Since the function `getCurves()` takes some time to run, we can speed up the convergence of the curve fitting process by reducing the amount of cells to use in each lineage. Ideally you could all cells, but here we had set `approx_points` to 300 to speed up. Feel free to adjust that for your dataset.
 
 ```{r}
-curves <- getCurves(lineages, approx_points = 300, thresh = 0.01, stretch = .8, allow.breaks = FALSE, shrink=.99)
+curves <- getCurves(lineages, approx_points = 20, thresh = 0.01, stretch = .8, allow.breaks = FALSE, shrink=.99)
 curves
 
 plot(dimred, col = pal[clustering], asp = 1, pch = 16)
@@ -268,11 +247,11 @@ The fitting of GAMs can take quite a while, so for demonstration purposes we fir
 library(tradeSeq)
 
 #Removing some genes to speed up the computations for this tutorial
-filt_counts <- counts [ rowSums(counts > 5) > ncol(counts)/100, ] 
+filt_counts <- counts [ rowSums(counts > 1) > ncol(counts)/20, ] 
 dim(filt_counts)
 
-sce <- fitGAM(  counts = as.matrix(filt_counts),
-                sds = curves )
+sceGAM <- fitGAM(  counts = as.matrix(filt_counts),
+                sds = curves, parallel=T )
 
 plotGeneCount(curves, filt_counts, clusters = clustering, models = sce)
 ```
@@ -284,7 +263,7 @@ plot_differential_expression <- function(feature_id) {
 feature_id <- pseudotime_association %>% filter(pvalue < 0.05) %>% top_n(1, -waldStat) %>% pull(feature_id)
 cowplot::plot_grid(
   plotGeneCount(curves, filt_counts, gene=feature_id[1], clusters = clustering, models = sce)+ ggplot2::theme(legend.position = "none"),
-  plotSmoothers(sce, as.matrix(counts), gene = feature_id[1])
+  plotSmoothers(sceGAM, as.matrix(counts), gene = feature_id[1])
 )}
 
 ```
@@ -293,18 +272,23 @@ cowplot::plot_grid(
 
 #### Genes that change with pseudotime
 
+
+We can first look at general trends of gene expression across pseudotime.
+
 ```{r}
-pseudotime_association <- associationTest(sce)
+pseudotime_association <- associationTest(sceGAM)
 pseudotime_association$fdr <- p.adjust(pseudotime_association$pvalue, method = "fdr")
 pseudotime_association <- pseudotime_association[ order(pseudotime_association$pvalue), ]
 pseudotime_association$feature_id <- rownames(pseudotime_association)
+pseudotime_association <- pseudotime_association[pseudotime_association$pvalue!=0,]
 ```
 
+And then plot all curves together:
 
 ```{r}
 feature_id <- pseudotime_association %>%
-  filter(pvalue < 0.05) %>%
-  top_n(10, -waldStat) %>%
+  filter(pvalue < 0.00001) %>%
+  top_n(-10, waldStat) %>%
   pull(feature_id)
 
 feature_id
@@ -319,15 +303,28 @@ plot_differential_expression(feature_id[1])
 We can define custom pseudotime values of interest if we’re interested in genes that change between particular point in pseudotime. By default, we can look at differences between start and end:
 
 ```{r}
-pseudotime_start_end_association <- startVsEndTest(sce, pseudotimeValues = c(0, 1))
+pseudotime_start_end_association <- startVsEndTest(sceGAM, pseudotimeValues = c(0, 1))
 pseudotime_start_end_association$feature_id <- rownames(pseudotime_start_end_association)
 
 feature_id <- pseudotime_start_end_association %>% 
   filter(pvalue < 0.05) %>% 
   top_n(1, waldStat) %>% 
   pull(feature_id)
 
+feature_id
+
 plot_differential_expression(feature_id)
+
+
+data(countMatrix, package = "tradeSeq")
+counts <- as.matrix(countMatrix)
+rm(countMatrix)
+data(crv, package = "tradeSeq")
+data(celltype, package = "tradeSeq")
+
+set.seed(5)
+icMat <- evaluateK(counts = counts, sds = crv, k = 3:10, 
+                   nGenes = 200, verbose = T)
 ```
 
 
@@ -342,7 +339,7 @@ More interesting are genes that are different between two branches. We may have
 * Note that the last function requires that the pseudotimes between two lineages are aligned.
 
 ```{r}
-different_end_association <- diffEndTest(sce)
+different_end_association <- diffEndTest(sceGAM)
 different_end_association$feature_id <- rownames(different_end_association)
 
 feature_id <- different_end_association %>% 
@@ -356,7 +353,7 @@ plot_differential_expression(feature_id)
 
 
 ```{r}
-branch_point_association <- earlyDETest(sce)
+branch_point_association <- earlyDETest(sceGAM)
 branch_point_association$feature_id <- rownames(branch_point_association)
 
 feature_id <- branch_point_association %>%