.Rhistory

# One-off environment setup for building the site: install the CRAN and
# Bioconductor packages, run the vcftools report, then render.
install.packages("tidyverse")
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
# SeqArray, SNPRelate and DESeq2 are Bioconductor packages, so they are
# installed through BiocManager rather than install.packages().
BiocManager::install(c("SeqArray", "SNPRelate", "DESeq2"))
# vcftools is a command-line program, not R code, so it is run via system2().
system2(
  "vcftools",
  c("--vcf", "RAD_data/OL_subset.vcf", "--missing-indv",
    "--out", "RAD_data/OL_subset")
)
# NOTE(review): machine-specific absolute path; the relative paths used
# elsewhere depend on this working directory.
setwd("/Users/Jason/github/marineomics.github.io")
rmarkdown::render_site()
# Attach every package the analyses rely on, installing from CRAN any that
# are missing first. The package vector must not contain an empty argument
# (e.g. a stray leading comma), because c() then stops with
# "argument is empty".
invisible(lapply(
  c(
    "tidyverse", "ape", "vegan", "GGally",
    "rgl", "adegenet", "MASS",
    "data.table", "plyr", "lmtest", "reshape2", "Rmisc", "lmerTest", "statmod"
  ),
  function(p) {
    # requireNamespace() checks one package directly; scanning
    # installed.packages() on each iteration is slow.
    if (!requireNamespace(p, quietly = TRUE)) {
      install.packages(p)
    }
    library(p, character.only = TRUE)
  }
))
# Install the Bioconductor expression-analysis packages and Hmisc, then
# rebuild the site once everything is available.
# requireNamespace() tests for BiocManager without attaching it.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
BiocManager::install(c("DESeq2", "edgeR", "arrayQualityMetrics"))
# Hmisc is installed before rendering so the render can find it.
install.packages("Hmisc")
rmarkdown::render_site()
# Attach every package the analyses rely on, installing from CRAN any that
# are missing first.
invisible(lapply(
  c(
    "tidyverse", "ape", "vegan", "GGally",
    "rgl", "adegenet", "MASS",
    "data.table", "plyr", "lmtest", "reshape2", "Rmisc", "lmerTest", "statmod"
  ),
  function(p) {
    # requireNamespace() checks one package directly; scanning
    # installed.packages() on each iteration is slow.
    if (!requireNamespace(p, quietly = TRUE)) {
      install.packages(p)
    }
    library(p, character.only = TRUE)
  }
))
# Install the natverse "nat" package from GitHub and rebuild the site.
# XQuartz is a macOS application, not an R package, so install.packages()
# cannot supply it; it has to be installed outside R.
# NOTE(review): presumably XQuartz was wanted for rgl's display - confirm.
# devtools must be available before install_github() is called.
if (!requireNamespace("devtools", quietly = TRUE)) {
  install.packages("devtools")
}
# then install nat
devtools::install_github("natverse/nat")
rmarkdown::render_site()
# Attach every package the analyses rely on, installing from CRAN any that
# are missing first.
invisible(lapply(
  c(
    "tidyverse", "ape", "vegan", "GGally",
    "rgl", "adegenet", "MASS",
    "data.table", "plyr", "lmtest", "reshape2", "Rmisc", "lmerTest", "statmod"
  ),
  function(p) {
    # requireNamespace() checks one package directly; scanning
    # installed.packages() on each iteration is slow.
    if (!requireNamespace(p, quietly = TRUE)) {
      install.packages(p)
    }
    library(p, character.only = TRUE)
  }
))
# Setup chunk: attach the reporting packages and set knitr/citation defaults.
library(knitr)
knitr::opts_chunk$set(echo = TRUE)
library(knitcitations)
library(kableExtra)
# Figure size defaults, with chunk caching switched off.
opts_chunk$set(
  fig.width = 10,
  fig.height = 5,
  cache = FALSE
)
# Citation rendering options for knitcitations.
cite_options(
  citation_format = "pandoc",
  max.names = 3,
  style = "html",
  hyperlink = "to.doc"
)
# Install vegan from CRAN and the genomics packages from Bioconductor, then
# rebuild the site.
install.packages("vegan")
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
# LEA, gdsfmt, SeqArray and SNPRelate are distributed through Bioconductor,
# so install.packages() cannot find them. Package names must be quoted.
BiocManager::install(c("LEA", "gdsfmt", "SeqArray", "SNPRelate"))
rmarkdown::render_site()
# Setup chunk: attach the reporting packages and set knitr/citation defaults.
library(knitr)
library(knitcitations)
library(kableExtra)
knitr::opts_chunk$set(echo = TRUE)
# Figure size defaults, with chunk caching switched off.
opts_chunk$set(
  fig.width = 10,
  fig.height = 5,
  cache = FALSE
)
cite_options(
  citation_format = "pandoc", max.names = 3, style = "html",
  hyperlink = "to.doc"
)
# A Markdown code fence is not R code and must not appear in the script;
# the next statement opens the following setup chunk.
# It turns caching back on, overriding cache = FALSE above.
knitr::opts_chunk$set(echo = TRUE, cache = TRUE)
# Wrap `x` in colour markup matching the current knitr output format:
# \textcolor for LaTeX, a styled <span> for HTML, and `x` unchanged for
# any other output.
colorize <- function(x, color) {
  if (knitr::is_latex_output()) {
    return(sprintf("\\textcolor{%s}{%s}", color, x))
  }
  if (knitr::is_html_output()) {
    return(sprintf("<span style='color: %s;'>%s</span>", color, x))
  }
  x
}
# Pass `-l` to bash chunks so they use bash_profile.
knitr::opts_chunk$set(engine.opts = list(bash = "-l"))
# Packages for the walkthrough: tidyverse from CRAN, SeqArray via BiocManager.
install.packages("tidyverse")
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
BiocManager::install("SeqArray")
# Echo code and cache chunk results.
knitr::opts_chunk$set(echo = TRUE, cache = TRUE)
# Wrap `x` in colour markup matching the current knitr output format:
# \textcolor for LaTeX, a styled <span> for HTML, and `x` unchanged for
# any other output.
colorize <- function(x, color) {
  if (knitr::is_latex_output()) {
    return(sprintf("\\textcolor{%s}{%s}", color, x))
  }
  if (knitr::is_html_output()) {
    return(sprintf("<span style='color: %s;'>%s</span>", color, x))
  }
  x
}
# Pass `-l` to bash chunks so they use bash_profile.
knitr::opts_chunk$set(engine.opts = list(bash = "-l"))
library(SeqArray) # efficient storage and filtering of genomic data
library(tidyverse) # plotting data formatting and manipulation
library(SNPRelate) # PCA and other popgen analyses
# Report the R build in use. `R.version` is a list and cannot be called;
# `R.Version()` is the function form.
R.Version()
# First step of the plate-layout helper: read the comma-delimited
# STRATA/meta-data file. (The randomisation and output steps are not part of
# this excerpt.)
# ** TODO ** decide where to duplicate samples - need to consider STRATA, DOC, etc.?
# maybe have user write in a list of samples to replicate
wells <- 96 # how many wells can you use on your plate?
# header = TRUE rather than T: `T` is an ordinary variable and can be reassigned.
data.in <- read.table("./data/example.metadata.csv", sep = ",", header = TRUE) # each sample should be in its own ROW
# Chunk 1
# Setup chunk: attach the reporting packages and set knitr/citation defaults.
library(knitr)
library(knitcitations)
library(kableExtra)
knitr::opts_chunk$set(echo = TRUE)
# Figure size defaults, with chunk caching switched off.
opts_chunk$set(
  fig.width = 10,
  fig.height = 5,
  cache = FALSE
)
cite_options(
  citation_format = "pandoc", max.names = 3, style = "html",
  hyperlink = "to.doc"
)
# A Markdown code fence is not R code and must not appear in the script;
# the next statement opens the following setup chunk.
# It turns caching back on, overriding cache = FALSE above.
knitr::opts_chunk$set(echo = TRUE, cache = TRUE)
# Wrap `x` in colour markup matching the current knitr output format:
# \textcolor for LaTeX, a styled <span> for HTML, and `x` unchanged for
# any other output.
colorize <- function(x, color) {
  if (knitr::is_latex_output()) {
    return(sprintf("\\textcolor{%s}{%s}", color, x))
  }
  if (knitr::is_html_output()) {
    return(sprintf("<span style='color: %s;'>%s</span>", color, x))
  }
  x
}
# Pass `-l` to bash chunks so they use bash_profile.
knitr::opts_chunk$set(engine.opts = list(bash = "-l"))
# First step of the plate-layout helper: read the comma-delimited
# STRATA/meta-data file. (The randomisation and output steps are not part of
# this excerpt.)
# ** TODO ** decide where to duplicate samples - need to consider STRATA, DOC, etc.?
# maybe have user write in a list of samples to replicate
wells <- 96 # how many wells can you use on your plate?
# header = TRUE rather than T: `T` is an ordinary variable and can be reassigned.
data.in <- read.table("./data/example.metadata.csv", sep = ",", header = TRUE) # each sample should be in its own ROW
# RADseq walkthrough (files under POP_02_RADseq_files/): convert the VCF to
# GDS, open it, report sample/SNP counts, read the sample metadata and
# summarise per-variant missingness. Interactive-only calls (help lookups,
# View()) are deliberately not part of the script.
library(SeqArray) # efficient storage and filtering of genomic data
library(tidyverse) # plotting data formatting and manipulation
library(SNPRelate) # PCA and other popgen analyses

filename <- "OL_subset" # replace with your file name
filename.gds <- paste0("POP_02_RADseq_files/", filename, ".gds")
filename.vcf <- paste0("POP_02_RADseq_files/", filename, ".vcf")

# Close any GDS handle left open by an earlier run before rewriting the file.
# NOTE(review): re-running the conversion while the output file is still open
# appears to be what made the repeated attempts fail - confirm.
gdsfmt::showfile.gds(closeall = TRUE)

# 1 . Convert VCF to GDS
SeqArray::seqVCF2GDS(vcf.fn = filename.vcf, out.fn = filename.gds, storage.option = "ZIP_RA")
gdsin <- SeqArray::seqOpen(filename.gds)
print(paste0("The number of SAMPLES in data: ", length(c(SeqArray::seqGetData(gdsin, "sample.id")))))
print(paste0("The number of SNPs in data: ",  length(c(SeqArray::seqGetData(gdsin, "variant.id")))))

# Sample metadata: keep only the ID, STRATA and PLATE columns of the popmap.
metafile <- "POP_02_RADseq_files/OL.popmap"
sample.ids <- seqGetData(gdsin, "sample.id")
sample.strata <- read.table(metafile, header = TRUE, sep = "\t") %>%
  dplyr::select(ID, STRATA, PLATE)

# using the gdsin object opened above
print("Per variant: ")
summary(m1 <- SeqArray::seqMissing(gdsin, per.variant = TRUE))

# Inspect the handle, then release every open GDS file.
gdsin[["filename"]]
gdsin[["root"]]
gdsfmt::showfile.gds(closeall = TRUE)
# Rebuild the site, then repeat the RADseq walkthrough against the renamed
# POP_02_RADseq/ directory. The GDS file is converted and opened before
# anything reads from `gdsin`.
rmarkdown::render_site()

library(SeqArray) # efficient storage and filtering of genomic data
library(tidyverse) # plotting data formatting and manipulation
library(SNPRelate) # PCA and other popgen analyses

filename <- "OL_subset" # replace with your file name
filename.gds <- paste0("POP_02_RADseq/", filename, ".gds")
filename.vcf <- paste0("POP_02_RADseq/", filename, ".vcf")

# Close any GDS handle left open by an earlier run before rewriting the file.
gdsfmt::showfile.gds(closeall = TRUE)

# 1 . Convert VCF to GDS
SeqArray::seqVCF2GDS(vcf.fn = filename.vcf, out.fn = filename.gds, storage.option = "ZIP_RA")
gdsin <- SeqArray::seqOpen(filename.gds)
print(paste0("The number of SAMPLES in data: ", length(c(SeqArray::seqGetData(gdsin, "sample.id")))))
print(paste0("The number of SNPs in data: ",  length(c(SeqArray::seqGetData(gdsin, "variant.id")))))

# Sample metadata: keep only the ID, STRATA and PLATE columns of the popmap.
metafile <- "POP_02_RADseq/OL.popmap"
sample.ids <- seqGetData(gdsin, "sample.id")
sample.strata <- read.table(metafile, header = TRUE, sep = "\t") %>%
  dplyr::select(ID, STRATA, PLATE)
# vcftools is a command-line program, not R code, so it is run via system2()
# with each argument passed separately.
system2(
  "vcftools",
  c("--vcf", "POP_02_RADseq/OL_subset.vcf", "--missing-indv",
    "--out", "POP_02_RADseq/OL_subset")
)
# Final pass of the RADseq walkthrough: the file names are set before the GDS
# file is converted and opened.
library(SeqArray) # efficient storage and filtering of genomic data
library(tidyverse) # plotting data formatting and manipulation
library(SNPRelate) # PCA and other popgen analyses

filename <- "OL_subset" # replace with your file name
filename.gds <- paste0("POP_02_RADseq/", filename, ".gds")
filename.vcf <- paste0("POP_02_RADseq/", filename, ".vcf")

# Close any GDS handle left open by an earlier run before rewriting the file.
gdsfmt::showfile.gds(closeall = TRUE)

# 1 . Convert VCF to GDS
SeqArray::seqVCF2GDS(vcf.fn = filename.vcf, out.fn = filename.gds, storage.option = "ZIP_RA")
gdsin <- SeqArray::seqOpen(filename.gds)
sample.ids <- seqGetData(gdsin, "sample.id")

knitr::include_graphics("ADMIN_01_submissions_instructions_files/Rivera_etal_fig.png")
# getwd must be called; the bare name only prints the function definition.
getwd()