-
Notifications
You must be signed in to change notification settings - Fork 22
/
Copy pathmap_temp_assoc.R
58 lines (48 loc) · 1.73 KB
/
map_temp_assoc.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# A short script to run a genome-wide association analysis for the
# "maximum temperature of warmest month" climate variable.
# SET UP ENVIRONMENT
# ------------------
library("data.table")
# IMPORT PHENOTYPE DATA
# ---------------------
cat("Reading phenotype data.\n")
pheno <- read.csv("pheno.csv")$bio5_maxT_wm
n <- length(pheno)
cat(sprintf("Loaded %d temperature measurements.\n",n))
# IMPORT GENOTYPE DATA
# --------------------
cat("Reading genotype data.\n")
geno <- fread("geno.csv",sep = ",",header = TRUE)
class(geno) <- "data.frame"
n <- nrow(geno)
p <- ncol(geno)
cat(sprintf("Loaded %d x %d genotype matrix.\n",n,p))
# Remove all SNPs that do not vary.
cat("Filtering SNPs.\n")
s <- sapply(geno,sd)
geno <- geno[s > 0]
# COMPUTE ASSOCIATIONS
# --------------------
# Select the first 10,000 SNPs. Comment out this two lines to compute
# associations for all available SNPs.
p <- 10000
geno <- geno[1:p]
# This function takes two inputs, the genotype (x) and the phenotype
# (y), and returns the p-value indicating the support an association
# between x and y.
get.assoc.pvalue <- function (x, y)
summary(lm(y ~ x,data.frame(x = x,y = y)))$coefficients["x","Pr(>|t|)"]
# This function will return an association p-value for each column of
# the data frame "dat".
get.assoc.pvalues <- function (dat, y)
sapply(dat,function (x) get.assoc.pvalue(x,y))
# Compute the association p-values for all SNPs (that is, columns of
# the "geno" data frame).
cat(sprintf("Computing association p-values for %d SNPs.\n",p))
t0 <- proc.time()
pvalues <- get.assoc.pvalues(geno,pheno)
t1 <- proc.time()
print(t1 - t0)
# SUMMARIZE ASSOCIATION RESULTS
# -----------------------------
cat(sprintf("The smallest association p-value is %0.3e.\n",min(pvalues)))