forked from ilarsf/gwasTools
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathshiny_lmor_func.R
212 lines (208 loc) · 8.07 KB
/
shiny_lmor_func.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
# ==========================================================================
# Author: Luke Lloyd-Jones
# Date started: 10/10/2016
# Date last updated: 09/02/2018
# Function takes summary statistics from a linear model or
# linear mixed model genome-wide association study and
# maps the genetic effect estimates to the odds ratio. The
# mapped results are appended to the original data frame.
# ==========================================================================
LmToOddsRatio <- function(lmm.fil, k, std.err)
{
# ------------------------------------------------------------------------
# Args:
# lmm.fil: Data frame with genetic effect (BETA),
# allele frequency (FREQ) and sample prevalence (K)
# k: If the user just want to specify one k for all
# genetic variants
# std.err: Default should be 0. Logical for if the standard error
# transformation is required. std.err = 1 implies the SE
# transformation will be returned. WARNING: REQUIRES A
# "SE" COLUMN AND N = NUMBER OF INDIVIDUALS COLUMN
#
# Returns:
# Original lmm.fil data frame with the odds ratio appended
# ------------------------------------------------------------------------
# Grab the columns names
df.names <- colnames(lmm.fil)
# Get the column name for the allele frequency
p.col <- which(df.names == "Freq" | df.names == "FREQ" |
df.names == "FRQ" | df.names == "A1FREQ" |
df.names == "af"| df.names == "freq")
# If one of the column names is not in the set of possible names request
# the user to change the column name
if (length(p.col) == 0)
{
stop("Please rename the allele frequency column FREQ or add an allele
frequency for the effect allele from a reference. Alternatively,
check that you have the correct seperator for your results file.")
}
# ---------------------------------------------------
# Get the column name for the reference allele effect
# ---------------------------------------------------
p.beta <- which(df.names == "b" | df.names == "BETA" | df.names == "beta" |
df.names == "EFFECT")
# If one of the column names is not in the set of possible names
# request the user to change the column name
if (length(p.beta) == 0)
{
stop("Please rename the genetic effect column BETA or check that you
have the correct seperator for your results file")
}
if (length(p.beta) > 1)
{
stop("You have multiple genetic coefficent columns please just submit one")
}
# Check to see if the effects have any NAs. Remove and notify user
if (length(which(is.na(lmm.fil[, p.beta]))) != 0)
{
print("Removing results with NAs for the genetic effect")
lm.fil <- lmm.fil[-which(is.na(lmm.fil[, p.beta])), ]
}
# Check to see if the allele frequencies have any NAs.
# Remove and notify user
if (length(which(is.na(lmm.fil[, p.col]))) != 0)
{
print("Removing results with no allele frequency")
lm.fil <- lmm.fil[-which(is.na(lmm.fil[, p.beta])), ]
}
# Set the variables for the allele and the effect
p <- lmm.fil[, p.col]
beta <- lmm.fil[, p.beta]
# ------------------------
# Odds ratio 3. A remnant.
# ------------------------
p_1 <- p + (beta * p * (1 - p)) / k
p_0 <- (p - k * p_1) / (1 - k)
or.map.1 <- (p_1 * (1 - p_0)) / (p_0 * (1 - p_1))
# -----------------------
# Solve for the quadratic
# -----------------------
pp <- p
c1 <- (1 - k) * (3 * beta - 2 * k * beta) + (beta * ((1 - k) ^ 2) *
(1 - 2 * k)) / k
c2 <- (1 - k) * (1 - 4 * beta * pp) - (2 * beta * pp * (1 - k) *
(1 - 2 * k)) / k
c3 <- (pp ^ 2) * beta * (1 - 2 * k) / k + pp * (k - 1 + beta)
f0 <- c1 + c2 + c3
# ------------------------------------------------------------------------
# Calculate the upper and lower bounds for returning sensible ORs
# ------------------------------------------------------------------------
lb <- (p * k - p * (k ^ 2) - k * (1 - k)) /
((1 - k) - 2 * p * (1 - k) + p ^ 2 - 2 * (p ^ 2) * k + p * k)
ub <- (p * k - p * (k ^ 2)) / (p ^ 2 - 2 * (p ^ 2) * k + p * k)
# Set up an empty array
or.map.gld <- array(0, length(p))
if (length(which(is.na(c3))) != 0 & length(which(is.na(f0))) != 0)
{
# --------------------
# Odds ratio quadratic
# --------------------
p.0.1 <- (-c2 + sqrt(c2 ^ 2 - 4 * c1 * c3)) / (2 * c1)
p.0.2 <- (-c2 - sqrt(c2 ^ 2 - 4 * c1 * c3)) / (2 * c1)
# There should be only one solution in (0, 1). Take the set that
# has all elements in (0, 1)
if (all(p.0.1 > 0 & p.0.1 < 1))
{
p_0 <- p.0.1
} else if (all(p.0.2 > 0 & p.0.2 < 1))
{
p_0 <- p.0.2
}
p_1 <- (p - (1 - k) * p_0 ) / k
or.map.gld <- (p_1 * (1 - p_0)) / (p_0 * (1 - p_1))
or.map.gld[which(is.na(c3))] <- NA
or.map.gld[which(is.na(f0))] <- NA
} else if (!all(beta > lb & beta < ub))
{
c1[which(beta < lb | beta> ub)] <- NA
c2[which(beta < lb | beta> ub)] <- NA
c3[which(beta < lb | beta> ub)] <- NA
p.0.1 <- (-c2 + sqrt(c2 ^ 2 - 4 * c1 * c3)) / (2 * c1)
p.0.2 <- (-c2 - sqrt(c2 ^ 2 - 4 * c1 * c3)) / (2 * c1)
# There should be only one solution in (0, 1).
# Take the set that has all elements in (0, 1)
if (!all(is.na(p.0.1)) & !all(is.na(p.0.1)))
{
if (all(p.0.1[-which(is.na(p.0.1))] > 0 &
p.0.1[-which(is.na(p.0.1))] < 1))
{
p_0 <- p.0.1
} else if (all(p.0.2[-which(is.na(p.0.2))] > 0 &
p.0.2[-which(is.na(p.0.2))] < 1))
{
p_0 <- p.0.2
}
} else
{
p_0 <- NA
}
p_1 <- (p - (1 - k) * p_0 ) / k
or.map.gld <- (p_1 * (1 - p_0)) / (p_0 * (1 - p_1))
} else
{
# If there are no concerns with NAs and ORs on the boundary
# calculate the full set of results
or.map.gld <- (p_1 * (1 - p_0)) / (p_0 * (1 - p_1))
}
# ------------------------------------------------------------------------
# If standard error mappings are requested
# ------------------------------------------------------------------------
if (std.err == 1)
{
p.se <- which(df.names == "SE" | df.names == "se")
p.n <- which(df.names == "N" | df.names == "NMISS" |
df.names == "nmiss")
# If one of the column name is not in the set of possible names request
# the user to change the column name
if (length(p.se) == 0)
{
stop("Please rename the standard error of the genetic effect column
SE or check that you have the correct seperator for your results
file")
}
if (length(p.n) == 0)
{
stop("Please rename the number of individuals column N or check
that you have the correct seperator for your results file")
}
se <- lmm.fil[, p.se]
n <- lmm.fil[, p.n]
d <- (beta ) / (2 * ((se ^ 2) * (n - 2) + (beta ^ 2)))
a <- -beta
b <- beta - 2 * d * k * beta
c <- beta * 2 * k * (1 - k) * (d ^ 2) - k * (1 - k) * d +
k * beta * (d - (d^2))
p0_est.1 <- (-b - sqrt(b ^ 2 - 4 * a * c)) / (2 * a)
p0_est.2 <- (-b + sqrt(b ^ 2 - 4 * a * c)) / (2 * a)
p0.prop <- array(NA, length(p0_est.1))
p0.prop[which(beta < 0)] <- p0_est.1[which(beta < 0)]
p0.prop[which(beta >= 0)] <- p0_est.2[which(beta >= 0)]
p1_est <- p0.prop + d
or.est.2 <- (p1_est - p1_est * p0.prop) / (p0.prop - p1_est * p0.prop)
# Calculate the SE quadratic to evaluate when the transformation
# will break down
var <- se^2
a <- ((n - 2) ^ 2)
b <- (2 * (beta ^ 2) * (n - 2) - 2 * k * (n - 2) + 2 * (k^2) * (n-2))
c <- (beta^4) + (beta^2) * (k^2) - (beta^2) * k
sd <- var^2 * a + var * b + c
if (!all(sd > 0))
{
or.est.2[which(sd < 0)] <- NA
}
# Return the results with the SE mapping
res <- data.frame(lmm.fil, or.est.2)
colnames(res) <- c(colnames(lmm.fil), "OR_SE")
return(res)
}
# -------------------
# Bind up the results
# -------------------
if (std.err != 1)
{
res <- data.frame(lmm.fil, or.map.gld)
colnames(res) <- c(colnames(lmm.fil), "OR")
return(res)
}
}