-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathPlasticity_gene_expression_SI.Rmd
334 lines (314 loc) · 21.9 KB
/
Plasticity_gene_expression_SI.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
---
output:
html_document:
toc: false
toc_float: false
fig_width: 6
fig_height: 6
fig_caption: true
---
<style type="text/css">
.main-container {
max-width: 800px !important;
margin-left: auto !important;
margin-right: auto !important;
}
</style>
<br>
<br>
<span style = "font-weight:normal; color:#000000; font-size: 28px; font-family:Times, serif;">
<center>Plasticity in Gene Expression in Response to Embryonic Environment Supporting Information </center></span>
<br>
<span style = "font-weight:normal; color:#000000; font-size: 12px; font-family:Times, serif">
Anthony A Snead<sup>1,8</sup>, Corey R Quackenbush<sup>2</sup>, Shawn Trojahn<sup>2</sup>, Anna L McDonald<sup>2</sup>, Luana Lins<sup>2,9</sup> Chris Cornelius<sup>1</sup>, Paula E Adams<sup>1,10</sup>, Dengke Ma<sup>3</sup> Yuying Hsu<sup>4</sup>, Eric Haag<sup>5</sup>, Frédéric Silvestre<sup>6</sup>, Akira Kanamori<sup>7</sup>, Ryan L Earley<sup>1\*</sup>, Joanna L Kelley<sup>2,11\*</sup></p>
<br>
<span style = "font-weight:bold;color:#000000; font-size: 12px; font-family:Times, serif"> Affiliations: </span> <br>
<span style = "font-weight:normal; color:#000000; font-size: 12px; font-family:Times, serif">
<sup>1</sup>Department of Biological Sciences, University of Alabama, 300 Hackberry Lane, Box 870344, Tuscaloosa, AL 35487
<br>
<sup>2</sup>School of Biological Sciences, Washington State University, 100 Dairy Road, Pullman, WA 99164
<br><sup>3</sup>Cardiovascular Research Institute and Department of Physiology, University of California San Francisco, San Francisco, California, USA
<br><sup>4</sup>Department of Life Sciences, National Taiwan Normal University, Taipei 116, Taiwan
<br><sup>5</sup>Department of Biology and Biological Sciences Graduate Program, University of Maryland, College Park, MD 20742
<br><sup>6</sup>Laboratory of Evolutionary and Adaptive Physiology, Institute of Life, Earth, and the Environment, University of Namur, 61 Rue de Bruxelles, 5000, Namur, Belgium
<br><sup>7</sup>Division of Biological Science, Graduate School of Science, Nagoya University, Aichi 464-8602, Japan
<br><sup>8</sup>Current address: Department of Biology, New York University, New York, NY 10003
<br><sup>9</sup>Current address: Australian National Insect Collection, CSIRO, Canberra, Australia
<br><sup>10</sup>Current address: Department of Biological Sciences, Auburn University, Auburn, AL, USA
<br><sup>11</sup>Current address: Department of Ecology and Evolutionary Biology, University of California Santa Cruz, Santa Cruz, CA, USA</span>
<br>
<br>
<span style = "font-weight:bold;color:#000000; font-size = 12px; font-family:Times, serif">Authors for correspondence:</span>
<br>
<span style = "font-weight:normal;color:#000000; font-size = 12px; font-family:Times, serif"><sup>\*</sup>Joanna L. Kelley, Department of Ecology and Evolutionary Biology, University of California Santa Cruz, Santa Cruz, CA, USA, [email protected]
<br>
<sup>\*</sup>Ryan Earley, Department of Biological Sciences, University of Alabama, 300 Hackberry Lane, Box 870344, Tuscaloosa, AL 35487</span>
<br>
<br>
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo=FALSE, warning=FALSE, message=FALSE, number_sections = FALSE)
rmarkdown::find_pandoc(version = '2.9.1')
load("C:/Users/antho/Desktop/Ryan_Research/Rivulus_Projects/Embryo_Transcriptomics/Plastic-Gene-Rivulus/.RData")
library(rmarkdown)
library(tinytex)
library(knitr)
library(tidyverse)
library(ggplot2)
library(gt)
library(DT)
library(lubridate)
```
```{r, Temperature data, echo=FALSE, eval=FALSE}
knitr::opts_chunk$set(echo=FALSE, warning=FALSE, message=FALSE, number_sections = FALSE)
load("C:/Users/antho/Desktop/Ryan_Research/Rivulus_Projects/Embryo_RNA/Code/.RData")
Temperature.data <- lapply(list.files( "C:/Users/antho/OneDrive - The University of Alabama/Professional/Ryan_Research/Rivulus_Projects/Embryo Transcriptomics/Data/Temperature Data/20C",full.names=TRUE), read.table, sep = ",") %>%
bind_rows(.) %>%
dplyr::mutate(Treatment = "Cold",
V1 = as.POSIXct(V1, "%m/%d/%y %I:%M:%S %p",tz = "America/Chicago")) %>%
select(-V2) %>%
rename( Date = V1,
Temperature = V3) %>%
bind_rows(., (lapply(list.files("C:/Users/antho/OneDrive - The University of Alabama/Professional/Ryan_Research/Rivulus_Projects/Embryo Transcriptomics/Data/Temperature Data/25C",full.names=TRUE), read.table, sep = ",") %>%
bind_rows(.) %>%
dplyr::mutate(Treatment = "Warm",
V1 = as.POSIXct(V1, "%m/%d/%y %I:%M:%S %p",tz = "America/Chicago")) %>%
select(-V2) %>%
rename( Date = V1,
Temperature = V3)))
```
<span style = "font-weight: bold; color: #000000; font-size: 16px; font-family:Times, serif">Developmental Formula</span>
<br>
<br>
<p style = "text-indent: 25px; line-height: 2em; font-weight:normal;color:#000000; font-size:12px; font-family:Times, serif">The formula to estimate when embryos should be removed from treatment was derived from a non-linear regression with “developmental stage (DS)” (1-35, from Harrington 1968) as the predictor and “hours of development” (0-310, from Mourabit et al. 2011) as the response. The resulting formula was: </p>
<br>
<span style = "font-weight:bold;color:#000000; font-size: 12px; font-family:Times, serif"><center>
Hours of Development ~ -33.04 + 4.37<span>*</span>DS - 0.07<span>*</span>DS<sup>2</sup> - 0.007<span>*</span>DS<sup>3</sup> + 0.0017<span>*</span>DS<sup>4</sup> + 0.00009<span>*</span>DS<sup>5</sup>
</center></span>
<br>
<p style = "text-indent: 25px;line-height: 2em; font-weight:normal;color:#000000; font-size: 12px; font-family:Times, serif">
Upon collecting the egg from its mother, we scored developmental stage (DS) and, using the above formula, calculated the estimated number of hours that the embryo had already spent developing and thus, how long it should remain in treatment before being sampled. We subtracted that value from 120 (with 120 hours being the time it takes the animal to develop to stage 29), 140 hours (stage 30), 180 hours (stage 31), 211 hours (stage 32), 240 hours (stage 33), or 310 hours (stage 34) (Mourabit et al. 2011). This value then was converted to days, and dictated when we sampled the animals from treatment. We confirmed DS under a stereomicroscope when the embryo was removed from treatment, and it was this confirmed DS that was used to determine whether the embryo was pre- or post-thermolabile period.
</p>
<br>
<span style = "font-weight:bold; color:#000000; font-size: 16px; font-family:Times, serif">
References
</span>
<br>
<div style= "text-indent: -25px; padding-left: 25px; font-weight:normal;color:#000000; font-size: 12px; font-family:Times, serif;">
<br>
<p>Harrington, Jr RW (1968) Delimitation of the thermolabile phenocritical period of sex determination and differentiation in the ontogeny of the normally hermaphroditic fish Rivulus marmoratus Poey. <em>Physiological Zoology</em> 41: 447-460.</p>
<p>Mourabit S, Edenbrow M, Croft DP, Kudoh T (2011). Embryonic development of the self-fertilizing mangrove killifish <em>Kryptolebias marmoratus</em>. <em>Developmental Dynamics</em> 240: 1694-1704.</p>
</div>
<br>
<br>
<span style = "font-weight: bold; color: #000000; font-size: 16px; font-family:Times, serif">Supporting Figures</span>
<br>
<br>
```{r, Developmental Formula,fig.height = 5, fig.align='center'}
Developmental.Df <- read.csv(file = paste(getwd(), "Data/Dev_Data/Dev_Data.csv", sep = "/"))
ggplot2::ggplot(data = Developmental.Df, aes(x = Stage_M, y = Hours)) +
geom_smooth(method = "loess", fullrange = T, span = .5, se=F,
color = "darkgrey", linewidth = .75 ) +
geom_point() +
scale_y_continuous(limits = c(0,325), breaks = seq(0, 300, by = 50)) +
scale_x_continuous(limits = c(0,35), breaks = seq(0, 35, by = 5)) +
labs(x = "Developmental Stage", y = "Number of Hours Developing") +
annotate("text", x = 15, y = 300, family = "serif", size = 4.5,
label = paste("'Hours of Development ~ -33.04 + 4.37*DS - 0.07*'*DS^2~'-'", sep = ""),
parse = TRUE) +
annotate("text", x = 11, y = 275, family = "serif", size = 4.5,
label = paste("'0.007*'*DS^3~'+ 0.0017*'*DS^4~'+0.00009*'*DS^5", sep = ""),
parse = TRUE) +
theme(panel.grid.major = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black"),
axis.title = element_text(family = "serif", size = 12),
axis.text = element_text(color = "black", size = 10, family = "serif"))
```
<br><span style = "font-weight:normal;color:#000000; font-size: 10px; font-family:Times, serif">SI Figure 1 The graph visualizing the non-linear regression with developmental stage, shortened to “DS” for the equation, as the predictor (x-axis) and the hours of development as the response (y-axis). The points are data used to derive the formula, and the line is the non-linear regression. The equation is displayed in the upper right corner of the graph for reference.</span>
<br>
<br>
<br>
```{r, MDS Top 500, fig.align = 'center'}
par(mar = c(4,4,1.5,.5), family ="serif")
MDS.500 <- limma::plotMDS(DGE.Data, top = 500, gene.selection = "pairwise", dim.plot = c(1,2), pch = c(24,24,24, 15,15,15, 21,21,21, 18, 18, 18), xlab = "MDS axis 1", ylab = "MDS axis 2", main = "MDS: Top 500", labels = NULL, cex = 2)
legend("bottomright", legend = c("Pre-Cold", "Pre-Warm", "Post-Cold", "Post-Warm"), pch = c(24, 15, 21, 18))
# MDS plot for top 500 genes
```
<br><span style = "font-weight:normal;color:#000000; font-size: 10px; font-family:Times, serif">SI Figure 2 The multidimensional scaling plot for the 500 genes with the largest fold change between samples using the first two principal components. Each group (Pre-Cold, Pre-Warm, Post-Cold, and Post-Warm) is differentiated by shape and color in the legend.</span>
<br>
<br>
<br>
```{r, MDS Top 1000, fig.align = 'center' }
par(mar = c(4,4,1.5,.5), family ="serif")
MDS.1000 <- limma::plotMDS(DGE.Data, top = 1000, gene.selection = "pairwise", dim.plot = c(1,2), pch = c(24,24,24, 15,15,15, 21,21,21, 18, 18, 18), xlab = "MDS axis 1", ylab = "MDS axis 2", main = "MDS: Top 1,000", labels = NULL, cex = 2)
legend("bottomright", legend = c("Pre-Cold", "Pre-Warm", "Post-Cold", "Post-Warm"), pch = c(24, 15, 21, 18))
# MDS plot for top 1,000 genes
```
<br><span style = "font-weight:normal;color:#000000; font-size: 10px; font-family:Times, serif">SI Figure 3 The multidimensional scaling plot for the 1000 genes with the largest fold change between samples using the first two principal components. Each group (Pre-Cold, Pre-Warm, Post-Cold, and Post-Warm) is differentiated by shape and color in the legend.</span>
<br>
<br>
<br>
```{r, MDS Top 10000, fig.align = "center" }
par(mar = c(4,4,1.5,.5), family ="serif")
MDS.10000 <- limma::plotMDS(DGE.Data, top = 10000, gene.selection = "pairwise",dim.plot = c(1,2), pch = c(24,24,24, 15,15,15, 21,21,21, 18, 18, 18), xlab = "MDS axis 1", ylab = "MDS axis 2", main = "MDS: Top 10,000", labels = NULL, cex = 2)
legend("bottomright", legend = c("Pre-Cold", "Pre-Warm", "Post-Cold", "Post-Warm"), pch = c(24, 15, 21, 18))
# MDS plot for top 10,000 genes
```
<br><span style = "font-weight:normal;color:#000000; font-size: 10px; font-family:Times, serif">SI Figure 4 The multidimensional scaling plot for the 10000 genes with the largest fold change between samples using the first two principal components. Each group (Pre-Cold, Pre-Warm, Post-Cold, and Post-Warm) is differentiated by shape and color in the legend.</span>
<br>
<br>
<br>
```{r - DE Volcano Plot, fig.height = 7, fig.width = 7, fig.align = "center" }
Volcano.Plots <- plotly::ggplotly(ggplot((F.tests$PreCold.PreWarm$All.Results$table %>%
tibble::as_tibble(rownames = "Gene") %>%
mutate(comparision = "Pre-Cold vs. Pre-Warm")) %>%
bind_rows( (F.tests$PreCold.PostCold$All.Results$table %>%
tibble::as_tibble(rownames = "Gene") %>%
mutate(comparision = "Pre-Cold vs. Post-Cold"))) %>%
bind_rows( (F.tests$PostCold.PostWarm$All.Results$table %>%
tibble::as_tibble(rownames = "Gene") %>%
mutate(comparision = "Post-Cold vs. Post-Warm"))) %>%
bind_rows( (F.tests$Prewarm.PostWarm$All.Results$table %>%
tibble::as_tibble(rownames = "Gene") %>%
mutate(comparision = "Pre-Warm vs. Post-Warm")))) +
aes(y = -log10(FDR), x = logFC, text = paste("Symbol:", Gene)) +
geom_point(size = 2) +
geom_hline(yintercept = -log10(0.05), linetype = "longdash", colour = "grey", size = 1) +
facet_wrap(~ comparision) +
labs( title = "Volcano plots",
subtitle = "Kryptolebias marmoratus",
caption = paste0("produced on ", Sys.time())) +
theme_bw() +
theme(
text = element_text(family = "serif")))
# Here I create an interactive volcano plot. The gray line is the p-value cut off. The vertical lines designate a log fold change of at least |1|.
Volcano.Plots
# Display the interactive volcano plots.
```
<span style = "font-weight:normal;color:#000000; font-size: 10px; font-family:Times, serif">SI Figure 5 The four-panel interactive plot is faceted by the comparison (Post-Cold vs. Post-Warm, Pre-Cold vs. Post-Cold, Pre-Cold vs. Pre-Warm, Pre-Warm vs. Post-Warm). Each point is a gene, and each panel has the negative log10 of the false discover rate adjust p-value (log odds probability) on the y- axis and the log fold change on the y axis. The dashed grey line is the significance cut off (FDR < 0.05); therefore, any genes above the line are significantly differentially expressed between the treatments.</span>
<br>
<br>
<span style = "font-weight:bold;color:#000000; font-size: 16px; font-family:Times, serif">Supporting Tables</span>
<br>
<span style = "font-weight:normal;color:#000000; font-size: 10px; font-family:Times, serif">SI Table 1 The interactive table includes all the temperature measurements for each treatment ( Warm = 25°C, Cold = 20°C) for all dates. The date column contains the date (Year-Month-Day) and time (Hour:Minutes:Seconds). Drastic changes in temperature correspond with the retrieval of the temperature probes to download data. </span>
```{r, Temperature Table}
DT::datatable(Temperature.data,
extensions = c('KeyTable', 'FixedHeader'),
filter = 'top',
options = list(initComplete = JS("function(settings, json)
{",
"$('body').css({'font-family':
'Times Roman'});",
"}"),
keys = TRUE,
searchHighlight = TRUE,
pageLength = 20,
columnDefs = list(list(className = 'dt-center', targets = "_all"))))
# Make the interactive table
```
<br>
<br>
<span style = "font-weight:normal;color:#000000; font-size: 10px; font-family:Times, serif">SI Table 2 The table contains read information per sample. Each sample is identified their sample name along with the number of reads after trimming, subsampling, and the percentage of the subsampled reads that were mapped to a gene.</span>
```{r, Number of Reads Table}
data.frame(Sample.Name = c("RPRW_112","RPOC_250","RPOC_255","RPOC_259","RPRW_272","RPOW_283","RPOW_285","RPRC_28","RPRC_38","RPRW_39","RPOW_58","RPRC_9"),
Trimmed = c("8,326,750","8,158,694","7,454,576","7,176,142","14,121,180","8,788,552","10,008,752","60,088,608","50,075,722","49,014,042","9,886,540","11,685,830"),
Subsampled = c("8,326,750","8,158,694","7,454,576","7,176,142","14,121,180","8,788,552","10,008,752","40,000,000","40,000,000","40,000,000","9,886,540","11,685,830"),
Percent.Mapped = c(42.5,85.97,83.69,84.1,45.82,84.39,90.14,90.83,90.09,91.6,83.99,83.95)) %>%
gt::gt(.) %>%
gt::cols_label(
Sample.Name = "Sample Name",
Trimmed = "Trimmed",
Subsampled = "Subsampled",
Percent.Mapped = "Percent Mapped (%)") %>%
gt::cols_align(
align = c("center"),
columns = everything()
) %>%
gt::tab_options(table.font.names = "serif")
```
<br>
<br>
<br>
<span style = "font-weight:normal;color:#000000; font-size: 10px; font-family:Times, serif">SI Table 3. The interactive table contains all the differential gene expression results. Each row contains the gene being analyzed, the gene ID, the log fold change, the log Counts per Million (CPM), the false discovery corrected p value (FDR), the comparison (Post-Cold vs. Post-Warm, Pre-Cold vs. Post-Cold, Pre-Cold vs. Pre-Warm, Pre-Warm vs. Post-Warm), the associated gene ontologies, and descriptions. The table is grouped by the gene because a gene can have multiple descriptions and gene ontologies. The groups are collapsible by clicking on the group row heading. All columns are searchable.</span>
```{r - DE Tables}
DE.Table <- (F.tests$PreCold.PreWarm$Significant.Table$table %>%
as.tibble(rownames = "Gene") %>%
mutate(Comparison = "Pre-Cold vs. Pre-Warm")) %>%
bind_rows( (F.tests$PreCold.PostCold$Significant.Table$table %>%
as.tibble(rownames = "Gene") %>%
mutate(Comparison = "Pre-Cold vs. Post-Cold"))) %>%
bind_rows( (F.tests$PostCold.PostWarm$Significant.Table$table %>%
as.tibble(rownames = "Gene") %>%
mutate(Comparison = "Post-Cold vs. Post-Warm"))) %>%
select(Gene, logFC, logCPM, FDR, Comparison) %>%
dplyr::rename("Log Fold Change" = logFC,
"Log Counts per Million" = logCPM) %>%
# I convert the results to a tibble with the comparisons as a separate column.This table only has significantly deferentially expressed genes.
dplyr::left_join(., biomaRt::getBM(filters = "external_gene_name", attributes = c("external_gene_name","ensembl_gene_id", "name_1006", "definition_1006"),
values = .$Gene, mart= Gene_Names), by = c("Gene" = "external_gene_name"), multiple = "all") %>%
dplyr::rename("GO Term Name" = "name_1006",
"Description" = "definition_1006",
"Gene ID" = "ensembl_gene_id") %>%
dplyr::select(Gene, "Gene ID", "Log Fold Change", "Log Counts per Million", FDR,
Comparison, "GO Term Name", Description) %>%
dplyr::group_by(Gene)
Int.DE.Table <- DT::datatable(DE.Table,
extensions = c('KeyTable', 'FixedHeader', 'RowGroup'),
filter = 'top',
callback = JS(
"table.on('click', 'tr.dtrg-group', function () {",
" var rowsCollapse = $(this).nextUntil('.dtrg-group');",
" $(rowsCollapse).toggleClass('hidden');",
"});"
),
options = list(initComplete = JS("function(settings, json)
{",
"$('body').css({'font-family':
'Times Roman'});",
"}"),
rowGroup = list(dataSrc = 1),
keys = TRUE,
searchHighlight = TRUE,
pageLength = 20,
columnDefs = list(list(className = 'dt-center', targets = "_all")))) %>%
DT::formatRound(columns=c(3:5), digits=3)
# Here I create and interactive table with the deferentially expressed genes. the comparison column is the test that was preformed.
Int.DE.Table
# Display the interactive table.
```
<br>
<br>
<span style = "font-weight:normal;color:#000000; font-size: 10px; font-family:Times, serif">SI Table 4 The interactive table includes the fisher’s exact test gene ontology enrichment results with the comparison, false discovery rate corrected p. value (FDR), term ID, Source, and term name. Term ID refers to the numerical code for the term name, while term name is the gene ontology. Source is the gene ontology the term belongs too (Biological Process, Molecular Function, Cellular Component).</span>
```{r - DE GO Table}
DT::datatable(apply((Fishers.gost.table %>%
dplyr::rename(Significant = significant,
"P Value" = p_value,
"Term Size" = term_size,
"Query Size" = query_size,
"Intersection Size" = intersection_size,
Precision = precision,
Recall = recall,
Source = source,
"Term Name" = term_name,
"Effective Domain Size" = effective_domain_size,
"Source Order" = source_order,
Parents = parents,
"Evidence Codes" = evidence_codes,
Intersection = intersection)),2,as.character),
extensions = c('KeyTable', 'FixedHeader', 'Buttons', 'FixedColumns','Responsive'),
options = list(
initComplete = JS("function(settings, json)
{",
"$('body').css({'font-family':
'Times Roman'});",
"}"),
dom = 'Bfrtip',
buttons = list(list(extend = 'colvis')),
fixedColumns = list(leftColumns = 2),
scrollX = TRUE,
fixedHeader = TRUE,
columnDefs = list(list(className = 'dt-center', targets = "_all")),
keys = TRUE,
searchHighlight = TRUE,
pageLength = 20))
```