-
Notifications
You must be signed in to change notification settings - Fork 9
/
.Rhistory
156 lines (156 loc) · 6.71 KB
/
.Rhistory
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
require(rmarkdown)
require(knitr)
require(tidyverse)
require(RColorBrewer)
require(cowplot)
require(reshape2)
require(ggdendro)
require(vegan)
require(ape)
# set the path for root directory
setwd("/Users/abby/Documents/Projects/dietstudy_analyses/")
# set seed
set.seed(42)
#### load the maps for downstream use ####
map_sample <- read.table("data/maps/SampleID_map.txt", sep = "\t", header = T, comment = "")
map_username <- read.table("data/maps/UserName_map.txt", sep = "\t", header = T, comment = "")
map_sample$StudyDate <- as.Date.factor(map_sample$StudyDate, format = "%m/%d/%y")
#load other maps
food_map <- read.table("data/maps/food_map.txt", sep = "\t", header = T, comment = "")
tax_map <- read.table("data/maps/taxonomy_norm_map.txt", sep = "\t", header = T, comment = "")
##########food##############
# load the food distance matrix, unweighted unifrac
food_un <- read.delim("data/diet/processed_food/dhydrt_smry_no_soy_beta/unweighted_unifrac_dhydrt.smry.no.soy.txt", row = 1) # weighted in not significant
food_dist <- as.dist(food_un)
# make non-tree distance matrix for food (for supplemental)
food <- read.delim("data/diet/processed_food/dhydrt.smry.no.soy.txt", row = 1)
food <- food[,colnames(food) %in% colnames(food_un)]
no_tree_dist <- dist(t(food))
##############taxonomy############
# load taxonomy collapsed for each person
tax <- read.delim("data/microbiome/processed_average/UN_tax_CLR_mean_norm_s.txt", row = 1) # updates from reviewer comments
# drop soylents
tax <- tax[,colnames(tax) %in% colnames(food_un)]
# make taxonomy distance matrix
tax_dist <- dist(t(tax))
###############nutrients############
# load nutrition data
nutr <- read.delim("data/diet/processed_nutr/nutr_65_smry_no_soy.txt", row = 1)
# normalize nutrition data across features (rows)
nutr_n <- sweep(nutr, 1, rowSums(nutr), "/")
# make nutrition distance matrix (euclidean)
nutr_dist <- dist(t(nutr_n))
# identify soylent samples
soylent <- map_sample[map_sample$UserName %in% c("MCTs11", "MCTs12"),]
soylent <- as.character(soylent$X.SampleID)
source(file = "lib/colors/UserNameColors.R")
tax_all <- read.delim("data/microbiome/processed_sample/taxonomy_clr_s.txt", row = 1)
# drop soylent
tax_all_ns <- tax_all[, !colnames(tax_all) %in% soylent]
#make dist matrix
tax_all_dist <- dist(t(tax_all_ns))
# make pcoa
tax_all_pcoa <- data.frame(pcoa(tax_all_dist)$vectors)
eigen <- pcoa(tax_all_dist)$values$Eigenvalues
percent_var <- signif(eigen/sum(eigen), 4)*100
# move rownames to a column
tax_all_pcoa <- rownames_to_column(tax_all_pcoa, var = "X.SampleID")
#merge map and PCOA by SampleID - select both lines and run together - won't work otherwise
tax_all_pcoa <- inner_join(tax_all_pcoa, map_sample, by = 'X.SampleID')
mytest <- adonis(tax_all_dist~tax_all_pcoa$UserName, permutations = 999)
plot_p <- mytest$aov.tab$`Pr(>F)`[1]
mytest
tax_all_plot_one_color <- ggplot(tax_all_pcoa, aes(x = Axis.1, y = Axis.2, fill = UserName)) +
geom_point(color = "#5f86b7", alpha=0.75, size = 2, alpha = 0.6) +
stat_ellipse(color = "dark grey", type = "norm", linetype = 2, level = 0.95, alpha = 0.5) +
xlab(paste0("PC 1 [",percent_var[1],"%]")) +
ylab(paste0("PC 2 [",percent_var[2],"%]")) +
theme_classic() +
theme(legend.position = "right",
legend.title = element_blank(),
legend.text = element_text(size = 9),
axis.text = element_text(size = 4),
axis.title = element_text(size=9)) +
guides(color = guide_legend(ncol = 4))
tax_all_plot_one_color + theme(legend.position = 'none')
food_all <- read.delim("data/diet/processed_food/dhydrt_beta/unweighted_unifrac_dhydrt.txt", row = 1)
# drop soylent
food_all_ns <- food_all[!rownames(food_all) %in% soylent, !colnames(food_all) %in% soylent]
#food_all_dist <- as.dist(food_all_ns) #for adonis testing
food_all_pcoa <- data.frame(pcoa(food_all_ns)$vectors)
eigen <- pcoa(food_all_ns)$values$Eigenvalues
percent_var <- signif(eigen/sum(eigen), 4)*100
# move rownames to a column
food_all_pcoa <- rownames_to_column(food_all_pcoa, var = "X.SampleID")
#merge map and PCOA by SampleID
food_all_pcoa <- inner_join(food_all_pcoa, map_sample, by = 'X.SampleID')
food_all_plot_one_color <- ggplot(food_all_pcoa, aes(x = Axis.1, y = Axis.2, fill = UserName)) +
geom_point(color = "#5a2071", alpha=0.75, size = 2, alpha = 0.6) +
stat_ellipse(color = "dark grey", type = "norm", linetype = 2, level = 0.95, alpha = 0.5) +
xlab(paste0("PC 1 [",percent_var[1],"%]")) +
ylab(paste0("PC 2 [",percent_var[2],"%]")) +
theme_classic() +
theme(legend.position = "bottom",
legend.title = element_blank(),
legend.text = element_text(size = 9),
axis.text = element_text(size = 4),
axis.title = element_text(size=9)) +
guides(color = guide_legend(ncol = 1))
food_all_plot_one_color + theme(legend.position = 'none')
# make pcoas
pcoa_f <- as.data.frame(pcoa(food_dist)$vectors)
pcoa_t <- as.data.frame(pcoa(tax_dist)$vectors)
# procrustes
pro <- procrustes(pcoa_f, pcoa_t)
pro_test <- protest(pcoa_f, pcoa_t, perm = 999)
eigen <- sqrt(pro$svd$d)
percent_var <- signif(eigen/sum(eigen), 4)*100
beta_pro <- data.frame(pro$X)
trans_pro <- data.frame(pro$Yrot)
beta_pro$UserName <- rownames(beta_pro)
beta_pro$type <- "Food (Unweighted Unifrac)"
trans_pro$UserName <- rownames(trans_pro)
trans_pro$type <- "Microbiome (Aitchison's)"
colnames(trans_pro) <- colnames(beta_pro)
pval <- signif(pro_test$signif, 1)
plot <- rbind(beta_pro, trans_pro)
food_micro <- ggplot(plot) +
geom_point(size = 2, alpha=0.75, aes(x = Axis.1, y = Axis.2, color = type)) +
scale_color_manual(values = c("#5a2071", "#5f86b7")) +
theme_classic() +
geom_line(aes(x= Axis.1, y=Axis.2, group=UserName), col = "darkgrey", alpha = 0.6) +
theme(panel.background = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
legend.title = element_blank(),
legend.text = element_text(size=9),
legend.position = 'bottom',
axis.text = element_text(size=4),
axis.title = element_text(size=9),
aspect.ratio = 1) +
guides(color = guide_legend(ncol = 1)) +
annotate("text", x = 0.3, y = -0.27, label = paste0("p-value=",pval), size = 2) +
xlab(paste0("PC 1 [",percent_var[1],"%]")) +
ylab(paste0("PC 2 [",percent_var[2],"%]"))
food_micro_leg <- get_legend(food_micro)
food_micro + theme(legend.position = "none")
#### MAKE 2F #####
# make pcoas
pcoa_n <- as.data.frame(pcoa(nutr_dist)$vectors)
pcoa_t <- as.data.frame(pcoa(tax_dist)$vectors)
# procrustes
pro <- procrustes(pcoa_n, pcoa_t)
pro_test <- protest(pcoa_n, pcoa_t, perm = 999)
colnames(pcoa_t)
rownames(pcoa_t)
map_username
map_username$UserName
map_username_sub <- map_username[map_username$UserName %in% rownames(pcoa_t),]
map_username_sub$UserName == rownames(pcoa_t)
gentest <- adonis(tax_dist~map_username_sub$Gender, permutations = 999)
gen_p <- gentest$aov.tab$`Pr(>F)`[1]
gen_p
gentest
## same for foods
foodtest <- adonis(food_dist~map_username_sub$Gender, permutations = 999)
foodtest