-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathPierro_project.R
92 lines (68 loc) · 5.22 KB
/
Pierro_project.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
#Daniela Pierro
#Dec 10, 2021
#Dr. T
#Advanced Data Analysis
#Project Title: Examining Plant Community Composition during Fire Recovery at the Bernard Field Station
# Examining data collected after the Bernard Field Station fire, comparing plant cover between treatments C and R: control, vs. removal of non-native plants.
#note: github release omits the unpublished data.
#install relevant packages and libraries
library(vegan)
library(tidyr)
library(purrr)
library(dplyr)
library(RVAideMemoire)
#read the data files shared by Dr. T on Nov 30th, containing post-fire BFS plant coverage data.
#all NA values were replaced with 0, all species columns with 0 total counts were removed, and the counts of species in each treatment group were aggregated into one sum number.
#to limit the number of columns, we grouped species both taxonomically and by life history categories
aggregated_data <- read.csv("./BFS_project/Aggregated_BFS_cover_data.csv") #import aggregated numeric data with species grouped taxonomically
categorized_data <- read.csv("./BFS_project/BFS_cover_data.csv") #import aggregated numeric data with species grouped by life history categories
View(aggregated_data)
View(categorized_data)
#read the updated data file for corrected treatments shared by Dr. T
treat <- read.csv("./BFS_project/data/Treatments_corrected.csv")
#remove the X column
aggregated_data_subset <- subset(aggregated_data,select = -c(X))
categorized_data_subset <- subset(categorized_data,select = -c(X))
dim(aggregated_data) #check that the column was removed
dim(aggregated_data_subset)
dim(categorized_data)
dim(categorized_data_subset)
#run metaMDS using bray curtis distance
ord_agg=metaMDS(aggregated_data_subset,distance="bray",k=2,trymax=5000) #use meta MDS to "drop in" 5000 times in the parameter space to search for a local best-fit solution and converge on a global optimum.
ord_cat=metaMDS(categorized_data_subset,distance="bray",k=2,trymax=5000) #repeat for the categorized dataset
#visualize metaMDS results in stressplots
stressplot(ord_agg) #visualize the ord data in a stressplot, showing 2-dimensional distances that capture the multidimensional distances between data points. This plot compares observed dissimilarity with ordination distance
stressplot(ord_cat)
#plot the aggregated data
colorvalues <- c("#D55E00","#009E73")
plot(ord_agg,type="n") #plot our data, beginning with a generic blank plot
ordispider(ord_agg,treat$x,col=colorvalues,label=TRUE,cex=0.7) #examine variation among years (center points) across plots sampled within years (tips of radiating lines)
#text(ord_agg,display="species",cex=0.7) #label the species
#points(ord_agg,disp="sites") #show locations of each plot
#to simplify the ordispider plot, we will filter out species labels to only include the most relevant species: the most abundant as determined by an inverse Simpson measure of diversity
relevant_species_agg = diversity(aggregated_data_subset,index="invsimpson",MARGIN=2)
orditorp(ord_agg,display="species",priority=relevant_species_agg,col="black",cex=0.75) #remake the plot with syntax to include on priority species lables, drawing on priSpp to determine which species are plotted
#repeat ordispider for data categorized by life history
plot(ord_cat,type="n") #plot our data, beginning with a generic blank plot
ordispider(ord_cat,treat$x,col=colorvalues,label=TRUE,cex=0.7)
relevant_species_cat = diversity(categorized_data_subset,index="invsimpson",MARGIN=2)
orditorp(ord_cat,display="species",priority=relevant_species_cat,col="black",cex=0.75) #remake the plot with syntax to include on priority species lables, drawing on priSpp to determine which species are plotted
#calculate bray-curtis distances
unlist(lapply(aggregated_data_subset,class)) #check all columns are numeric
unlist(lapply(categorized_data_subset,class)) #check all columns are numeric
bc_agg = vegdist(aggregated_data_subset, "bray") #calculate bray-curtis distance using the numeric data
results_agg=meandist(bc_agg,treat) #calculate mean distances among treatments
summary(results_agg) #the bray-curtis distance is about 0.3096085
bc_cat = vegdist(categorized_data_subset, "bray") #calculate bray-curtis distance using the numeric data
results_cat=meandist(bc_cat,treat) #calculate mean distances among treatments
summary(results_cat) #the bray-curtis distance is slightly smaller: 0.227758
#PERMANOVA
permanova_agg=adonis(aggregated_data~treat$x,method="bray",perm=999) #run a PERMANOVA. We use the Bray Curtis distance, and assess whether the treatments are significantly different. We will run 999 permutations.
permanova_agg #0.964, insignificant difference between the treatment groups
permanova_cat=adonis(categorized_data~treat$x,method="bray",perm=999) #run a PERMANOVA. We use the Bray Curtis distance, and assess whether the treatments are significantly different. We will run 999 permutations.
permanova_cat #0.904, insignificant difference between the treatment groups
#check the homogeneity of variances, by assessing multivariate homogeneity of group dispersions
betadisper(bc_agg,treat$x)
betadisper(bc_cat,treat$x)
#in analyses comparing the control and treatment groups for both species categorizations, there is about as much variation between groups as within groups.
#future directions could include a random effects model