-
Notifications
You must be signed in to change notification settings - Fork 3
/
preModelingFunctions.R
282 lines (257 loc) · 9.85 KB
/
preModelingFunctions.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
#preModelingFunctions.R
#Set of functions to prepare data and environment to run species distribution models
#Author: Jorge Velásquez
#LoadLibraries
#Loads all libraries needed to run functions called by mxParallel
#Arguments:
# memory(string): string specifying how much memmory allocate to rJava, e.g. "512m"
# for 512 megabytes, "2g" for 2 gigabytes.
LoadLibraries<-function(memory="4g"){
options(java.parameters = paste0("-Xmx",memory))
require("gbm")
require("devtools")
require("dismo")
require("maptools")
require("plyr")
require("raster")
require("reshape2")
require("rJava")
require("rgdal")
require("rgeos")
require("SDMTools")
require("sp")
require("spatstat")
require("snowfall")
require("rlecuyer")
}
#LoadOccs
#Loads and verifies species occurrence file
##Arguments:
# occ.file(string): path to comma separated file of occurrences. Must have fields id, species, lon, lat
##Returns:
# data frame of occurrences.
LoadOccs<-function(occ.file){
if(is.data.frame(occ.file)){
occs <- occ.file
} else {
occs <- read.csv(occ.file,as.is=T)
}
with(occs, if(nrow(occs)==0){
stop("Occurrence file has 0 rows")
})
with(occs, if(!exists("id")){
stop("Variable id is missing from occurrence file")
})
with(occs, if(!exists("species")){
stop("Variable species is missing from occurrence file")
})
with(occs, if(!exists("lon")){
stop("Variable lon is missing from occurrence file")
})
with(occs, if(!exists("lat")){
stop("Variable lat is missing from occurrence file")
})
lon.errors <- with(occs, which((lon > 180)|(lon < -180)))
lat.errors <- with(occs, which((lat > 90)|(lat < -90)))
lon.na <- with(occs, which(is.na(lon)))
lat.na <- with(occs, which(is.na(lon)))
rem.idx <- unique(c(lon.errors,lat.errors,lon.na,lat.na))
if(length(rem.idx) > 0){
occs<-occs[-rem.idx, ]
message(paste0("Removed ",length(lon.errors)," longitude values >180 or < -180\n",
"Removed ",length(lat.errors)," latitude values >90 or < -90\n",
"Removed ",length(lon.na)," NA longitude values\n",
"Removed ",length(lat.na)," NA latitude values"))
}
if(nrow(occs)==0){
stop("Occurrence file has 0 rows")
}
return(occs)
}
#CleanOccs
#Extracts environmental values associated with occurrences after removing duplicates
#and eliminating records at a particular distance
##Arguments:
## occs(data frame): data frame object of occurrences
## env.vars(raster): raster or stack of environmental variables from which to extract
## environmental values.
## dist(numeric): distance below which two coordinates are considered a duplicate.
CleanOccs<-function(occs,env.vars,dist){
occs <- ddply(occs,.(species),IdNeighbors,dist=1000) #Apply the IdNeighbors function to each species
occs.covs <- extract(env.vars, cbind(occs$lon,occs$lat))
return(list(occs=occs, occs.covs=occs.covs))
}
#IdNeighbors
#Identifies records below a specified threshold distance and deletes them from occurrence
#file.
#Arguments:
## occs(data frame): data frame object of occurrences
## dist(numeric): distance below which two coordinates are considered a duplicate.
## longlat(logical): Are coordinates geographic?
#Returns:
## data frame object of occurrences without duplicate coordinates.
IdNeighbors<-function(occs,dist,longlat=TRUE){
if(nrow(occs)<2){
return(occs)
}
coords <- cbind(occs$lon,occs$lat)
dst <- pointDistance(coords,longlat=longlat)
diag(dst) <- NA
rmv.idx <- which(dst < dist,arr.ind=T)
if(nrow(rmv.idx)==0){
return(occs)
} else {
occs <- occs[-rmv.idx[, 1], ]
return(occs)
}
}
#FilterSpeciesByRecords
#Create list of species with more than min.recs records
#Arguments:
## occs(data frame): data frame object of occurrences
## min.recs(numeric): minimum number of records to be included in list.
#Returns:
## character vector of species with more than min.recs records
FilterSpeciesByRecords <- function(occs, min.recs){
df <- ddply(occs,"species",summarise,N=length(species))
sp.list <- df$species[which(df$N >= min.recs)]
if(length(sp.list) == 0){
stop(paste0("None of the species in occurrence file has more than ", min.recs, " records"))
} else {
return(sp.list)
}
}
#GenerateBkg
#Generates background to use in species distribution models
#Arguments:
## n (numeric): size of background dataset
## env.vars(raster): raster or stack of environmental variables from which background
## will be extracted.
## bkg.type(string): keyword that defines how the background will be sampled from bkg.aoi.
## random (default): background will be sampled randomly from bkg.aoi
## samples: get background samples from a file.
## sample.bkg(string): data frame (should better be a csv??) with coordinates lon lat for
## each record.Background is defined by this data frame.
#Returns:
## data frame with environmental conditions associated with background.
GenerateBkg <- function(n, env.vars, bkg.type="random", sample.bkg=NULL){
if(bkg.type == "random"){
bkg.covs <- sampleRaster(env.vars, n)
}
if(bkg.type == "samples"){
sample.coords <- cbind(sample.bkg$lon, sample.bkg$lat)
bkg.covs <- extract(env.vars, sample.coords)
}
return(as.data.frame(bkg.covs))
}
#GenerateSpBkg
#Generates species-specific psudoabsences or background
#Arguments:
## occs(data frame or matrix): 2-column matrix or data frame of species' occurrences.
## n(numeric): size of background dataset
## env.vars(raster): raster or stack of environmental variables from which background
## will be extracted.
## bkg.type(string): keyword that defines how the background will be sampled from bkg.aoi.
## random (default): background will be sampled randomly from bkg.aoi
## samples: get background samples from a file.
## bkg.aoi(string): keyword that defines where background will be sampled from.
## regions: background will be species specific, and it will correspond
## to the polygons of a shapefile that completely contain the
## species records.
## ch: background will be species specific and it will correspond to the
## convex hull of the species' records.
## regions(SpatialPolygons): SpatialPolygons object with the regions that will be used to
## define species background.
## field(string): field (column name) that defines the regions.
## Used only when bkg.aoi="regions"
## sample.bkg(data frame): data frame (should better be a csv??) with coordinates lon lat for
## each record.Background is defined by this data frame.
## buffer(numeric): Buffer in meters to be applied to convex polygons.
## Used only when bkg.aoi="ch".
#Returns:
## data frame with environmental conditions associated with background.
GenerateSpBkg <- function(occs, n, env.vars, bkg.type="random", bkg.aoi,
regions, field, sample.bkg=NULL, buffer=NULL){
bkg <- CreateAOI(occs, method=bkg.aoi, env.vars, regions, field, buffer)
tmp.stack <- stack(bkg, env.vars)
if(bkg.type == "random"){
bkg.covs <- sampleRaster(tmp.stack, n)[, 2:(nlayers(env.vars)+1)]
}
if(bkg.type == "samples") {
if(is.null(sample.bkg)){
stop("Missing target background samples file")
}
with(sample.bkg, if(!exists("lon")){
stop("Variable lon is missing from occurrence file")
})
with(sample.bkg, if(!exists("lat")){
stop("Variable lat is missing from occurrence file")
})
bkg.covs <- na.omit(extract(tmp.stack, cbind(sample.bkg$lon,sample.bkg$lat)))[, 2:(nlayers(env.vars)+1)]
}
return(list(bkg.aoi=bkg,bkg.covs=as.data.frame(bkg.covs)))
}
#CreateAOI
#Create raster of area of interest for modeling
#Arguments:
## occs(matrix or data frame): 2-column matrix or data frame of species' occurrences.
## method(string): either regions or ch depending on whether the area of interest is
## defined by the polygons of a shapefile that contain species occurrences or by a
## convex hull from occurrences
## aoi(raster): a raster object with the extent, resolution and projection of the area of interest.
## regions(SpatialPolygons): SpatialPolygons object with the regions that will be used to
## define species background.
## field(string): field (column name) that defines the regions.
## buffer(numeric): buffer in meters to be applied to convex polygons.
#Returns:
# A raster object with area of interest for modeling.
CreateAOI<-function(occs, method, aoi, regions, field, buffer){
in.pts <- SpatialPoints(cbind(occs$lon, occs$lat), proj4string = CRS(projection(aoi)))
if(method == "regions"){
if(missing(regions)){
stop("Missing regions argument")
}
if(missing(field)){
stop("Missing field argument")
}
proj4string(regions) <- CRS(projection(aoi))
units <- na.omit(unique(over(in.pts, regions)[, field]))
ind <- which(regions@data[,field] %in% units)
bkg.shp <- regions[ind,]
bkg <- rasterize(bkg.shp, aoi, field=1)
return(bkg)
}
if(method=="ch"){
ch.shp <- convHull(in.pts)@polygons
if(!is.null(buffer)){
if(buffer>0){
ch.shp <- gBuffer(ch.shp, width=buffer)
}
}
bkg <- rasterize(ch.shp, aoi, field=1)
bkg <- bkg * (!is.na(aoi[[1]]))
return(bkg)
}
}
#sampleRaster
#Function to sample n points randomly from a raster object
#Arguments:
## raster.obj(raster): raster object to sample coordinates pairs from
## n(numeric): number of coordinate pairs to sample
#Returns:
## data frame of sampled coordinates
sampleRaster<-function(raster.obj,n){
if(nlayers(raster.obj)>1){
mask <- sum(raster.obj)
cells <- Which(!is.na(mask),cells=T)
} else {
cells <- Which(!is.na(raster.obj),cells=T)
}
if(length(cells)<n){
n <- length(cells)
warning("n value exceeds the number of cells with data")
}
sel.cells <- sample(cells, n)
output <- raster.obj[sel.cells]
return(output)
}