diff --git a/.Rbuildignore b/.Rbuildignore
index 42af15a..cfb32f7 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -3,4 +3,5 @@ changelog.txt
 .travis.yml
 ^.*\.Rproj$
 ^\.Rproj\.user$
-README.md
\ No newline at end of file
+README.md
+
diff --git a/.gitignore b/.gitignore
index 89818b0..3575951 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,5 @@
 .Rproj.user
 .Rhistory
 .RData
-*.Rproj
\ No newline at end of file
+*.Rproj
+
diff --git a/DESCRIPTION b/DESCRIPTION
index 012041a..d174c6f 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,17 +1,20 @@
 Package: rLakeAnalyzer
 Title: Lake Physics Tools
 Maintainer: Luke Winslow
-Version: 1.9.1
+Version: 1.10.0
 Author: Luke Winslow, Jordan Read, Richard Woolway, Jennifer Brentrup, Taylor
-    Leach, Jake Zwart
+    Leach, Jake Zwart, Sam Albers, Doug Collinge
 Description: Standardized methods for calculating common important derived
     physical features of lakes including water density based on
     temperature, thermal layers, thermocline depth, lake number, Wedderburn
     number, Schmidt stability and others.
+Depends: R (>= 2.10)
 Imports:
     plyr
 Suggests:
-    testthat
+    testthat,
+    knitr,
+    rmarkdown
 License: GPL (>= 2)
 Packaged: 2014-07-06 09:09:24 UTC; Luke
 Repository: https://github.com/GLEON/rLakeAnalyzer
diff --git a/NAMESPACE b/NAMESPACE
index 0c11852..556dacd 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -7,6 +7,8 @@ export(wedderburn.number, layer.density, lake.number, uStar, water.density,
        schmidt.plot, center.buoyancy, ts.center.buoyancy, internal.energy,
        ts.internal.energy, epi.temperature, hypo.temperature, whole.lake.temperature,
        approx.bathy)
+export(depth.filter)
+export(wtr.layer)
 import("plyr")
 importFrom("grDevices", "colorRampPalette")
@@ -14,3 +16,6 @@ importFrom("graphics", "abline", "axis", "filled.contour", "legend",
            "lines", "plot", "segments", "title")
 importFrom("stats", "approx")
 importFrom("utils", "read.table")
+
+
+
diff --git a/R/data.R b/R/data.R
new file mode 100644
index 0000000..23bd833
--- /dev/null
+++ b/R/data.R
@@ -0,0 +1,15 @@
+#' Late Summer Profile
+#'
+#' Late summer water profile taken from Quesnel Lake, British Columbia, Canada. Profile taken with a Sea-Bird
+#' SBE19plus.
+#'
+#' \describe{
+#'   \item{depth}{Depth, m}
+#'   \item{temper}{Temperature, degC}
+#'   \item{salinity}{Salinity, PSU}
+#'   \item{oxygen}{Oxygen, ml/l}
+#'   \item{oxygen.sat}{Oxygen saturation, percent saturation}
+#'   \item{density}{Density, kg/m^3}
+#'   ...
+#' }
+"latesummer"
\ No newline at end of file
diff --git a/R/sm-algorithm.R b/R/sm-algorithm.R
new file mode 100644
index 0000000..657b718
--- /dev/null
+++ b/R/sm-algorithm.R
@@ -0,0 +1,457 @@
+#' @title Splitting interval i into 2 pieces
+#' @description Splits interval i into 2 pieces.
+#'
+#' @param i [INTEGER] interval number, should be less than NR+1
+#' @param ni [INTEGER(NR)] current array with interval start point numbers
+#'
+#' @noRd
+#' @return new array with interval start point numbers
+#'
+#'
+#' @examples
+#' ni = c(1,5,10,15,20)
+#' nr = 4
+#' ni
+#' ni = split_interval(ni,i=2)
+#' ni
+#' ni = split_interval(ni,i=5)
+#' ni
+#' ni = split_interval(ni,i=2)
+#' ni
+#' ni = split_interval(ni,i=1)
+#' ni
+#' ni = split_interval(ni,i=length(ni)-1)
+#' ni
+#' ni = split_interval(ni,i=length(ni)-1)
+#' ni
+
+split_interval <- function(ni, i) {
+  stopifnot(i < length(ni)) # Otherwise ni[i+1] would fail.
+  k1 <- ni[i]
+  k2 <- ni[i + 1]
+  jsplit <- floor((k1 + k2) / 2) # Is an index, must be an integer.
+
+  # The following lines are taken from the original code but actually
+  # are in error. They are included simply to make the tests produce the
+  # identical answers in every case.
+  if (jsplit >= k2 - 1) jsplit <- k2 - 2
+  if (jsplit <= k1 + 1) jsplit <- k1 + 2
+
+  stopifnot(k1 < jsplit & jsplit < k2) # Verify that the interval has actually been split.
+  # Create a new vector with jsplit at position i
+  c(ni[1:i], jsplit, ni[(i + 1):length(ni)])
+}
+
+
+#'
+#' @title merge_intervals
+#' @description Merge interval i and i+1.
+#' @param i interval number of the first segment to merge
+#' @param ni current array with interval start point numbers
+#'
+#' @noRd
+#' @details Intervals i and i+1 are merged into a single interval.
+
+
+merge_intervals <- function(i, ni) {
+  stopifnot(i > 1) # Can't merge the first interval
+  stopifnot(i < length(ni)) # Must have an element at ni[i+1]
+  stopifnot(length(ni) > 2) # Only one interval so can't merge
+  c(ni[1:(i - 1)], ni[(i + 1):length(ni)]) # Just removes element i
+}
+
+
+#'
+#' @description Normalize a vector of samples. X and Y will be normalized so that the first point is (0,0) and the last point is (1,1). Vector x holds the input x values and must be in ascending order.
+#'
+#' @title Normalize a vector of samples.
+#' @param x input x-axis array (should be in "chronological" order)
+#' @param y input y-axis array
+#' @return Output is a list with:
+#' \itemize{
+#'   \item anormx, the normalization value used to make the last x == 1.
+#'   \item anormy, the normalization value used to make the last y == 1.
+#'   \item xx,yy vectors of the normalized x,y values, the same length as the inputs.
+#' }
+#'
+#' @noRd
+#' @details A function to normalize a vector of samples
+#'
+
+getxxnorm <- function(x, y) {
+  anormx <- x[length(x)] - x[1]
+  anormy <- y[length(y)] - y[1]
+  xx <- (x - x[1]) / anormx
+  yy <- (y - y[1]) / anormy
+
+  list(anormx = anormx, anormy = anormy, xx = xx, yy = yy)
+}
+
+
+
+#' @title Computing a norm value for the segment from point k1 to k2-1
+#' @description Computes the norm value for the segment from the point k1 to k2-1.
+#' @param k1 [INTEGER] start point
+#' @param k2 [INTEGER] end point+1
+#' @param x [REAL] input x-axis array (predictor)
+#' @param y [REAL] input y-axis array (response)
+#' @noRd
+#' @return norm value
+
+#' @examples
+#' ni <- c( 1, 201, 402 )
+#' i <- 1
+#' k1 <- ni[i]
+#' k2 <- ni[i+1]
+#'
+#'
+#' r2b(k1, k2, y=latesummer$temper, x=latesummer$depth)
+
+
+r2b <- function(k1, k2, x, y) {
+  if (k2 - k1 <= 2) {
+    r2b <- 0
+    # a = 0 # original code does not produce a or b?
+    # b = 0
+  } else {
+    is <- k1:(k2 - 1)
+    sxx <- sum(x[is] ^ 2)
+    sxy <- sum(x[is] * y[is])
+    sy <- sum(y[is])
+    sx <- sum(x[is])
+
+    n <- k2 - k1
+
+    a <- 0.0
+    if (k1 > 1) {
+      a <- (n * sxy - sy * sx) / (n * sxx - sx * sx)
+    }
+    b <- (sy - a * sx) / n
+    r2b <- max(abs(y[is] - a * x[is] - b) / sqrt(a ^ 2 + 1))
+  }
+
+  return(r2b)
+}
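For intuition about the norm: for an interior segment (k1 > 1), r2b() returns the largest perpendicular distance from the points to their least-squares line, while the first segment is deliberately fit with zero slope, consistent with a homogeneous surface mixed layer. A minimal sketch of both behaviours, assuming the internal function is visible (e.g. after devtools::load_all()):

    x <- seq(0, 1, length.out = 10)
    y <- 2 * x + 0.5              # points lie exactly on a line of slope 2
    r2b(2, 11, c(0, x), c(0, y))  # interior segment: sloped fit, norm ~ 0
    r2b(1, 11, x, y)              # first segment: slope forced to 0, norm = 1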
+
+
+
+#' @title Computing linear segments for a specified error norm value.
+#' @description Segments the data in x,y into the intervals given in the output array A.
+#' The data in each interval can be linearly fitted within an error, given by r2b(), less than eps.
+#' The dynamic arrays in R remove the need to know the number of data points, n, which is just
+#' the length of x and y. Similarly Nr is no longer needed and is length(Ni)-1, one less than the length
+#' of the output array.
+#'
+#' @param eps [real] error norm
+#' @param x [real] input x-axis array, should be an increasing function of index
+#' @param y [real] input y-axis array
+#' @noRd
+#' @return [integer] array A of indices giving data segments.
+#' A[i] start of interval; A[i+1] end of interval, for any i < length(A).
+
+s_m_p <- function(eps, x, y) {
+  # (Opening reconstructed from context.) Start with a single interval
+  # spanning all of the data; ni[i] is the first point of interval i and
+  # ni[length(ni)] is one past the last data point.
+  ni <- c(1, length(x) + 1)
+
+  # Split intervals until every one of them fits within eps.
+  i <- 1
+  while (i < length(ni)) {
+    eps1 <- r2b(k1 = ni[i], k2 = ni[i + 1], x = x, y = y)
+    if (eps1 > eps) {
+      # N.B. We split an interval here so ni gets one element longer
+      # N.B. If an interval is added we will have to test the first
+      # of the two new intervals so we don't increment i.
+      ni <- split_interval(ni, i)
+      changed <- TRUE
+    }
+    else {
+      # Prior intervals all meet eps test, go to next one.
+      i <- i + 1
+    }
+  }
+
+  changed <- TRUE # Keep track of whether any intervals were altered...
+  while (changed) { # ... because we are not finished until none were.
+    changed <- FALSE # Continue the loop only if something changed.
+
+    # All intervals now meet the test but it is possible that there are too many
+    # in the sense that there may now be adjacent intervals, which if merged into one,
+    # will then still meet the test.
+    # Scan for such interval pairs and merge them.
+
+    # Loop i over all possible "middle" intervals, which we may remove.
+    # I am not sure at this point why the last interval is not considered for merging,
+    # as it would be if i<=length(ni)-1 , which seems more natural to me.
+
+    # Note that ni potentially changes inside the loop.
+    i <- 2
+    while (i <= length(ni) - 2) {
+      # if( 2 <= length(ni)-2 ) {
+      #   # This branch had to be added because the Fortran DO loop will not execute
+      #   # if the start value is greater than the limit value but R runs backward.
+      #   for( i in 2:(length(ni)-2) ) {
+      k1 <- ni[i - 1]
+      k2 <- ni[i + 1]
+      eps1 <- r2b(k1 = k1, k2 = k2, x = x, y = y)
+      if (eps1 < eps) {
+        if (length(ni) - 1 > 2) { # Are there two or more intervals in the entire set?
+          # Yes, so merge the interval we are looking at.
+          ni <- merge_intervals(i, ni)
+          changed <- TRUE
+          # break # exit the loop
+        } else {
+          # We are here because the last two intervals tested out to merge.
+          # So we can just adjust the intervals and bail out entirely because,
+          # apparently, the entire data set lies on a line within eps!
+          # TODO: The original code doesn't seem to do this correctly. It should
+          # adjust ni[2] = ni[3] and nr = 1. I think this branch has never been
+          # taken in actual operation. Obviously the data set is invalid if it lies
+          # entirely on a straight line, right?
+
+          # ni[1]=1 # TODO: Original code, but this should already be the case.
+          return(ni)
+        }
+      }
+      # }
+      i <- i + 1
+    }
+
+    # If a merge was done then changed==TRUE. In this circumstance, the original
+    # code then immediately attempts to merge all intervals again from 2 so we
+    # do the same. This procedure seems wasteful because all the prior
+    # interval pairs have already been tested for merging and don't need it, yet we
+    # are going to test them all over again. For the moment we will continue to
+    # do it this way and perhaps TODO change it later so that it just restarts
+    # with the just-merged interval. Should get the same results faster.
+
+    if (changed) next # short out the rest of the while(changed) loop.
+
+    # In the original code the following is preceded with a comment:
+    # "to avoid couples"
+    # I'm not precisely sure what that is supposed to mean but from the original
+    # code I infer:
+    #
+    # If the end index of any interval is only 1 greater than the start index
+    # (i.e., ni[i+1] == 1 + ni[i]) then bump the end index up by one. Due to the
+    # loop this change propagates up to the last interval so we can't easily
+    # replace the loop with nice vectorized R code.
+
+    for (i in 1:(length(ni) - 1)) {
+      if (ni[i + 1] - ni[i] == 1) {
+        ni[i + 1] <- ni[i + 1] + 1
+        changed <- TRUE
+      }
+    }
+
+    # TODO: seems like we should restart the while(changed) loop at this point.
+
+    # At this point in the original code stands the comment:
+    # "{"R" algorithm: adjusting the endpoint}"
+    # I don't know what the "R algorithm" is so I'm just going to have to wing
+    # the comments.
+
+    # Scan all adjacent interval pairs.
+    for (i in 2:(length(ni) - 1)) {
+      # First of pair (ni[i-1],ni[i]), second (ni[i],ni[i+1])
+      k1 <- ni[i - 1]
+      kmid <- ni[i]
+      k2 <- ni[i + 1]
+      # Find the error on both intervals and take the max.
+      epsm <- max(r2b(k1, kmid, x, y), r2b(kmid, k2, x, y)) # TODO: I don't think this is necessary. We'll find it below anyway.
+
+      # We are going to move the midpoint and find the split that gives the
+      # best error.
+      j1 <- ni[i] # Keep track of best splitpoint so far.
+      # Scan all alternative splitpoints between these two endpoints.
+      for (j in (k1 + 2):(k2 - 2)) {
+        epsr <- max(r2b(k1, j, x, y), r2b(j, k2, x, y)) # Calculate max error
+        if (epsr < epsm) {
+          epsm <- epsr # This split is the best so far
+          j1 <- j # Keep track of which index it is at.
+        }
+      }
+
+      if (j1 != ni[i]) { # Did we find a better splitpoint?
+        ni[i] <- j1 # Yes, change the split.
+        changed <- TRUE
+      }
+
+      # Here in the original code stands "if (i.eq.2) epsm1=epsm"
+      # but epsm1 is never used so we have ignored it.
+    } # for
+  } # while(changed)
+
+  # The following statement corresponds to one in the original code but seems
+  # unnecessary. The code that manipulates ni should really maintain this condition.
+  # Mind you, I haven't actually checked carefully that this is true. Maybe the
+  # author added it to solve a problem! TODO: maybe take it out and test sometime.
+  ni[length(ni)] <- length(x)
+
+  return(ni) # Q.E.D.
+}
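Usage sketch for this internal entry point (assumes devtools::load_all() and the bundled latesummer profile): normalize first with getxxnorm(), then segment at a chosen error norm.

    data("latesummer")
    p <- getxxnorm(latesummer$depth, latesummer$temper)
    ni <- s_m_p(0.01, p$xx, p$yy)
    ni             # breakpoints; segment i covers points ni[i]..ni[i+1]-1
    length(ni) - 1 # number of linear segments found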
+
+
+#' @title Computing linear segments for a specified number of segments.
+#' @description Subroutine to determine the linear segments for a PREDEFINED NUMBER OF SEGMENTS (NR).
+#' (Use this program when you want to predefine the number of segments you want to fit
+#' to the data.) Segments the data in x,y into the intervals given in the output array Ni.
+#' The data in each interval can be linearly fitted within an error, given by r2b(), less than eps.
+#' @param nr [integer] number of segments, fixed.
+#' @param x [real] input x-axis array, should be an increasing function of index
+#' @param y [real] input y-axis array
+#' @noRd
+#' @return [list(eps=eps,ni=ni)] where:
+#' \itemize{
+#'   \item eps [real] is the maximum error over all intervals,
+#'   \item ni is a [vector of integer, length nr+1] vector Ni of indices giving data segments;
+#'   \item Ni[i] start of interval; Ni[i+1] end of interval, for any i < nr+1.
+#' }
+
+s_mN <- function(nr, x, y) {
+  # (Opening reconstructed from context, mirroring s_m_p().) Start with a
+  # single interval spanning all the data, then repeatedly split the
+  # interval with the largest error norm until there are nr intervals.
+  ni <- c(1, length(x) + 1)
+  while (length(ni) - 1 < nr) {
+    errs <- sapply(1:(length(ni) - 1), function(i) {
+      r2b(k1 = ni[i], k2 = ni[i + 1], x = x, y = y)
+    })
+    ni <- split_interval(ni, which.max(errs))
+  }
+
+  changed <- TRUE # Keep track of whether any intervals were altered...
+  while (changed) { # ... because we are not finished until none were.
+    changed <- FALSE # Continue the loop only if something changed.
+
+    # The merge scan used by s_m_p() is disabled here: the number of segments
+    # is fixed, so merging would drop it below nr. The original commented-out
+    # code is kept below for reference.
+
+    # i <- 2
+    # while (i <= length(ni) - 2) {
+    #   k1 <- ni[i - 1]
+    #   k2 <- ni[i + 1]
+    #   eps1 <- r2b(k1 = k1, k2 = k2, x = x, y = y)
+    #   if (eps1 < eps) {
+    #     if( length(ni)-1 > 2 ) { # Are there two or more intervals in the entire set?
+    #       # Yes, so merge the interval we are looking at.
+    #       ni <- merge_intervals(i, ni)
+    #       changed = TRUE
+    #       break # exit the for loop
+    #     } else {
+    #       # We are here because the last two intervals tested out to merge.
+    #       # So we can just adjust the intervals and bail out entirely because,
+    #       # apparently, the entire data set lies on a line within eps!
+    #       # TODO: The original code doesn't seem to do this correctly. It should
+    #       # adjust ni[2] = ni[3] and nr = 1. I think this branch has never been
+    #       # taken in actual operation. Obviously the data set is invalid if it lies
+    #       # entirely on a straight line, right?
+
+    #       ni[1]=1 # TODO: Original code, but this should already be the case.
+    #       return(ni)
+    #     }
+    #   }
+    # }
+    # }
+
+    # # If a merge was done then changed==TRUE. In this circumstance, the original
+    # # code then immediately attempts to merge all intervals again from 2 so we
+    # # do the same. This procedure seems wasteful because all the prior
+    # # interval pairs have already been tested for merging and don't need it, yet we
+    # # are going to test them all over again. For the moment we will continue to
+    # # do it this way and perhaps TODO change it later so that it just restarts
+    # # with the just-merged interval. Should get the same results faster.
+    #
+    # if( changed ) next # short out the rest of the while(changed) loop.
+
+    # In the original code the following is preceded with a comment:
+    # "to avoid couples"
+    # I'm not precisely sure what that is supposed to mean but from the original
+    # code I infer:
+    #
+    # If the end index of any interval is only 1 greater than the start index
+    # (i.e., ni[i+1] == 1 + ni[i]) then bump the end index up by one. Due to the
+    # loop this change propagates up to the last interval so we can't easily
+    # replace the loop with nice vectorized R code.
+
+    for (i in 1:(length(ni) - 1)) {
+      if (ni[i + 1] - ni[i] == 1) {
+        ni[i + 1] <- ni[i + 1] + 1
+        changed <- TRUE
+      }
+    }
+
+    # TODO: seems like we should restart the while(changed) loop at this point.
+
+    # At this point in the original code stands the comment:
+    # "{"R" algorithm: adjusting of endpoint}"
+    # I don't know what the "R algorithm" is so I'm just going to have to wing
+    # the comments.
+
+    for (i in 2:(length(ni) - 1)) {
+      k1 <- ni[i - 1]
+      kmid <- ni[i]
+      k2 <- ni[i + 1]
+      epsm <- max(r2b(k1, kmid, x, y), r2b(kmid, k2, x, y))
+
+      j1 <- ni[i]
+      for (j in (k1 + 2):(k2 - 2)) {
+        epsr <- max(r2b(k1, j, x, y), r2b(j, k2, x, y))
+        if (epsr < epsm) {
+          epsm <- epsr
+          j1 <- j
+        }
+      }
+
+      if (j1 != ni[i]) {
+        ni[i] <- j1
+        changed <- TRUE
+      }
+
+      # Here in the original code stands "if (i.eq.2) epsm1=epsm"
+      # but epsm1 is never used so we have ignored it.
+    } # for
+  } # while(changed)
+
+  # The following statement corresponds to one in the original code but seems
+  # unnecessary. The code that manipulates ni should really maintain this condition.
+  # Mind you, I haven't actually checked carefully that this is true. Maybe the
+  # author added it to solve a problem! TODO: maybe take it out and test sometime.
+  ni[length(ni)] <- length(x)
+
+  return(list(eps = epsm, ni = ni)) # Q.E.D.
+}
diff --git a/R/sm-utils.R b/R/sm-utils.R
new file mode 100644
index 0000000..bf4e9b2
--- /dev/null
+++ b/R/sm-utils.R
@@ -0,0 +1,166 @@
+#' Service subroutine for determining Mixed Layer Depth for a SPECIFIED ERROR NORM VALUE
+#' @description Service subroutine for determining Mixed Layer Depth for a SPECIFIED ERROR NORM VALUE.
+#'
+#' @param thres error norm
+#' @param z0 initial depth: used to omit data above z0
+#' @param zmax maximum depth: used to omit data below zmax
+#' @param z input x data array, should be increasing function of index
+#' @param sigma input y data array
+#'
+#' @noRd
+#'
+#' @return list(nimax=nimax, by_s_m=ss, smz=smz, sms=sms)
+#' \itemize{
+#'   \item nimax: number of segments
+#'   \item smz: final z array of segmented data
+#'   \item sms: final sigma array of segmented data
+#'   \item by_s_m: position of MLD = smz(2); the function now stops on bad input instead of returning -99
+#' }
+#'
+
+#'
+by_s_m <- function(thres=thres, z0=z0, zmax=zmax, z=z, sigma=sigma) {
+  # by_s_m=-99.0 # TODONE: this is for an error return, crash instead.
+  nn <- 800 # TODO: why?
+  # nn=length(z)
+
+  # finding initial s-level
+  i1 <- 1 + sum(z < z0) # Find index of the first z[] that is not less than z0
+  # if(i1==length(z)) return() # TODONE: probably should crash here
+  if (i1 == length(z)) {
+    stop("Initial depth, z0, excludes all depths in data, z!")
+    # return() # TODONE: probably should crash here
+  }
+
+  sigma0 <- sigma[i1]
+
+  # finding second s-level
+  i2 <- sum(z <= zmax) # Find index prior to first z[] greater than zmax
+
+  dz <- (z[i2] - z[i1]) / nn
+
+  results <- getxxnorm(z[i1:i2], sigma[i1:i2])
+
+  ni <- s_m_p(thres, results$xx, results$yy)
+  k <- ni[2]
+  ax <- results$anormx
+  ay <- results$anormy
+  ss <- 0.5 * (results$xx[k] + results$xx[k - 1]) * ax + z[i1]
+
+  nimax <- min(100, length(ni) - 1) # TODO: why 100?
+  smz <- rep(0, nimax) # Reserve space
+  sms <- rep(0, nimax) # Reserve space
+  smz[1] <- z[i1]
+  sms[1] <- sigma[i1]
+  i <- 2:(nimax + 1)
+  k <- ni[i]
+  smz[i] <- 0.5 * (results$xx[k] + results$xx[k - 1]) * ax + z[i1]
+  sms[i] <- 0.5 * (results$yy[k] + results$yy[k - 1]) * ay + sigma[i1]
+
+  list(nimax = nimax, by_s_m = ss, smz = smz, sms = sms)
+}
+
+
+#' Service subroutine for determining Mixed Layer Depth for a SPECIFIED NUMBER OF SEGMENTS
+#' @description Service subroutine for determining Mixed Layer Depth for a SPECIFIED NUMBER OF SEGMENTS.
+#'
+#' @param nr fixed number of segments
+#' @param z0 initial depth: used to omit data above z0
+#' @param zmax maximum depth: used to omit data below zmax
+#' @param z input x data array, should be increasing function of index
+#' @param sigma input y data array
+#'
+#' @noRd
+#'
+#' @return list(eps=s_mNresults$eps, by_s_m=ss, smz=smz, sms=sms)
+#' \itemize{
+#'   \item eps: the maximum error over all intervals.
+#'   \item smz: final z array of segmented data
+#'   \item sms: final sigma array of segmented data
+#'   \item by_s_m: position of MLD = smz(2)
+#' }
+#'
+
+
+
+by_s_m3 <- function(nr, z0, zmax, z, sigma) {
+  # by_s_m=-99.0 # TODONE: why?
+  nn <- 800 # TODO: why?
+  # nn=length(z)
+
+  # finding initial s-level
+  i1 <- 1 + sum(z < z0) # Find index of the first z[] that is not less than z0
+  if (i1 == length(z)) {
+    stop("Initial depth, z0, excludes all depths in data, z!")
+    # return() # TODO: probably should crash here
+  }
+  sigma0 <- sigma[i1]
+
+  # finding second s-level
+  # TODO: look into why by_s_m.R has a different loop. One must be wrong.
+  i2 <- sum(z < zmax) # Find index prior to first z[] greater than zmax
+
+  dz <- (z[i2] - z[i1]) / nn
+
+  results <- getxxnorm(z[i1:i2], sigma[i1:i2])
+
+  s_mNresults <- s_mN(nr, results$xx, results$yy)
+  ni <- s_mNresults$ni
+
+  k <- ni[2]
+  ax <- results$anormx
+  ay <- results$anormy
+  ss <- 0.5 * (results$xx[k] + results$xx[k - 1]) * ax + z[i1]
+
+  # nimax = min( 100, length(ni)-1 ) # TODO: why 100?
+  smz <- rep(0, nr) # Reserve space
+  sms <- rep(0, nr) # Reserve space
+  smz[1] <- z[i1]
+  sms[1] <- sigma[i1]
+  i <- 2:(nr + 1)
+  k <- ni[i]
+  smz[i] <- 0.5 * (results$xx[k] + results$xx[k - 1]) * ax + z[i1]
+  sms[i] <- 0.5 * (results$yy[k] + results$yy[k - 1]) * ay + sigma[i1]
+
+
+  list(eps = s_mNresults$eps, by_s_m = ss, smz = smz, sms = sms)
+}
+
+#' @title Calculate cline of series of segments
+#' @param z_seg depth in metres; should be an increasing vector
+#' @param sigma_seg parameter measured in the water column profile
+#' @return the depth of the cline
+#' @description Cline depth is defined as the midpoint of the segment connecting inflection points that has the maximum slope. A cline cannot occur over a depth range of less than 1 m and also cannot be the shallowest segment. Violating either condition will throw a warning, though the two cases are handled differently. Used mostly with \code{wtr.layer}.
+#' @references Fiedler, Paul C. Comparison of objective descriptions of the thermocline. Limnology and Oceanography: Methods 8.6 (2010): 313-325.
+#' @keywords internal
+#'
+#' @seealso
+#' \code{wtr.layer()}
+#'
+#'
+cline_calc <- function(z_seg = z_seg, sigma_seg = sigma_seg) {
+  ## index top of segment with greatest change of measure (sigma) over depth (z) - metalimnion
+  top <- which.max(diff(z_seg) / diff(sigma_seg))
+
+  ## The cline cannot be the top segment. If this is the case, kick it down one interval
+  if (top == 1) {
+    warning("Algorithm calculates cline to be in top segment. This is likely due to surface scatter. Using the next interval.")
+    top <- top + 1
+  }
+
+  ## index bottom of segment with greatest change of measure (sigma) over depth (z) - metalimnion
+  bot <- top + 1
+
+  ## if the depth difference between the two segments is <= 1 m, warn and return NA
+  ## TODO: also kick the interval down one and retest
+  if (diff(z_seg[c(top, bot)]) <= 1) {
+    warning("Depth range of segment with greatest change over depth is less than one metre. Likely not a cline.")
+    NA
+  } else {
+    mean(z_seg[c(top, bot)])
+  }
+}
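To make the cline rule concrete, here is a tiny hand-made example (hypothetical numbers): the 4 m to 9 m segment has the steepest change of the measure per metre of depth, so the cline is reported at its midpoint. cline_calc() is internal, so this again assumes devtools::load_all():

    z_seg     <- c(2.5, 4.0, 9.0, 16.0, 40.0)   # segment endpoint depths, m
    sigma_seg <- c(18.2, 18.1, 12.0, 8.0, 6.5)  # e.g. temperature at those depths
    cline_calc(z_seg = z_seg, sigma_seg = sigma_seg)  # 6.5 == mean(c(4, 9))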
diff --git a/R/wtr-layer-segments.R b/R/wtr-layer-segments.R
new file mode 100644
index 0000000..7e69a87
--- /dev/null
+++ b/R/wtr-layer-segments.R
@@ -0,0 +1,145 @@
+#' @export
+#' @title Exploration of lake water column layers
+#' @description Extract water column parameters of a given parameter from a profile using the split-and-merge algorithm.
+#' The cline is defined as the midpoint of the layer of water where the physical property changes the most over a
+#' small depth interval. The exact cline depends on the specification of measure. For example, if temperature is specified,
+#' then we can expect cline to output the thermocline.
+#' @param data data supplied as a bare (unquoted) value
+#' @param depth depth in metres; should be an increasing vector; supplied as a bare (unquoted) value
+#' @param measure parameter measured in the water column profile; supplied as a bare (unquoted) value
+#' @param thres error norm; defaults to 0.1
+#' @param z0 initial depth in metres; defaults to 2.5 m
+#' @param zmax maximum depth in metres; defaults to 150 m
+#' @param nseg optional parameter to define the number of segments a priori; defaults to an unconstrained approach whereby
+#' the algorithm determines segmentation by minimizing the error norm over each segment
+#' @return a data.frame with a list column. This includes: min_depth (the initial depth z0), nseg (number of segments),
+#' mld (mixed layer depth), cline (the midpoint of the segment connecting inflection points that has the maximum slope;
+#' the thermocline for temperature measures) and segments calculated by the sm algorithm.
+#'
+#' @references Thomson, R. and I. Fine. 2003. Estimating Mixed Layer Depth from Oceanic Profile Data. Journal of
+#' Atmospheric and Oceanic Technology. 20(2), 319-329.
+#' @examples
+#' data("latesummer")
+#' df1 <- wtr.layer(depth=latesummer$depth, measure = latesummer$temper)
+#' df1$mld
+#' df1$segments
+#'
+#' wtr.layer(data = latesummer, depth=depth, measure = temper, nseg=4)
+#'


+wtr.layer <-
+  function(data,
+           depth,
+           measure,
+           thres = 0.1,
+           z0 = 2.5,
+           zmax = 150,
+           nseg = "unconstrained") {


+    ## Note: accounting for the difference between intervals (nimax = nseg - 1) and segments (nseg = nimax + 1)
+    ## NSE to account for data argument
+    if (missing(data)) {
+      if (length(depth) <= 30) {
+        warning("Profile does not have enough readings for sm algorithm (<= 30): returning NA")
+        return(data.frame(min_depth = NA, nseg = NA, mld = NA, cline = NA))
+      }
+
+      if (nseg == "unconstrained") {
+        sam_list <- by_s_m(thres = thres, z0 = z0, zmax = zmax, z = depth, sigma = measure)
+        nseg <- sam_list[["nimax"]] + 1
+      } else {
+        sam_list <- by_s_m3(nr = nseg - 1, z0 = z0, zmax = zmax, z = depth, sigma = measure)
+      }
+    } else {
+      if (length(data[["depth"]]) <= 30) {
+        warning("Profile does not have enough readings for sm algorithm (<= 30): returning NA")
+        return(data.frame(min_depth = NA, nseg = NA, mld = NA, cline = NA))
+      }
+
+      if (nseg == "unconstrained") {
+        sam_list <- eval(substitute(by_s_m(thres = thres, z0 = z0, zmax = zmax, z = depth, sigma = measure)), data)
+        nseg <- sam_list[["nimax"]] + 1
+      } else {
+        sam_list <- eval(substitute(by_s_m3(nr = nseg - 1, z0 = z0, zmax = zmax, z = depth, sigma = measure)), data)
+      }
+    }
+    layers <- data.frame(
+      min_depth = z0,
+      nseg = nseg,
+      mld = sam_list[["by_s_m"]],
+      cline = cline_calc(z_seg = sam_list[["smz"]], sigma_seg = sam_list[["sms"]])
+    )
+    layers[["segments"]] <- list(data.frame(
+      segment_depth = sam_list[["smz"]],
+      segment_measure = sam_list[["sms"]]
+    ))
+    return(layers)
+  }
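The segments list column keeps the full piecewise fit, which is handy for checking the result against the raw cast:

    data("latesummer")
    wl <- wtr.layer(depth = latesummer$depth, measure = latesummer$temper)
    wl$cline   # thermocline estimate, since the measure is temperature
    seg <- wl$segments[[1]]
    plot(latesummer$temper, latesummer$depth, ylim = rev(range(latesummer$depth)),
         xlab = "Temperature (degC)", ylab = "Depth (m)")
    lines(seg$segment_measure, seg$segment_depth, col = "red", lwd = 2)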
+
+#' @export
+#' @title Data filter to remove soak, heave and upcast
+#' @param z0 depth vector
+#' @param run_length length of run upon which to start the soak removal
+#' @param index Logical: should the function return index values or actual depth values?
+#' @return index values of z0 of filtered data (or the filtered values themselves). Prints a message if the function removed more than 10% of the data.
+#' @description
+#' \itemize{
+#' \item Soak period: water profiling instruments typically require a soak period where you let the instrument rest
+#' submerged at the surface. While it is "soaking" it is collecting data. We don't want that data.
+#' \item Upcast versus downcast: typically instruments are turned on before you put them in the water and turned off
+#' once you pull them out. The data consequence of that is that you collect both the "downcast" and the "upcast". In some
+#' cases the upcast is of interest but usually it isn't. And because we would prefer increasing depth data it is better to
+#' remove an upcast if it is present.
+#' \item Heave: when lowering the instrument in rough weather a boat will heave side to side. Sometimes it will heave
+#' enough that you get small data groupings where the depth decreases a little while the boat heaves, then goes down. The overall
+#' trend is still down but those slight upticks in depth cause problems for our algorithm.
+#' }


+depth.filter <- function(z0, run_length=20, index = FALSE) {
+  n_start <- length(z0)
+
+  ## REMOVES SOAK PERIOD
+  ## s are where the runs start; tack on length(z0)+1, where the next run would start if the vector continued;
+  ## subsequent runs start wherever there is a decrease in depth
+  s <- 1L + c(0L, which(z0[-1L] < z0[-length(z0)]), length(z0))
+  ## Index of first run of numbers greater than run_length (defaults to 20)
+
+  # if( length(s) > run_length ) {
+  w <- min(which(diff(s) >= run_length))
+
+  ## Index from first run GTE 20
+  start <- s[w]
+  ## TODO: fix the max depth identification
+  end <- which.max(z0)
+
+  ## Index numbers of x of non soak
+  idx_soak <- start:end
+
+  ## Depth values using soak index
+  ## I'm sure there is a better way to do this.
+  x1 <- z0[idx_soak]
+
+  ## Index of heave
+  idx_heave <- unique(Reduce(function(p, i) if (x1[i] > x1[p]) i else p, seq_along(x1), accumulate = TRUE))
+
+  ## Percent data loss
+  p_loss <- 100 - (length(z0[idx_heave]) / n_start) * 100
+
+  if (p_loss > 10) {
+    message(paste0("Soak, heave and bottom data filter removed ", round(p_loss, 2), "% of the data"))
+  }
+
+  idx <- idx_soak[idx_heave]
+
+  if (index == TRUE) {
+    return(idx)
+  } else {
+    return(z0[idx])
+  }
+}
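A small synthetic cast (hypothetical numbers) exercising all three filters: the soak readings near 1.1 m, the heave uptick mid-cast, and the upcast after maximum depth are all dropped, leaving a strictly increasing depth vector. run_length is shortened here because the toy descent runs are only a few readings long:

    z <- c(1.10, 1.09, 1.10, 1.12, 1.25, 1.60, 2.10, 2.05, 2.60,
           3.20, 3.15, 3.80, 3.75, 3.60)
    depth.filter(z, run_length = 3)                # filtered depths
    depth.filter(z, run_length = 3, index = TRUE)  # or their indices into z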
diff --git a/data/latesummer.rda b/data/latesummer.rda
new file mode 100644
index 0000000..f9ac876
Binary files /dev/null and b/data/latesummer.rda differ
diff --git a/man/approx.bathy.Rd b/man/approx.bathy.Rd
index 6d590a5..3ab15b1 100644
--- a/man/approx.bathy.Rd
+++ b/man/approx.bathy.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/approx.bathy.improved.R
+% Please edit documentation in R/approx.bathy.R
 \name{approx.bathy}
 \alias{approx.bathy}
 \title{Estimate hypsography curve}
diff --git a/man/cline_calc.Rd b/man/cline_calc.Rd
new file mode 100644
index 0000000..e829b0a
--- /dev/null
+++ b/man/cline_calc.Rd
@@ -0,0 +1,26 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/sm-utils.R
+\name{cline_calc}
+\alias{cline_calc}
+\title{Calculate cline of series of segments}
+\usage{
+cline_calc(z_seg = z_seg, sigma_seg = sigma_seg)
+}
+\arguments{
+\item{z_seg}{depth in metres; should be an increasing vector}
+
+\item{sigma_seg}{parameter measured in the water column profile}
+}
+\value{
+the depth of the cline
+}
+\description{
+Cline depth is defined as the midpoint of the segment connecting inflection points that has the maximum slope. A cline cannot occur over a depth range of less than 1 m and also cannot be the shallowest segment. Violating either condition will throw a warning, though the two cases are handled differently. Used mostly with \code{wtr.layer}.
+}
+\references{
+Fiedler, Paul C. Comparison of objective descriptions of the thermocline. Limnology and Oceanography: Methods 8.6 (2010): 313-325.
+}
+\seealso{
+\code{wtr.layer()}
+}
+\keyword{internal}
diff --git a/man/depth.filter.Rd b/man/depth.filter.Rd
new file mode 100644
index 0000000..371568f
--- /dev/null
+++ b/man/depth.filter.Rd
@@ -0,0 +1,31 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/wtr-layer-segments.R
+\name{depth.filter}
+\alias{depth.filter}
+\title{Data filter to remove soak, heave and upcast}
+\usage{
+depth.filter(z0, run_length = 20, index = FALSE)
+}
+\arguments{
+\item{z0}{depth vector}
+
+\item{run_length}{length of run upon which to start the soak removal}
+
+\item{index}{Logical: should the function return index values or actual depth values?}
+}
+\value{
+index values of z0 of filtered data (or the filtered values themselves). Prints a message if the function removed more than 10\% of the data.
+}
+\description{
+\itemize{
+  \item Soak period: water profiling instruments typically require a soak period where you let the instrument rest
+  submerged at the surface. While it is "soaking" it is collecting data. We don't want that data.
+  \item Upcast versus downcast: typically instruments are turned on before you put them in the water and turned off
+  once you pull them out. The data consequence of that is that you collect both the "downcast" and the "upcast". In some
+  cases the upcast is of interest but usually it isn't. And because we would prefer increasing depth data it is better to
+  remove an upcast if it is present.
+  \item Heave: when lowering the instrument in rough weather a boat will heave side to side. Sometimes it will heave
+  enough that you get small data groupings where the depth decreases a little while the boat heaves, then goes down. The overall
+  trend is still down but those slight upticks in depth cause problems for our algorithm.
+}
+}
diff --git a/man/latesummer.Rd b/man/latesummer.Rd
new file mode 100644
index 0000000..9915f87
--- /dev/null
+++ b/man/latesummer.Rd
@@ -0,0 +1,26 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/data.R
+\docType{data}
+\name{latesummer}
+\alias{latesummer}
+\title{Late Summer Profile}
+\format{An object of class \code{data.frame} with 881 rows and 6 columns.}
+\usage{
+latesummer
+}
+\description{
+Late summer water profile taken from Quesnel Lake, British Columbia, Canada. Profile taken with a Sea-Bird
+SBE19plus.
+}
+\details{
+\describe{
+  \item{depth}{Depth, m}
+  \item{temper}{Temperature, degC}
+  \item{salinity}{Salinity, PSU}
+  \item{oxygen}{Oxygen, ml/l}
+  \item{oxygen.sat}{Oxygen saturation, percent saturation}
+  \item{density}{Density, kg/m^3}
+  ...
+}
+}
+\keyword{datasets}
diff --git a/man/wtr.layer.Rd b/man/wtr.layer.Rd
new file mode 100644
index 0000000..5292f17
--- /dev/null
+++ b/man/wtr.layer.Rd
@@ -0,0 +1,49 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/wtr-layer-segments.R
+\name{wtr.layer}
+\alias{wtr.layer}
+\title{Exploration of lake water column layers}
+\usage{
+wtr.layer(data, depth, measure, thres = 0.1, z0 = 2.5, zmax = 150,
+  nseg = "unconstrained")
+}
+\arguments{
+\item{data}{data supplied as a bare (unquoted) value}
+
+\item{depth}{depth in metres; should be an increasing vector; supplied as a bare (unquoted) value}
+
+\item{measure}{parameter measured in the water column profile; supplied as a bare (unquoted) value}
+
+\item{thres}{error norm; defaults to 0.1}
+
+\item{z0}{initial depth in metres; defaults to 2.5 m}
+
+\item{zmax}{maximum depth in metres; defaults to 150 m}
+
+\item{nseg}{optional parameter to define the number of segments a priori; defaults to an unconstrained approach whereby
+the algorithm determines segmentation by minimizing the error norm over each segment}
+}
+\value{
+a data.frame with a list column. This includes: min_depth (the initial depth z0), nseg (number of segments),
+mld (mixed layer depth), cline (the midpoint of the segment connecting inflection points that has the maximum slope;
+the thermocline for temperature measures) and segments calculated by the sm algorithm.
+}
+\description{
+Extract water column parameters of a given parameter from a profile using the split-and-merge algorithm.
+The cline is defined as the midpoint of the layer of water where the physical property changes the most over a
+small depth interval. The exact cline depends on the specification of measure. For example, if temperature is specified,
+then we can expect cline to output the thermocline.
+}
+\examples{
+data("latesummer")
+df1 <- wtr.layer(depth=latesummer$depth, measure = latesummer$temper)
+df1$mld
+df1$segments
+
+wtr.layer(data = latesummer, depth=depth, measure = temper, nseg=4)
+
+}
+\references{
+Thomson, R. and I. Fine. 2003. Estimating Mixed Layer Depth from Oceanic Profile Data. Journal of
+Atmospheric and Oceanic Technology. 20(2), 319-329.
+}
diff --git a/tests/testthat.R b/tests/testthat.R
index 72fb52a..350fd1c 100644
--- a/tests/testthat.R
+++ b/tests/testthat.R
@@ -1,2 +1,3 @@
 library(testthat)
 test_check('rLakeAnalyzer')
+
diff --git a/tests/testthat/test_cline_calc.R b/tests/testthat/test_cline_calc.R
new file mode 100644
index 0000000..c806408
--- /dev/null
+++ b/tests/testthat/test_cline_calc.R
@@ -0,0 +1,7 @@
+context("Testing for warnings in cline_calc")
+
+test_that("cline_calc throws a warning with this data", {
+  depth <- c(1.56, 1.61, 20.5, 29.8)
+  measure <- c(13.0, 12.8, 7.8, 7.4)
+  expect_warning(cline_calc(depth, measure))
+})
diff --git a/tests/testthat/test_depth.filter.R b/tests/testthat/test_depth.filter.R
new file mode 100644
index 0000000..66d9e2a
--- /dev/null
+++ b/tests/testthat/test_depth.filter.R
@@ -0,0 +1,16 @@
+context("Testing depth.filter")
+
+test_that("Removed heave, upcast and soak period", {
+  # plot(1:length(x), x)
+  x <- c(
+    1.095, 1.094, 1.2, 1.096, 1.098, 1.097, 1.101, 1.12, 1.12, 1.12, 1.151, 1.201, 1.245, 1.293, 1.379,
+    1.482, 1.555, 1.616, 1.669, 1.719, 1.78, 1.842, 1.91, 1.949, 1.959,
+    1.955, 1.939, 1.911, 1.899, 1.903, 1.908, 1.922, 1.918, 1.907, 1.893,
+    1.88, 1.877, 1.884, 1.895, 1.903, 1.914, 1.917, 1.913, 1.905, 1.9
+  )
+  # plot(1:length(x[depth.filter(x)]), x[depth.filter(x)])
+  expect_equal(depth.filter(x), c(
+    1.097, 1.101, 1.12, 1.151, 1.201, 1.245, 1.293, 1.379, 1.482,
+    1.555, 1.616, 1.669, 1.719, 1.78, 1.842, 1.91, 1.949, 1.959
+  ))
+})
diff --git a/tests/testthat/test_getxxnorm.R b/tests/testthat/test_getxxnorm.R
new file mode 100644
index 0000000..a5eca53
--- /dev/null
+++ b/tests/testthat/test_getxxnorm.R
@@ -0,0 +1,9 @@
+context("Testing the getxxnorm() function.")
+
+test_that("getxxnorm normalizes the endpoints to (0,0) and (1,1).", {
+  res <- getxxnorm(c(2, 4, 6, 10), c(1, 3, 7, 9))
+  expect_equal(res$anormx, 8)
+  expect_equal(res$anormy, 8)
+  expect_equal(res$xx, c(0, 0.25, 0.5, 1))
+  expect_equal(res$yy, c(0, 0.25, 0.75, 1))
+})
diff --git a/tests/testthat/test_merge.R b/tests/testthat/test_merge.R
new file mode 100644
index 0000000..f7b6faa
--- /dev/null
+++ b/tests/testthat/test_merge.R
@@ -0,0 +1,13 @@
+context("Testing the merge_intervals() function.")
+
+test_that("merge_intervals merges in the middle.", {
+  expect_equal(merge_intervals(i = 3, ni = c(1, 5, 10, 20, 28)), c(1, 5, 20, 28))
+})
+
+test_that("merge_intervals merges the first interval.", {
+  expect_equal(merge_intervals(2, c(1, 5, 10, 20)), c(1, 10, 20))
+})
+
+test_that("merge_intervals merges the second-to-last interval.", {
+  expect_equal(merge_intervals(3, c(1, 5, 10, 20)), c(1, 5, 20))
+})
diff --git a/tests/testthat/test_r2b.R b/tests/testthat/test_r2b.R
new file mode 100644
index 0000000..2cb0259
--- /dev/null
+++ b/tests/testthat/test_r2b.R
@@ -0,0 +1,9 @@
+context("Testing the r2b() function.")
+
+
+test_that("r2b gets the right answers. Duh.", {
+  x <- c(0.0, 1.5, 2.5, 4.5, 5.0, 6.0, 7.1, 9.7, 10.1, 12.0)
+  y <- c(2.0, 7.0, 3.0, 4.0, 5.0, 7.0, 7.1, 8.3, 9.1, 10.0)
+  digs <- 16 # number of digits to the right of the decimal place we expect to be the same.
+  expect_equal(round(r2b(1, length(x), x, y), digits = digs), round(3.833333333333333, digs))
+})
diff --git a/tests/testthat/test_s_m_p.R b/tests/testthat/test_s_m_p.R
new file mode 100644
index 0000000..6e6aa22
--- /dev/null
+++ b/tests/testthat/test_s_m_p.R
@@ -0,0 +1,34 @@
+context("Testing s_m_p.")
+
+test_that("s_m_p gets the same answers as the Fortran.", {
+  # Note that this is a pretty bogus answer, but that's what the
+  # Fortran program produces...
+  # print("Testing smp.")
+  eps <- .01
+  x <- c(0.0, 5.0, 10.0)
+  y <- c(1.0, 10.0, 10.0)
+  expect_equal(s_m_p(eps, x, y), c(1, 3, 3))
+  # print("Still testing smp.")
+  eps <- .01
+  x <- c(0.0, 5.0, 10.0, 15.0, 20.0, 25.0, 30.0, 35.0, 40.0, 45.0)
+  y <- c(1.0, 2.0, 3.0, 4.0, 5.0, 5.0, 4.0, 4.0, 3.0, 2.0)
+  expect_equal(s_m_p(eps, x, y), c(1, 3, 6, 8, 10))
+  # print("Yet still testing smp.")
+
+  N <- 100
+  eps <- .01
+  angle <- (0:(N - 1)) * 2 * pi / N
+  x <- angle
+  y <- sin(angle)
+
+  result <- c(1, 3, 12, 19, 26, 32, 38, 44, 59, 65, 71, 77, 82, 87, 93, 100)
+  expect_equal(s_m_p(eps, x, y), result)
+  # print("No longer testing smp.")
+})
+
+# test_that("s_m_p fails when x is not an increasing vector", {
+#   eps = .005
+#   x1 <- c(0.0,1.5,2.5,4.5,6.0,5.0,7.1,9.7,10.1,12.0) ## Non-increasing vector
+#   y = c(2.0,7.0,3.0,4.0,5.0,7.0,7.1,8.3,9.1,10.0)
+#   expect_error(s_m_p(eps,x1,y), "x is not an increasing vector")
+# })
diff --git a/tests/testthat/test_spl.R b/tests/testthat/test_spl.R
new file mode 100644
index 0000000..0fb8007
--- /dev/null
+++ b/tests/testthat/test_spl.R
@@ -0,0 +1,31 @@
+context("Testing the split_interval() function.")
+
+test_that("split_interval divides wide intervals.", {
+  expect_equal(split_interval(c(1, 5, 10, 20), 2), c(1, 5, 7, 10, 20))
+  expect_equal(split_interval(c(1, 2, 6, 10), 2), c(1, 2, 4, 6, 10))
+})
+
+test_that("split_interval divides the first and last intervals.", {
+  expect_equal(split_interval(c(1, 5, 10), 1), c(1, 3, 5, 10))
+  expect_equal(split_interval(c(1, 5, 10), 2), c(1, 5, 7, 10))
+})
+
+test_that("split_interval divides even,even intervals.", {
+  expect_equal(split_interval(c(1, 2, 6, 10), 2), c(1, 2, 4, 6, 10))
+})
+
+test_that("split_interval divides even,odd intervals.", {
+  expect_equal(split_interval(c(1, 2, 5, 10), 2), c(1, 2, 4, 5, 10))
+})
+
+test_that("split_interval divides odd,even intervals.", {
+  expect_equal(split_interval(c(1, 3, 6, 10), 2), c(1, 3, 5, 6, 10))
+})
+
+test_that("split_interval divides odd,odd intervals.", {
+  expect_equal(split_interval(c(1, 3, 7, 10), 2), c(1, 3, 5, 7, 10))
+})
+
+test_that("interval number is less than length(ni)", {
+  expect_error(split_interval(c(1, 3, 6, 10), 5))
+})
diff --git a/tests/testthat/test_wtr.layer.R b/tests/testthat/test_wtr.layer.R
new file mode 100644
index 0000000..199d326
--- /dev/null
+++ b/tests/testthat/test_wtr.layer.R
@@ -0,0 +1,31 @@
+context("Testing wtr.layer")
+data("latesummer")
+
+test_that("Ensure that unconstrained and specified layer approaches result in the same answer", {
+  expect_equal(
+    wtr.layer(depth = latesummer$depth, measure = latesummer$temper)[, 1:4],
+    wtr.layer(
+      depth = latesummer$depth, measure = latesummer$temper,
+      nseg = wtr.layer(depth = latesummer$depth, measure = latesummer$temper)$nseg
+    )[, 1:4]
+  )
+})
+
+
+test_that("NA's are returned when profile has 30 or fewer readings", {
+  # Data with fewer than 30 readings
+  z <- 1:25
+  sigmavar <- rnorm(z)
+  expect_warning(wtr.layer(depth = z, measure = sigmavar))
+  expect_warning(wtr.layer(data = latesummer[1:20, ], depth = depth, measure = temper))
+})
+
+test_that("Omitting the data argument results in a valid data.frame with constrained and unconstrained scenarios", {
+  expect_silent(wtr.layer(depth = latesummer$depth, measure = latesummer$temper))
+  expect_silent(wtr.layer(depth = latesummer$depth, measure = latesummer$temper, nseg = 5))
+})
+
+test_that("Including the data argument results in a valid data.frame with constrained and unconstrained scenarios", {
+  expect_silent(wtr.layer(data = latesummer, depth = depth, measure = temper))
+  expect_silent(wtr.layer(data = latesummer, depth = depth, measure = temper, nseg = 5))
+})
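End to end, the two exported pieces chain naturally: filter a raw cast first, then segment what remains. A sketch using the bundled profile as a stand-in for a raw cast (on an already clean profile depth.filter() simply returns every index):

    data("latesummer")
    idx <- depth.filter(latesummer$depth, index = TRUE)
    wtr.layer(depth = latesummer$depth[idx], measure = latesummer$temper[idx])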