From 240b7307d11d16bbb72bdc76f1305de1743f644b Mon Sep 17 00:00:00 2001 From: boshek Date: Fri, 29 Sep 2017 10:31:21 -0700 Subject: [PATCH] Build vignettes --- .gitignore | 1 - inst/doc/data.handling.R | 26 +++++ inst/doc/data.handling.Rmd | 73 ++++++++++++ inst/doc/data.handling.html | 217 ++++++++++++++++++++++++++++++++++++ inst/doc/sm_algorithm.R | 21 ++++ inst/doc/sm_algorithm.Rmd | 51 +++++++++ inst/doc/sm_algorithm.html | 152 +++++++++++++++++++++++++ 7 files changed, 540 insertions(+), 1 deletion(-) create mode 100644 inst/doc/data.handling.R create mode 100644 inst/doc/data.handling.Rmd create mode 100644 inst/doc/data.handling.html create mode 100644 inst/doc/sm_algorithm.R create mode 100644 inst/doc/sm_algorithm.Rmd create mode 100644 inst/doc/sm_algorithm.html diff --git a/.gitignore b/.gitignore index aee7cf1..3575951 100644 --- a/.gitignore +++ b/.gitignore @@ -3,4 +3,3 @@ .RData *.Rproj -inst/doc diff --git a/inst/doc/data.handling.R b/inst/doc/data.handling.R new file mode 100644 index 0000000..fb3cfc4 --- /dev/null +++ b/inst/doc/data.handling.R @@ -0,0 +1,26 @@ +## ---- echo = FALSE------------------------------------------------------- +library(knitr) + +## ---- echo = FALSE------------------------------------------------------- +df2 <- data.frame(datetime = c("2008-07-01 01:00","2008-07-01 02:00","2008-07-01 03:00","2008-07-01 04:00"), + doobs_0.5= c("8.3","8.2","8.2","8.1")) +kable(df2) + +## ---- echo = FALSE------------------------------------------------------- +df1 <- data.frame(datetime = c("2008-07-01 01:00","2008-07-01 02:00","2008-07-01 03:00","2008-07-01 04:00"), + wtr_0.5= c("22.3","22.31","22.31","22.32"), + wtr_1 = c("22.3","22.31","22.31","22.32"), + wtr_2 = rep(21, 4)) +kable(df1) + +## ---- echo = FALSE------------------------------------------------------- +df <- data.frame(Abbreviation = c("doobs","wtr","wnd","airT","rh"), + Variable = c("Dissolved Oxygen Concentration","Water Temperature","Wind Speed", + "Air 
Temperature","Relative Humidity"), + `Assumed Units` = c("mg/L ","°C","m/s","°C","%")) +kable(df) + +## ---- eval = FALSE------------------------------------------------------- +# tmp = data.frame() +# write.table(tmp, "test.wtr", sep='\t', row.names=FALSE) + diff --git a/inst/doc/data.handling.Rmd b/inst/doc/data.handling.Rmd new file mode 100644 index 0000000..deedea4 --- /dev/null +++ b/inst/doc/data.handling.Rmd @@ -0,0 +1,73 @@ +--- +title: "Introduction to file and data formats in rLakeAnalyzer" +author: "Luke Winslow" +date: "July 6, 2014" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Vignette Title} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +header-includes: + - \usepackage{gensymb} +--- + +```{r, echo = FALSE} +library(knitr) +``` + +## Introduction +This document is an introduction to handling the type of data typically used in rLakeAnalyzer. It will hopefully give the reader enough information to be able to quickly and effectively format your own data to take advantage of the more powerful features. + +## File Format + +We have tried to use a simple but standard file format that eases import and parsing of data while still being easy to generate and edit using commonly available tools like Microsoft Excel. Below is a very simple example of how the files are structured. + +```{r, echo = FALSE} +df2 <- data.frame(datetime = c("2008-07-01 01:00","2008-07-01 02:00","2008-07-01 03:00","2008-07-01 04:00"), + doobs_0.5= c("8.3","8.2","8.2","8.1")) +kable(df2) +``` + +There are a few key aspects to these file structure to note. The date/time format, the format of the header, and the file naming scheme. These key points are discussed here. + +## DateTime Format + +The date and time of all observations is stored in a single, string column. The header of this column must be the word "datetime" without quotes. It is also case insensitive so "DateTime" and other variations will work. 
+ +The datetime format itself is exclusively in an ISO-like format (ISO-8601). It is in most-to-least significant order. It requires a "-" (dash) delimited date format and a ":" (colon) delimited time. "yyyy-mm-dd HH:MM:SS". The date must come first and is separated from the time with a single space. Seconds are optional. This format can easily be created in Excel using a custom date/time format of "yyyy-mm-dd hh:mm:ss" without quotes. + +Note: This format differs from the ISO-8601 format in that a space is used to separate the date and time. This is done to support the use of Microsoft Excel as Excel does not natively recognize the ISO format. + +## Header Format + +The header is used to help identify both the variable type as well as the depth of observation of the data as well as distinguish the data columns from the datetime column. As mentioned above, a "datetime" column is required using the format described above. + +The data columns must be identified with a variable type and optionally, a depth. For example, a water temperature collected at 1 meter depth would have the column header "wtr\_1". The usefulness of this simple format can be seen when dealing with profile data taken at many depths (see below). + +```{r, echo = FALSE} +df1 <- data.frame(datetime = c("2008-07-01 01:00","2008-07-01 02:00","2008-07-01 03:00","2008-07-01 04:00"), + wtr_0.5= c("22.3","22.31","22.31","22.32"), + wtr_1 = c("22.3","22.31","22.31","22.32"), + wtr_2 = rep(21, 4)) +kable(df1) +``` + +While any text can be used to describe a variable, the table below lists the current "standard" variables that are used by rLakeAnalyzer and other toolboxes for identifying commonly collected data in the most common units. If these standards are adhered to, many of the more helpful functions will work natively. For example, water.density expects temperature to be supplied in celsius, the default unit used for the "wtr" abbreviation. 
+ +```{r, echo = FALSE} +df <- data.frame(Abbreviation = c("doobs","wtr","wnd","airT","rh"), + Variable = c("Dissolved Oxygen Concentration","Water Temperature","Wind Speed", + "Air Temperature","Relative Humidity"), + `Assumed Units` = c("mg/L ","°C","m/s","°C","%")) +kable(df) +``` + + +## File Format + +The file format is a simple tab-delimited file. It is easy to export files of this format using Excel or even R itself. To export the appropriate format from R, use "write.table" as in the following example. + +```{r, eval = FALSE} +tmp = data.frame() +write.table(tmp, "test.wtr", sep='\t', row.names=FALSE) +``` diff --git a/inst/doc/data.handling.html b/inst/doc/data.handling.html new file mode 100644 index 0000000..68bee8a --- /dev/null +++ b/inst/doc/data.handling.html @@ -0,0 +1,217 @@ + + + + + + + + + + + + + + + +Introduction to file and data formats in rLakeAnalyzer + + + + + + + + + + + + + + + + + + +

Introduction to file and data formats in rLakeAnalyzer

+

Luke Winslow

+

July 6, 2014

+ + + +
## Warning: package 'knitr' was built under R version 3.4.1
+
+

Introduction

+

This document is an introduction to handling the type of data typically used in rLakeAnalyzer. It will hopefully give you enough information to quickly and effectively format your own data to take advantage of the more powerful features.

+
+
+

File Format

+

We have tried to use a simple but standard file format that eases import and parsing of data while still being easy to generate and edit using commonly available tools like Microsoft Excel. Below is a very simple example of how the files are structured.

+ + + + + + + + + + + + + + + + + + + + + + + + + +
datetimedoobs_0.5
2008-07-01 01:008.3
2008-07-01 02:008.2
2008-07-01 03:008.2
2008-07-01 04:008.1
+

There are a few key aspects of this file structure to note: the date/time format, the format of the header, and the file naming scheme. These key points are discussed here.

+
+
+

DateTime Format

+

The date and time of all observations is stored in a single, string column. The header of this column must be the word “datetime” without quotes. It is also case insensitive so “DateTime” and other variations will work.

+

The datetime format itself is exclusively an ISO-like format (based on ISO-8601). It is in most-to-least significant order. It requires a “-” (dash) delimited date and a “:” (colon) delimited time, i.e. “yyyy-mm-dd HH:MM:SS”. The date must come first and is separated from the time by a single space. Seconds are optional. This format can easily be created in Excel using a custom date/time format of “yyyy-mm-dd hh:mm:ss” without quotes.

+

Note: This format differs from the ISO-8601 format in that a space is used to separate the date and time. This is done to support the use of Microsoft Excel as Excel does not natively recognize the ISO format.

+
+
+

Header Format

+

The header is used to identify both the variable type and the depth of observation of the data, and to distinguish the data columns from the datetime column. As mentioned above, a “datetime” column is required, using the format described in the previous section.

+

The data columns must be identified with a variable type and optionally, a depth. For example, a water temperature collected at 1 meter depth would have the column header “wtr_1”. The usefulness of this simple format can be seen when dealing with profile data taken at many depths (see below).

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
datetimewtr_0.5wtr_1wtr_2
2008-07-01 01:0022.322.321
2008-07-01 02:0022.3122.3121
2008-07-01 03:0022.3122.3121
2008-07-01 04:0022.3222.3221
+

While any text can be used to describe a variable, the table below lists the current “standard” variables that are used by rLakeAnalyzer and other toolboxes for identifying commonly collected data in the most common units. If these standards are adhered to, many of the more helpful functions will work natively. For example, water.density expects temperature to be supplied in degrees Celsius, the default unit used for the “wtr” abbreviation.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AbbreviationVariableAssumed.Units
doobsDissolved Oxygen Concentrationmg/L
wtrWater Temperature°C
wndWind Speedm/s
airTAir Temperature°C
rhRelative Humidity%
+
+
+

File Format

+

The file format is a simple tab-delimited file. It is easy to export files of this format using Excel or even R itself. To export the appropriate format from R, use “write.table” as in the following example.

+
tmp = data.frame()
+write.table(tmp, "test.wtr", sep='\t', row.names=FALSE)
+
+ + + + + + + + diff --git a/inst/doc/sm_algorithm.R b/inst/doc/sm_algorithm.R new file mode 100644 index 0000000..20b3d4c --- /dev/null +++ b/inst/doc/sm_algorithm.R @@ -0,0 +1,21 @@ +## ---- message=FALSE------------------------------------------------------ +library(rLakeAnalyzer) +library(knitr) + +## ------------------------------------------------------------------------ +data("latesummer") +wldf <- wtr.layer(depth = latesummer$depth, measure = latesummer$temper) +knitr::kable(wldf) + +## ---- eval = TRUE, echo=TRUE--------------------------------------------- +wldf$segments + +## ---- fig.show = "hold", fig.width = 8, fig.height = 6------------------- +plot(y = latesummer$depth, x = latesummer$temper, ylim = rev(range(latesummer$depth))) +abline(h = wldf$cline, col='blue') +abline(h = wldf$mld, col='red') +abline(h = wldf$min_depth, col='green') +text(16, wldf$cline+3, "Thermocline", col = 'blue') +text(16, wldf$mld+3, "Mix Layer Depth", col = 'red') +text(16, wldf$min_depth+3, "Minimum Depth", col = 'green') + diff --git a/inst/doc/sm_algorithm.Rmd b/inst/doc/sm_algorithm.Rmd new file mode 100644 index 0000000..3275364 --- /dev/null +++ b/inst/doc/sm_algorithm.Rmd @@ -0,0 +1,51 @@ +--- +title: "Split and Merge Algorithm Usage" +author: "Sam Albers and Doug Collinge" +date: "`r Sys.Date()`" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Vignette Title} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + + +## Packages needing loading +```{r, message=FALSE} +library(rLakeAnalyzer) +library(knitr) +``` + + +## Split and merge algorithm +Water column identification is provided by the split-and-merge algorithm. Implementation of the split-and-merge algorithm for a water profile occurs within the `wtr.layer()` function: + +## Simple application of the split and merge algorithm +Below is a simple one profile example of determining key water column parameters using the split-and-merge algorithm. 
The default behaviour for `wtr.layer` is to run the algorithm *without* specifying the number of segments. `wtr.layer()` adopts as defaults a minimum depth (z0) of 2.5 m, a maximum depth (zmax) of 150 m and an error threshold (thres) of 0.1. +```{r} +data("latesummer") +wldf <- wtr.layer(depth = latesummer$depth, measure = latesummer$temper) +knitr::kable(wldf) +``` + +In this example, you'll note that `wldf$segments` is formatted as a list-column. A thorough demonstration of a list column can be found [here](https://jennybc.github.io/purrr-tutorial/ls13_list-columns.html). This type of data format has been included here to consolidate split and merge results and align the output to work well with [tidyverse](https://www.tidyverse.org/) tools. If you are interested in working with the segments data from `wtr.layer()`, use this approach: +```{r, eval = TRUE, echo=TRUE} +wldf$segments +``` + +Note that the axes of the water column profile have been reversed and flipped to better visualize the water column and conform to standard limnological displays. +```{r, fig.show = "hold", fig.width = 8, fig.height = 6} +plot(y = latesummer$depth, x = latesummer$temper, ylim = rev(range(latesummer$depth))) +abline(h = wldf$cline, col='blue') +abline(h = wldf$mld, col='red') +abline(h = wldf$min_depth, col='green') +text(16, wldf$cline+3, "Thermocline", col = 'blue') +text(16, wldf$mld+3, "Mix Layer Depth", col = 'red') +text(16, wldf$min_depth+3, "Minimum Depth", col = 'green') +``` + +## Important references +- Pavlidis, T., and S. L. Horowitz, 1974: Segmentation of plane curves. IEEE Trans. Comput., C-23, 860–870. +- Thomson, R. and I. Fine. 2003. Estimating Mixed Layer Depth from Oceanic Profile Data. Journal of Atmospheric and Oceanic Technology. 20(2), 319-329. +- Fiedler, Paul C. "Comparison of objective descriptions of the thermocline." Limnology and Oceanography: Methods 8.6 (2010): 313-325. 
+ diff --git a/inst/doc/sm_algorithm.html b/inst/doc/sm_algorithm.html new file mode 100644 index 0000000..29f311c --- /dev/null +++ b/inst/doc/sm_algorithm.html @@ -0,0 +1,152 @@ + + + + + + + + + + + + + + + + +Split and Merge Algorithm Usage + + + + + + + + + + + + + + + + + +

Split and Merge Algorithm Usage

+

Sam Albers and Doug Collinge

+

2017-09-29

+ + + +
+

Packages needing loading

+
library(rLakeAnalyzer)
+library(knitr)
+
+
+

Split and merge algorithm

+

Water column identification is provided by the split-and-merge algorithm. Implementation of the split-and-merge algorithm for a water profile occurs within the wtr.layer() function:

+
+
+

Simple application of the split and merge algorithm

+

Below is a simple one-profile example of determining key water column parameters using the split-and-merge algorithm. The default behaviour for wtr.layer is to run the algorithm without specifying the number of segments. wtr.layer() adopts as defaults a minimum depth (z0) of 2.5 m, a maximum depth (zmax) of 150 m and an error threshold (thres) of 0.1.

+
data("latesummer")
+wldf <- wtr.layer(depth = latesummer$depth, measure = latesummer$temper)
+knitr::kable(wldf)
+ + + + + + + + + + + + + + + + + + + +
min_depthnsegmldclinesegments
2.547.056516.390252.59800, 7.05650, 25.72400, 98.13900, 17.94060, 17.38405, 5.51445, 4.46375
+

In this example, you’ll note that wldf$segments is formatted as a list-column. A thorough demonstration of a list column can be found here. This type of data format has been included here to consolidate split and merge results and align the output to work well with tidyverse tools. If you are interested in working with the segments data from wtr.layer(), use this approach:

+
wldf$segments
+
## [[1]]
+##   segment_depth segment_measure
+## 1        2.5980        17.94060
+## 2        7.0565        17.38405
+## 3       25.7240         5.51445
+## 4       98.1390         4.46375
+

Note that the axes of the water column profile have been reversed and flipped to better visualize the water column and conform to standard limnological displays.

+
plot(y = latesummer$depth, x = latesummer$temper, ylim = rev(range(latesummer$depth)))
+abline(h = wldf$cline, col='blue')
+abline(h = wldf$mld, col='red')
+abline(h = wldf$min_depth, col='green')
+text(16, wldf$cline+3, "Thermocline", col = 'blue')
+text(16, wldf$mld+3, "Mix Layer Depth", col = 'red')
+text(16, wldf$min_depth+3, "Minimum Depth", col = 'green')
+

+
+
+

Important references

+ +
+ + + + + + + +