diff --git a/Dockerfile b/Dockerfile
index 49c16e6..b60ae1c 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -2,7 +2,7 @@ FROM rocker/r-ver:4.2.2
# DeGAUSS container metadata
ENV degauss_name="habre_pm"
-ENV degauss_version="0.2.0"
+ENV degauss_version="0.2.1"
ENV degauss_description="weekly pm2.5 for California (Habre)"
# add OCI labels based on environment variables too
diff --git a/README.Rmd b/README.Rmd
new file mode 100644
index 0000000..03b5cdf
--- /dev/null
+++ b/README.Rmd
@@ -0,0 +1,146 @@
+---
+output: github_document
+---
+
+
+
+```{r, include = FALSE}
+knitr::opts_chunk$set(
+ collapse = TRUE,
+ message = FALSE,
+ warning = FALSE,
+ comment = "#>"
+)
+```
+
+# habre_pm
+
+[](https://github.com/degauss-org/habre_pm/releases)
+[](https://github.com/degauss-org/habre_pm/actions/workflows/build-deploy-release.yaml)
+
+## Using
+
+If `my_address_file_geocoded.csv` is a file in the current working directory with coordinate columns named `lat` and `lon` (for locations within the state of California) and date columns named `start_date` and `end_date`, then the [DeGAUSS command](https://degauss.org/using_degauss.html#DeGAUSS_Commands):
+
+```sh
+docker run --rm -v $PWD:/tmp ghcr.io/degauss-org/habre_pm:0.2.1 my_address_file_geocoded.csv
+```
+
+will produce `my_address_file_geocoded_habre_pm_0.2.1.csv` with added columns:
+
+- **`pm`**: time weighted average of weekly PM2.5
+- **`sd`**: time weighted standard deviation, computed as the square root of the average of the squared weekly standard deviations
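+
+The input file can be created however you like; as a point of reference, here is a minimal sketch in R (hypothetical coordinates, with dates in the `%m/%d/%y` format used by the test file):
+
+```{r, eval = FALSE}
+# hypothetical example input; points must fall within California
+readr::write_csv(
+  data.frame(
+    id = 1,
+    lat = 34.05,
+    lon = -118.24,
+    start_date = "1/25/08",
+    end_date = "1/31/08"
+  ),
+  "my_address_file_geocoded.csv"
+)
+```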
+
+## Geomarker Methods
+
+- Geocoded points are overlaid with weekly PM2.5 rasters corresponding to the input date range.
+- For date ranges that span weeks, exposures are a time-weighted average.
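+
+Concretely, the average is taken over the days of the date range, so each week's value is weighted by its number of overlapping days, and the weekly standard deviations are pooled on the variance scale. A minimal sketch of the arithmetic, assuming hypothetical weekly values for a range covering 3 days of one week and 4 days of the next:
+
+```{r}
+# hypothetical weekly raster values for the two overlapped weeks
+pm_wk1 <- 4.1; sd_wk1 <- 2.4
+pm_wk2 <- 3.5; sd_wk2 <- 2.0
+
+# day-weighted mean of the weekly means (3 + 4 = 7 days)
+pm <- (3 * pm_wk1 + 4 * pm_wk2) / 7
+
+# square root of the day-weighted mean of the squared weekly SDs
+sd <- sqrt((3 * sd_wk1^2 + 4 * sd_wk2^2) / 7)
+```
+
+This is exactly what the daily expansion below accomplishes: each day contributes its week's values once, and `summarize()` averages over days.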
+
+#### Example code for time weighted average PM and SD
+
+```{r}
+library(tidyverse)
+library(dht)
+library(sf)
+```
+
+**Read in data**
+
+Here we use only row 3 of the test file because its date range spans two calendar weeks.
+
+```{r}
+d <- read_csv('test/my_address_file_geocoded_habre_pm25_leo.csv',
+ col_types = cols(start_date = col_date(format = "%m/%d/%y"),
+ end_date = col_date(format = "%m/%d/%y"))) |>
+ slice(3) |>
+ select(-prepm25, -prepm25_sd)
+d
+```
+
+**Expand start and end dates to daily**
+
+```{r}
+d_daily <- dht::expand_dates(d, by = "day")
+d_daily
+```
+
+**Read in date lookup / week index and expand start and end dates to daily**
+
+```{r}
+date_lookup <-
+ readr::read_csv("pm25_iweek_startdate.csv") |>
+ dht::expand_dates(by = "day")
+
+date_lookup |>
+ filter(iweek %in% 413:414)
+
+date_lookup <-
+ date_lookup |>
+ dplyr::select(week = iweek, date)
+```
+
+**Join week index to input data using date (daily)**
+
+```{r}
+d_week <- d_daily |>
+ dplyr::left_join(date_lookup, by = "date")
+
+d_week
+```
+
+**Read in and join raster values**
+
+```{r}
+r <- terra::rast("habre.tif")
+
+d_dedup <- d_week |>
+ select(lat, lon, week) |>
+ # keep one row per unique point-week combination
+ distinct() |>
+ mutate(layer_name_mean = glue::glue("week{week}_mean"),
+ layer_name_sd = glue::glue("week{week}_std"))
+
+d_for_extract <- d_dedup |>
+ st_as_sf(coords = c("lon", "lat"), crs = 4326) |>
+ st_transform(st_crs(r)) |>
+ terra::vect()
+
+# extract, for each point, the value from the raster layer named in
+# its layer_name_* column (via terra's `layer` argument)
+d_pm <- terra::extract(r,
+ d_for_extract,
+ layer = "layer_name_mean")
+
+d_sd <- terra::extract(r,
+ d_for_extract,
+ layer = "layer_name_sd")
+
+d_dedup <- d_dedup |>
+ mutate(pm = d_pm$value,
+ sd = d_sd$value)
+
+d_out <- left_join(d_week, d_dedup, by = c("lat", "lon", "week"))
+
+d_out
+
+d_out <- d_out |>
+ group_by(id) |>
+ summarize(pm = round(sum(pm)/n(),2),
+ sd = round(sqrt((sum(sd^2))/n()),2))
+
+
+d_out <- left_join(d, d_out, by = "id")
+d_out
+```
+
+## Geomarker Data
+
+- PM2.5 rasters were created using a model developed by Rima Habre and Lianfa Li.
+
+> Li L, Girguis M, Lurmann F, Pavlovic N, McClure C, Franklin M, Wu J, Oman LD, Breton C, Gilliland F, Habre R. Ensemble-based deep learning for estimating PM2.5 over California with multisource big data including wildfire smoke. Environment International. 2020 Dec 1;145:106143. https://doi.org/10.1016/j.envint.2020.106143
+
+- The raster stack used in this container is stored in S3 at [`s3://habre/habre.tif`](https://habre.s3-us-east-2.amazonaws.com/habre.tif)
+- Individual rasters that make up the raster stack are stored at [`s3://habre/li_2020/`](https://habre.s3-us-east-2.amazonaws.com/li_2020/)
+
+## DeGAUSS Details
+
+For detailed documentation on DeGAUSS, including general usage and installation, please see the [DeGAUSS homepage](https://degauss.org).
\ No newline at end of file
diff --git a/README.md b/README.md
index 3e9c690..5ed405a 100644
--- a/README.md
+++ b/README.md
@@ -1,35 +1,208 @@
+
+
+
# habre_pm
[](https://github.com/degauss-org/habre_pm/releases)
-[](https://github.com/degauss-org/habre_pm/actions/workflows/build-deploy-release.yaml)
+[](https://github.com/degauss-org/habre_pm/actions/workflows/build-deploy-release.yaml)
## Using
-If `my_address_file_geocoded.csv` is a file in the current working directory with coordinate columns named `lat`, `lon`, (within the state of California) `start_date`, and `end_date` then the [DeGAUSS command](https://degauss.org/using_degauss.html#DeGAUSS_Commands):
+If `my_address_file_geocoded.csv` is a file in the current working
+directory with coordinate columns named `lat` and `lon` (for locations
+within the state of California) and date columns named `start_date`
+and `end_date`, then the [DeGAUSS
+command](https://degauss.org/using_degauss.html#DeGAUSS_Commands):
-```sh
-docker run --rm -v $PWD:/tmp ghcr.io/degauss-org/habre_pm:0.2.0 my_address_file_geocoded.csv
+``` sh
+docker run --rm -v $PWD:/tmp ghcr.io/degauss-org/habre_pm:0.2.1 my_address_file_geocoded.csv
```
-will produce `my_address_file_geocoded_habre_pm_0.2.0.csv` with added columns:
+will produce `my_address_file_geocoded_habre_pm_0.2.1.csv` with added
+columns:
- **`pm`**: time weighted average of weekly PM2.5
-- **`sd`**: standard deviation
+- **`sd`**: time weighted standard deviation, computed as the square
+  root of the average of the squared weekly standard deviations
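+
+The input file can be created however you like; as a point of
+reference, here is a minimal sketch in R (hypothetical coordinates,
+with dates in the `%m/%d/%y` format used by the test file):
+
+``` r
+# hypothetical example input; points must fall within California
+readr::write_csv(
+  data.frame(
+    id = 1,
+    lat = 34.05,
+    lon = -118.24,
+    start_date = "1/25/08",
+    end_date = "1/31/08"
+  ),
+  "my_address_file_geocoded.csv"
+)
+```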
## Geomarker Methods
-- Geocoded points are overlaid with weekly PM2.5 rasters corresponding to the input date range.
-- For date ranges that span weeks, exposures are a time-weighted average.
+- Geocoded points are overlaid with weekly PM2.5 rasters corresponding
+ to the input date range.
+- For date ranges that span weeks, exposures are a time-weighted
+ average.
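+
+Concretely, the average is taken over the days of the date range, so
+each week's value is weighted by its number of overlapping days, and
+the weekly standard deviations are pooled on the variance scale. A
+minimal sketch of the arithmetic, assuming hypothetical weekly values
+for a range covering 3 days of one week and 4 days of the next:
+
+``` r
+# hypothetical weekly raster values for the two overlapped weeks
+pm_wk1 <- 4.1; sd_wk1 <- 2.4
+pm_wk2 <- 3.5; sd_wk2 <- 2.0
+
+# day-weighted mean of the weekly means (3 + 4 = 7 days)
+pm <- (3 * pm_wk1 + 4 * pm_wk2) / 7
+
+# square root of the day-weighted mean of the squared weekly SDs
+sd <- sqrt((3 * sd_wk1^2 + 4 * sd_wk2^2) / 7)
+```
+
+This is exactly what the daily expansion below accomplishes: each day
+contributes its week's values once, and `summarize()` averages over
+days.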
+
+#### Example code for time weighted average PM and SD
+
+``` r
+library(tidyverse)
+library(dht)
+library(sf)
+```
+
+**Read in data**
+
+Here we use only row 3 of the test file because its date range spans
+two calendar weeks.
+
+``` r
+d <- read_csv('test/my_address_file_geocoded_habre_pm25_leo.csv',
+ col_types = cols(start_date = col_date(format = "%m/%d/%y"),
+ end_date = col_date(format = "%m/%d/%y"))) |>
+ slice(3) |>
+ select(-prepm25, -prepm25_sd)
+d
+#> # A tibble: 1 × 6
+#> id lat lon start_date end_date iweek
+#> <dbl> <dbl> <dbl> <date> <date> <dbl>
+#> 1 3 35.2 -119. 2008-01-25 2008-01-31 413
+```
+
+**Expand start and end dates to daily**
+
+``` r
+d_daily <- dht::expand_dates(d, by = "day")
+d_daily
+#> # A tibble: 7 × 7
+#> id lat lon start_date end_date iweek date
+#> <dbl> <dbl> <dbl> <date> <date> <dbl> <date>
+#> 1 3 35.2 -119. 2008-01-25 2008-01-31 413 2008-01-25
+#> 2 3 35.2 -119. 2008-01-25 2008-01-31 413 2008-01-26
+#> 3 3 35.2 -119. 2008-01-25 2008-01-31 413 2008-01-27
+#> 4 3 35.2 -119. 2008-01-25 2008-01-31 413 2008-01-28
+#> 5 3 35.2 -119. 2008-01-25 2008-01-31 413 2008-01-29
+#> 6 3 35.2 -119. 2008-01-25 2008-01-31 413 2008-01-30
+#> 7 3 35.2 -119. 2008-01-25 2008-01-31 413 2008-01-31
+```
+
+**Read in date lookup / week index and expand start and end dates to
+daily**
+
+``` r
+date_lookup <-
+ readr::read_csv("pm25_iweek_startdate.csv") |>
+ dht::expand_dates(by = "day")
+
+date_lookup |>
+ filter(iweek %in% 413:414)
+#> # A tibble: 14 × 4
+#> iweek start_date end_date date
+#> <dbl> <date> <date> <date>
+#> 1 413 2008-01-21 2008-01-27 2008-01-21
+#> 2 413 2008-01-21 2008-01-27 2008-01-22
+#> 3 413 2008-01-21 2008-01-27 2008-01-23
+#> 4 413 2008-01-21 2008-01-27 2008-01-24
+#> 5 413 2008-01-21 2008-01-27 2008-01-25
+#> 6 413 2008-01-21 2008-01-27 2008-01-26
+#> 7 413 2008-01-21 2008-01-27 2008-01-27
+#> 8 414 2008-01-28 2008-02-03 2008-01-28
+#> 9 414 2008-01-28 2008-02-03 2008-01-29
+#> 10 414 2008-01-28 2008-02-03 2008-01-30
+#> 11 414 2008-01-28 2008-02-03 2008-01-31
+#> 12 414 2008-01-28 2008-02-03 2008-02-01
+#> 13 414 2008-01-28 2008-02-03 2008-02-02
+#> 14 414 2008-01-28 2008-02-03 2008-02-03
+
+date_lookup <-
+ date_lookup |>
+ dplyr::select(week = iweek, date)
+```
+
+**Join week index to input data using date (daily)**
+
+``` r
+d_week <- d_daily |>
+ dplyr::left_join(date_lookup, by = "date")
+
+d_week
+#> # A tibble: 7 × 8
+#> id lat lon start_date end_date iweek date week
+#> <dbl> <dbl> <dbl> <date> <date> <dbl> <date> <dbl>
+#> 1 3 35.2 -119. 2008-01-25 2008-01-31 413 2008-01-25 413
+#> 2 3 35.2 -119. 2008-01-25 2008-01-31 413 2008-01-26 413
+#> 3 3 35.2 -119. 2008-01-25 2008-01-31 413 2008-01-27 413
+#> 4 3 35.2 -119. 2008-01-25 2008-01-31 413 2008-01-28 414
+#> 5 3 35.2 -119. 2008-01-25 2008-01-31 413 2008-01-29 414
+#> 6 3 35.2 -119. 2008-01-25 2008-01-31 413 2008-01-30 414
+#> 7 3 35.2 -119. 2008-01-25 2008-01-31 413 2008-01-31 414
+```
+
+**Read in and join raster values**
+
+``` r
+r <- terra::rast("habre.tif")
+
+d_dedup <- d_week |>
+ select(lat, lon, week) |>
+ # keep one row per unique point-week combination
+ distinct() |>
+ mutate(layer_name_mean = glue::glue("week{week}_mean"),
+ layer_name_sd = glue::glue("week{week}_std"))
+
+d_for_extract <- d_dedup |>
+ st_as_sf(coords = c("lon", "lat"), crs = 4326) |>
+ st_transform(st_crs(r)) |>
+ terra::vect()
+
+# extract, for each point, the value from the raster layer named in
+# its layer_name_* column (via terra's `layer` argument)
+d_pm <- terra::extract(r,
+ d_for_extract,
+ layer = "layer_name_mean")
+
+d_sd <- terra::extract(r,
+ d_for_extract,
+ layer = "layer_name_sd")
+
+d_dedup <- d_dedup |>
+ mutate(pm = d_pm$value,
+ sd = d_sd$value)
+
+d_out <- left_join(d_week, d_dedup, by = c("lat", "lon", "week"))
+
+d_out
+#> # A tibble: 7 × 12
+#> id lat lon start_date end_date iweek date week layer_name_mean
+#> <dbl> <dbl> <dbl> <date> <date> <dbl> <date> <dbl> <glue>
+#> 1 3 35.2 -119. 2008-01-25 2008-01-31 413 2008-01-25 413 week413_mean
+#> 2 3 35.2 -119. 2008-01-25 2008-01-31 413 2008-01-26 413 week413_mean
+#> 3 3 35.2 -119. 2008-01-25 2008-01-31 413 2008-01-27 413 week413_mean
+#> 4 3 35.2 -119. 2008-01-25 2008-01-31 413 2008-01-28 414 week414_mean
+#> 5 3 35.2 -119. 2008-01-25 2008-01-31 413 2008-01-29 414 week414_mean
+#> 6 3 35.2 -119. 2008-01-25 2008-01-31 413 2008-01-30 414 week414_mean
+#> 7 3 35.2 -119. 2008-01-25 2008-01-31 413 2008-01-31 414 week414_mean
+#> # ℹ 3 more variables: layer_name_sd <glue>, pm <dbl>, sd <dbl>
+
+d_out <- d_out |>
+ group_by(id) |>
+ summarize(pm = round(sum(pm)/n(),2),
+ sd = round(sqrt((sum(sd^2))/n()),2))
+
+
+d_out <- left_join(d, d_out, by = "id")
+d_out
+#> # A tibble: 1 × 8
+#> id lat lon start_date end_date iweek pm sd
+#> <dbl> <dbl> <dbl> <date> <date> <dbl> <dbl> <dbl>
+#> 1 3 35.2 -119. 2008-01-25 2008-01-31 413 3.76 2.2
+```
## Geomarker Data
-- PM2.5 rasters were created using a model developed by Rima Habre and Lianfa Li.
+- PM2.5 rasters were created using a model developed by Rima Habre and
+ Lianfa Li.
-> Li L, Girguis M, Lurmann F, Pavlovic N, McClure C, Franklin M, Wu J, Oman LD, Breton C, Gilliland F, Habre R. Ensemble-based deep learning for estimating PM2. 5 over California with multisource big data including wildfire smoke. Environment international. 2020 Dec 1;145:106143. https://doi.org/10.1016/j.envint.2020.106143
+> Li L, Girguis M, Lurmann F, Pavlovic N, McClure C, Franklin M, Wu J,
+> Oman LD, Breton C, Gilliland F, Habre R. Ensemble-based deep learning
+> for estimating PM2.5 over California with multisource big data
+> including wildfire smoke. Environment International. 2020 Dec
+> 1;145:106143. <https://doi.org/10.1016/j.envint.2020.106143>
-- The raster stack used in this container is stored in S3 at [`s3://habre/habre.tif`](https://habre.s3-us-east-2.amazonaws.com/habre.tif)
-- Individual rasters that make up the raster stack are stored at [`s3://habre/li_2020/`](https://habre.s3-us-east-2.amazonaws.com/li_2020/)
+- The raster stack used in this container is stored in S3 at
+ [`s3://habre/habre.tif`](https://habre.s3-us-east-2.amazonaws.com/habre.tif)
+- Individual rasters that make up the raster stack are stored at
+ [`s3://habre/li_2020/`](https://habre.s3-us-east-2.amazonaws.com/li_2020/)
## DeGAUSS Details
-For detailed documentation on DeGAUSS, including general usage and installation, please see the [DeGAUSS homepage](https://degauss.org).
+For detailed documentation on DeGAUSS, including general usage and
+installation, please see the [DeGAUSS homepage](https://degauss.org).
diff --git a/entrypoint.R b/entrypoint.R
index 2f0af24..d989b57 100755
--- a/entrypoint.R
+++ b/entrypoint.R
@@ -76,7 +76,7 @@ d_dedup <- d_dedup |>
d_out <- left_join(d_week, d_dedup, by = c("lat", "lon", "week")) |>
group_by(.row) |>
summarize(pm = round(sum(pm)/n(),2),
- sd = round(sum(sd)/n(),2))
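+ # combine weekly SDs on the variance scale: sqrt of the mean of the squared SDs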
+ sd = round(sqrt((sum(sd^2))/n()),2))
d_out <- left_join(d, d_out, by = ".row") |>
select(-.row)
diff --git a/test/my_address_file_geocoded_habre_pm_0.1.0.csv b/test/my_address_file_geocoded_habre_pm_0.1.0.csv
deleted file mode 100644
index 34f1212..0000000
--- a/test/my_address_file_geocoded_habre_pm_0.1.0.csv
+++ /dev/null
@@ -1,4 +0,0 @@
-id,lat,lon,start_date,end_date,pm,sd
-1,34.41471040449594,-115.04499837139416,2008-01-21,2008-01-27,4.05,2.67
-2,34.02192926572527,-116.056401323031,2008-01-28,2008-02-03,3.55,1.65
-3,35.22267356636723,-118.54506279596852,2008-01-25,2008-01-31,5.8,1.61
diff --git a/test/my_address_file_geocoded_habre_pm_0.2.0.csv b/test/my_address_file_geocoded_habre_pm_0.2.1.csv
similarity index 100%
rename from test/my_address_file_geocoded_habre_pm_0.2.0.csv
rename to test/my_address_file_geocoded_habre_pm_0.2.1.csv