diff --git a/.gitignore b/.gitignore index 5b6a065..dc558ea 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,5 @@ .Rhistory .RData .Ruserdata +s3_downloads/ +habre.tif \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 05254ee..49c16e6 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,7 +2,7 @@ FROM rocker/r-ver:4.2.2 # DeGAUSS container metadata ENV degauss_name="habre_pm" -ENV degauss_version="0.1.0" +ENV degauss_version="0.2.0" ENV degauss_description="weekly pm2.5 for California (Habre)" # add OCI labels based on environment variables too @@ -28,7 +28,7 @@ COPY renv.lock . RUN R --quiet -e "renv::restore()" -COPY habre.tif . +ADD https://habre.s3-us-east-2.amazonaws.com/habre.tif habre.tif COPY pm25_iweek_startdate.csv . COPY entrypoint.R . diff --git a/README.md b/README.md index dbd44c7..3e9c690 100644 --- a/README.md +++ b/README.md @@ -5,31 +5,31 @@ ## Using -If `my_address_file_geocoded.csv` is a file in the current working directory with coordinate columns named `lat` and `lon`, then the [DeGAUSS command](https://degauss.org/using_degauss.html#DeGAUSS_Commands): +If `my_address_file_geocoded.csv` is a file in the current working directory with coordinate columns named `lat` and `lon` (within the state of California) and date columns named `start_date` and `end_date`, then the [DeGAUSS command](https://degauss.org/using_degauss.html#DeGAUSS_Commands): ```sh -docker run --rm -v $PWD:/tmp ghcr.io/degauss-org/habre_pm:0.1.0 my_address_file_geocoded.csv +docker run --rm -v $PWD:/tmp ghcr.io/degauss-org/habre_pm:0.2.0 my_address_file_geocoded.csv ``` -will produce `my_address_file_geocoded_habre_pm_0.1.0.csv` with added columns: +will produce `my_address_file_geocoded_habre_pm_0.2.0.csv` with added columns: -- **`pm`**: a definition of this geomarker -- **`sd`**: a definition of that geomarker - -### Optional Argument - -- If this DeGAUSS container takes an optional argument, describe its usage and effects here. 
-- Be sure to also update the example output file name with the argument value. +- **`pm`**: time-weighted average of weekly PM2.5 +- **`sd`**: standard deviation ## Geomarker Methods -- If needed, put details here about the methods and assumptions used in the geomarker assessment process. +- Geocoded points are overlaid with weekly PM2.5 rasters corresponding to the input date range. +- For date ranges that span weeks, exposures are a time-weighted average. ## Geomarker Data -- List how geomarker was created, ideally including any scripts within the repo used to do so or linking to an external repository -- If applicable, list where geomarker data is stored in S3 using a hyperlink like: [`s3://path/to/habre_pm.rds`](https://geomarker.s3.us-east-2.amazonaws.com/path/to/habre_pm.rds) +- PM2.5 rasters were created using a model developed by Rima Habre and Lianfa Li. + +> Li L, Girguis M, Lurmann F, Pavlovic N, McClure C, Franklin M, Wu J, Oman LD, Breton C, Gilliland F, Habre R. Ensemble-based deep learning for estimating PM2.5 over California with multisource big data including wildfire smoke. Environment International. 2020 Dec 1;145:106143. https://doi.org/10.1016/j.envint.2020.106143 + +- The raster stack used in this container is stored in S3 at [`s3://habre/habre.tif`](https://habre.s3-us-east-2.amazonaws.com/habre.tif) +- Individual rasters that make up the raster stack are stored at [`s3://habre/li_2020/`](https://habre.s3-us-east-2.amazonaws.com/li_2020/) ## DeGAUSS Details -For detailed documentation on DeGAUSS, including general usage and installation, please see the [DeGAUSS homepage](https://degauss.org). \ No newline at end of file +For detailed documentation on DeGAUSS, including general usage and installation, please see the [DeGAUSS homepage](https://degauss.org). 
diff --git a/make_raster_stack.R b/make_raster_stack.R new file mode 100644 index 0000000..142a267 --- /dev/null +++ b/make_raster_stack.R @@ -0,0 +1,12 @@ +library(terra) + +# weeks 410 to 931 +rast_files <- c(glue::glue("s3://habre/li_2020/week{410:931}_mean.tif"), + glue::glue("s3://habre/li_2020/week{410:931}_std.tif")) + +rast_files <- s3::s3_get_files(rast_files) +r <- terra::rast(rast_files$file_path) +r <- round(r, digits = 2) # original 4 files: 7.7 MB to 4.2 MB +terra::writeRaster(r, "habre.tif", overwrite = TRUE) + +fs::dir_delete("s3_downloads") diff --git a/test/my_address_file_geocoded.csv b/test/my_address_file_geocoded.csv index 8e2e81f..905bac4 100644 --- a/test/my_address_file_geocoded.csv +++ b/test/my_address_file_geocoded.csv @@ -1,4 +1,4 @@ -id,lat,lon,start_date,end_date -1,34.41471040449594,-115.04499837139416,2008-01-21,2008-01-27 -2,34.02192926572527,-116.056401323031,2008-01-28,2008-02-03 -3,35.22267356636723,-118.54506279596853,2008-01-25,2008-01-31 +id,lat,lon,start_date,end_date +1,34.4147104,-115.0449984,2008-01-21,2008-01-27 +2,34.02192927,-116.0564013,2008-01-28,2008-02-03 +3,35.22267357,-118.5450628,2010-01-25,2010-01-31 \ No newline at end of file diff --git a/test/my_address_file_geocoded_habre_pm_0.2.0.csv b/test/my_address_file_geocoded_habre_pm_0.2.0.csv new file mode 100644 index 0000000..2701804 --- /dev/null +++ b/test/my_address_file_geocoded_habre_pm_0.2.0.csv @@ -0,0 +1,4 @@ +id,lat,lon,start_date,end_date,pm,sd +1,34.4147104,-115.0449984,2008-01-21,2008-01-27,4.39,3.31 +2,34.02192927,-116.0564013,2008-01-28,2008-02-03,3.64,1.65 +3,35.22267357,-118.5450628,2010-01-25,2010-01-31,4.72,2.45