From 0e0143a82fe34c12aa5406679da3c0e7303642fa Mon Sep 17 00:00:00 2001 From: ALanguillaume Date: Thu, 2 Jun 2022 16:46:05 +0200 Subject: [PATCH 1/3] Update R version Why? - R was out of date on @ALanguillaume's computer What? - Install new R version - Reinstall all packages - Make sure local R profile knows about Pop!_Os --- .Rprofile | 6 +++++- dev/02_dev.R | 11 ++++++----- diades.atlas.Rproj | 1 + 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/.Rprofile b/.Rprofile index 1dd4f46..9087ec8 100644 --- a/.Rprofile +++ b/.Rprofile @@ -18,7 +18,7 @@ if (grepl("ubuntu 18.04|debian 8", tolower(utils::osVersion))) { # repos <- c("RSPM" = "https://cran.rstudio.com", "thinkropen" = "https://thinkr-open.r-universe.dev", "CRAN" = "https://cran.rstudio.com") -} else if (grepl("ubuntu 20.04|debian 9", tolower(utils::osVersion))) { +} else if (grepl("ubuntu 20.04|debian 9|pop!_os 22.04 lts", tolower(utils::osVersion))) { repos <- c("RSPM" = "https://packagemanager.rstudio.com/all/__linux__/focal/latest", # repos <- c("RSPM" = "https://cran.rstudio.com", "thinkropen" = "https://thinkr-open.r-universe.dev", @@ -28,6 +28,10 @@ if (grepl("ubuntu 18.04|debian 8", tolower(utils::osVersion))) { repos <- c("RSPM" = "https://packagemanager.rstudio.com/all/__linux__/centos7/latest", "thinkropen" = "https://thinkr-open.r-universe.dev", "CRAN" = "https://cran.rstudio.com") +} else if ( grepl("pop!_os 22.04 lts", tolower(utils::osVersion)) ) { + repos <- c("RSPM" = "https://packagemanager.rstudio.com/all/latest", + "thinkropen" = "https://thinkr-open.r-universe.dev", + "CRAN" = "https://cran.rstudio.com") } else { # Important for MacOS users in particular repos <- c("RSPM" = "https://cran.rstudio.com", diff --git a/dev/02_dev.R b/dev/02_dev.R index 72c58b1..a3a954e 100644 --- a/dev/02_dev.R +++ b/dev/02_dev.R @@ -68,12 +68,13 @@ renv::restore() # Force installation from source of packages that need compilation packages <- c( - tmap = "3.3.2", + # sf = "1.0-4", lwgeom = 
"0.2.8", - V8 = "3.4.2", - testthat = "3.1.0", - rgeos = "0.5-5", - jqr = "1.2.1", + # tmap = "3.3.2", + # V8 = "3.4.2", + # testthat = "3.1.0", + # rgeos = "0.5-5", + # jqr = "1.2.1", NULL ) diff --git a/diades.atlas.Rproj b/diades.atlas.Rproj index 21a4da0..176148b 100644 --- a/diades.atlas.Rproj +++ b/diades.atlas.Rproj @@ -14,4 +14,5 @@ LaTeX: pdfLaTeX BuildType: Package PackageUseDevtools: Yes +PackageCleanBeforeInstall: Yes PackageInstallArgs: --no-multiarch --with-keep.source From 5526d8a016a3706e0bf04f86e5bd0494c976d14e Mon Sep 17 00:00:00 2001 From: ALanguillaume Date: Thu, 2 Jun 2022 16:48:21 +0200 Subject: [PATCH 2/3] Remove basin geomertry simplification in R Why? - Simplification already made in database. What? - Remove call to ms_simplify() Issues #67 --- R/utils_helpers.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/R/utils_helpers.R b/R/utils_helpers.R index 9755161..e1db4f4 100644 --- a/R/utils_helpers.R +++ b/R/utils_helpers.R @@ -208,8 +208,7 @@ generate_datasets <- function(con) { catchment_geom <- sf::st_read( con, query = "SELECT * FROM diadesatlas.v_basin vb" - ) %>% - rmapshaper::ms_simplify() + ) dataALL <- DBI::dbGetQuery( con, From 61b3c110c0bb53e9d4f92c5846cc399f2f69031b Mon Sep 17 00:00:00 2001 From: ALanguillaume Date: Thu, 2 Jun 2022 16:56:54 +0200 Subject: [PATCH 3/3] Add regex invalid ices_geom Why? - Two polygons have invalid geometry in current db version. What? 
- Add reprex in data exploration vignette Issues #67 --- data-raw/aa-a-exploration_data.Rmd | 47 ++- data-raw/aa-a-exploration_data.html | 576 ++++++++++++++++++++++++++++ data-raw/bb-page1-catch-bycatch.Rmd | 2 +- 3 files changed, 617 insertions(+), 8 deletions(-) create mode 100644 data-raw/aa-a-exploration_data.html diff --git a/data-raw/aa-a-exploration_data.Rmd b/data-raw/aa-a-exploration_data.Rmd index 2a1333c..392cf9c 100644 --- a/data-raw/aa-a-exploration_data.Rmd +++ b/data-raw/aa-a-exploration_data.Rmd @@ -16,7 +16,7 @@ remotes::install_github('inrae/diades.atlas') ``` ## Packages -```{r} +```{r, message=FALSE} library(dplyr) library(DBI) library(ggplot2) @@ -59,9 +59,9 @@ However, at some point, some of your {tidyverse} operations can not be realised For instance. - `dbGetQuery()` download data in R - - `filter()` is executed by R in your R session +- `filter()` is executed by R in your R session - Note that `!!` is a specific to using {dplyr} with SQL there. - + This is because the variable after it (e.g. `species_id`) is defined in the R session, but not in the database. Therefore, before sending the SQL query to the database, R has to transform the variable by its real value in R. Otherwise, it will send the word `"species_id"` which does not make sense for the SQL database, instead of the number you wanted to put. ++ This is because the variable after it (e.g. `species_id`) is defined in the R session, but not in the database. Therefore, before sending the SQL query to the database, R has to transform the variable by its real value in R. Otherwise, it will send the word `"species_id"` which does not make sense for the SQL database, instead of the number you wanted to put. 
```{r} @@ -75,13 +75,16 @@ get_data_dbi <- function(conn_eurodiad, species_id, scenario) { } # Use it -get_data_dbi(conn_eurodiad, - species_id = c(6), - scenario = 'rcp85') +get_data_dbi( + conn_eurodiad, + species_id = c(6), + scenario = 'rcp85' +) %>% + head() ``` - `tbl()` only connects to the table, only a glimpse of the data is presented - - `filter()` is run by the SQL database +- `filter()` is run by the SQL database ```{r} get_data_tbl <- function(conn_eurodiad, species_id, scenario) { @@ -139,6 +142,36 @@ get_data_tbl_query_collect(conn_eurodiad, geom_line(aes(y = hsi)) ``` +## Reprex invalid ices_geom + +Two polygons of ices_geom have invalid geometries. + +```{r} +ices_geom <- st_read( + conn_eurodiad, + query = "SELECT * FROM diadesatlas.v_ices_geom;" +) %>% + st_transform("+proj=wintri") #%>% +# rmapshaper::ms_simplify() +``` + + +```{r} +invalid_pols <- ices_geom %>% + filter(!st_is_valid(.)) + +nrow(invalid_pols) +``` + +This makes the interactive map fails. + +```{r, error=TRUE} +map_invalid <- tm_shape(invalid_pols)+ + tm_sf() + +tmap_leaflet(map_invalid) +``` + ## Disconnect from the database diff --git a/data-raw/aa-a-exploration_data.html b/data-raw/aa-a-exploration_data.html new file mode 100644 index 0000000..92695dc --- /dev/null +++ b/data-raw/aa-a-exploration_data.html @@ -0,0 +1,576 @@ + + + + + + + + + + + + + + +00-exploration-of-data + + + + + + + + + + + + + + + + + + + + + + + + + +

00-exploration-of-data

+ + + +
+

Installation

+

Install the present package and use its core functions

+
remotes::install_github('inrae/diades.atlas')
+
+
+

Packages

+
library(dplyr)
+library(DBI)
+library(ggplot2)
+library(RPostgres)
+# library(diades.atlas)
+pkgload::load_all()
+

You can use this package without opening the Shiny +application

+
+
+

Connect to the database

+
se <- new.env()
+
+conn_eurodiad <- dbConnect(
+  RPostgres::Postgres(), 
+  dbname = 'eurodiad',
+  host = '<your-host>',
+  port = 5432,
+  user = '<your-username>',
+  password = rstudioapi::askForPassword("Database password"))
+
+session$userData$con <-conn_eurodiad
+
# For ThinkR only
+se <- new.env()
+conn_eurodiad <- connect(session = se)
+DBI::dbListTables(conn_eurodiad)
+
##  [1] "abundance_level"          "basin"                   
+##  [3] "basin_outlet"             "casestudy_basin"         
+##  [5] "category"                 "cices"                   
+##  [7] "climatic_model"           "hydiadparameter"         
+##  [9] "ices_area"                "ices_grass"              
+## [11] "ices_rectangle"           "iucn_classification"     
+## [13] "iucn_level"               "lang85_85"               
+## [15] "species"                  "species_rectangle"       
+## [17] "v_abundance"              "v_maxvalue_es"           
+## [19] "v_hybrid_model"           "v_hybrid_model_mavg"     
+## [21] "v_ices_geom"              "v_occurence"             
+## [23] "v_species_ices_occurence" "abundance"               
+## [25] "casestudy"                "ecosystemic_services"    
+## [27] "esvalue"                  "hybrid_model_result"     
+## [29] "locale"                   "outlet_distance"         
+## [31] "species_division"         "species_iucn"            
+## [33] "subcategory"              "v_basin"                 
+## [35] "v_ecosystemic_services"   "v_iucn"
+
+
+

Explore data

+

It is recommended to use tbl() instead of +dbGetQuery() to connect to a table in the database.
+tbl() does not download the table, it only connects to the +database and lets it run the query. This means that if you run a +filter() or a mutate() after that, the +database executes it, not R, which is more efficient.
+However, at some point, some of your {tidyverse} operations cannot be +realised or translated into SQL; hence, you need to download the resulting +dataset using collect(). Then you can continue as +usual.

+

For instance.

+
    +
  • dbGetQuery() downloads data into R
  • +
  • filter() is executed by R in your R session
  • +
  • Note that !! is specific to using {dplyr} with SQL +there.
  • +
  • This is because the variable after it (e.g. species_id) +is defined in the R session, but not in the database. Therefore, before +sending the SQL query to the database, R has to replace the variable +with its real value in R. Otherwise, it will send the word +"species_id", which does not make sense for the SQL +database, instead of the number you wanted to put.
  • +
+
# Create a function
+get_data_dbi <- function(conn_eurodiad, species_id, scenario) {
+  query <- "SELECT * FROM diadesatlas.v_hybrid_model"
+  
+  dbGetQuery(conn_eurodiad, query) %>%
+    filter(species_id %in% !!species_id,
+           climatic_scenario %in% !!scenario) 
+}
+
+# Use it
+get_data_dbi(
+  conn_eurodiad,
+  species_id = c(6),
+  scenario = 'rcp85'
+) %>% 
+  head()
+
##   hybrid_model_result_id basin_id species_id climatic_model_id year      nit
+## 1                1395745        1          6                 2 1951 568.9058
+## 2                1395746        1          6                 2 1952 485.9221
+## 3                1395747        1          6                 2 1953 429.9423
+## 4                1395748        1          6                 2 1954 383.9996
+## 5                1395749        1          6                 2 1955 297.9209
+## 6                1395750        1          6                 2 1956 208.8928
+##         hsi saturation_rate climatic_scenario climatic_model_code oceanic_model
+## 1 0.2442476       0.2697492             rcp85             cnrmcm5      CNRM-CM5
+## 2 0.3222632       0.1746248             rcp85             cnrmcm5      CNRM-CM5
+## 3 0.1945927       0.2558784             rcp85             cnrmcm5      CNRM-CM5
+## 4 0.2457150       0.1809877             rcp85             cnrmcm5      CNRM-CM5
+## 5 0.0849822       0.4059972             rcp85             cnrmcm5      CNRM-CM5
+## 6 0.2351029       0.1029000             rcp85             cnrmcm5      CNRM-CM5
+##       atmospheric_model climatic_model_hm basin_name  fish_name  latin_name
+## 1 CNRM-CERFACS-CNRM-CM5                cn         Aa Allis shad Alosa alosa
+## 2 CNRM-CERFACS-CNRM-CM5                cn         Aa Allis shad Alosa alosa
+## 3 CNRM-CERFACS-CNRM-CM5                cn         Aa Allis shad Alosa alosa
+## 4 CNRM-CERFACS-CNRM-CM5                cn         Aa Allis shad Alosa alosa
+## 5 CNRM-CERFACS-CNRM-CM5                cn         Aa Allis shad Alosa alosa
+## 6 CNRM-CERFACS-CNRM-CM5                cn         Aa Allis shad Alosa alosa
+##   simplified_name
+## 1        A. alosa
+## 2        A. alosa
+## 3        A. alosa
+## 4        A. alosa
+## 5        A. alosa
+## 6        A. alosa
+
    +
  • tbl() only connects to the table, only a glimpse of the +data is presented
  • +
  • filter() is run by the SQL database
  • +
+
get_data_tbl <- function(conn_eurodiad, species_id, scenario) {
+  tbl(conn_eurodiad, "v_hybrid_model") %>% 
+    filter(species_id %in% !!species_id,
+           climatic_scenario %in% !!scenario) 
+}
+
+# Use it
+get_data_tbl(conn_eurodiad,
+             species_id = c(6),
+             scenario = 'rcp85')
+
## # Source:   lazy query [?? x 17]
+## # Database: postgres [diadesatlas_owner@localhost:5432/diades]
+##    hybrid_model_result_… basin_id species_id climatic_model_…  year   nit    hsi
+##                    <int>    <int>      <int>            <int> <int> <dbl>  <dbl>
+##  1               1395745        1          6                2  1951 569.  0.244 
+##  2               1395746        1          6                2  1952 486.  0.322 
+##  3               1395747        1          6                2  1953 430.  0.195 
+##  4               1395748        1          6                2  1954 384.  0.246 
+##  5               1395749        1          6                2  1955 298.  0.0850
+##  6               1395750        1          6                2  1956 209.  0.235 
+##  7               1395751        1          6                2  1957 140.  0.327 
+##  8               1395752        1          6                2  1958 101.  0.415 
+##  9               1395753        1          6                2  1959  66.9 0.246 
+## 10               1395754        1          6                2  1960  36.8 0.380 
+## # … with more rows, and 10 more variables: saturation_rate <dbl>,
+## #   climatic_scenario <chr>, climatic_model_code <chr>, oceanic_model <chr>,
+## #   atmospheric_model <chr>, climatic_model_hm <chr>, basin_name <chr>,
+## #   fish_name <chr>, latin_name <chr>, simplified_name <chr>
+
    +
  • If you need to create some {ggplot2} figures after that, you can +collect
  • +
+
get_data_tbl_collect <- function(conn_eurodiad, species_id, scenario) {
+  tbl(conn_eurodiad, "v_hybrid_model") %>% 
+    filter(species_id %in% !!species_id,
+           climatic_scenario %in% !!scenario) %>% 
+    # From collect(), the full data is in the R session
+    collect()
+}
+
+# Use it
+get_data_tbl_collect(conn_eurodiad,
+                     species_id = c(6),
+                     scenario = 'rcp85') %>% 
+  filter(basin_id == 1) %>% 
+  ggplot(aes(x = year)) +
+  geom_line(aes(y = hsi))
+

+
    +
  • Note that you can use a full SQL query with tbl()
  • +
+
get_data_tbl_query_collect <- function(conn_eurodiad, species_id, scenario) {
+  query <- "SELECT * FROM diadesatlas.v_hybrid_model"
+  
+  tbl(conn_eurodiad, sql(query)) %>% 
+    # Adds a filter to be run as SQL in the database
+    filter(species_id %in% !!species_id,
+           climatic_scenario %in% !!scenario) %>% 
+    # From collect(), the full data is in the R session
+    collect()
+}
+
+# Use it
+get_data_tbl_query_collect(conn_eurodiad,
+                           species_id = c(6),
+                           scenario = 'rcp85') %>% 
+  filter(basin_id == 1) %>% 
+  ggplot(aes(x = year)) +
+  geom_line(aes(y = hsi))
+

+
+
+

Reprex invalid ices_geom

+

Two polygons of ices_geom have invalid geometries.

+
ices_geom <- st_read(
+  conn_eurodiad,
+  query = "SELECT * FROM diadesatlas.v_ices_geom;"
+) %>%
+  st_transform("+proj=wintri") #%>%
+# rmapshaper::ms_simplify()
+
invalid_pols <- ices_geom %>% 
+  filter(!st_is_valid(.))
+
+nrow(invalid_pols)
+
## [1] 2
+

This makes the interactive map fail.

+
map_invalid <- tm_shape(invalid_pols)+
+  tm_sf()
+
+tmap_leaflet(map_invalid)
+
## Linking to GEOS 3.10.2, GDAL 3.4.1, PROJ 8.2.1
+
## Error: Shape contains invalid polygons. Please fix it or set tmap_options(check.and.fix = TRUE) and rerun the plot
+
+
+

Disconnect from the database

+
DBI::dbDisconnect(conn_eurodiad)
+
+ + + + + + + + + + + diff --git a/data-raw/bb-page1-catch-bycatch.Rmd b/data-raw/bb-page1-catch-bycatch.Rmd index 3c73e1d..630a90c 100644 --- a/data-raw/bb-page1-catch-bycatch.Rmd +++ b/data-raw/bb-page1-catch-bycatch.Rmd @@ -45,7 +45,7 @@ vignette: > } ``` -```{r setup} +```{r, setup} # Sys.setenv("POSTGRES_USER" = "diadesatlas_owner") # Sys.setenv("POSTGRES_PASS" = "thinkrpassword") pkgload::load_all(here::here())