Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/ipeaGIT/geobr
Browse files Browse the repository at this point in the history
# Conflicts:
#	r-package/DESCRIPTION
  • Loading branch information
rafapereirabr committed Mar 24, 2024
2 parents 87be5f7 + 5d79577 commit 173a7b1
Show file tree
Hide file tree
Showing 12 changed files with 212 additions and 187 deletions.
38 changes: 24 additions & 14 deletions data_prep/R/health_facilities.R
Original file line number Diff line number Diff line change
Expand Up @@ -84,20 +84,23 @@ update_health_facilities <- function(){
'date_update', 'year_update'))



# deal with points with missing coordinates
head(dt)
# dt[is.na(lat) | is.na(lon),]
# dt[lat==0,]
dt[is.na(lat) | is.na(lon),]
dt[lat==0,]

# identify which points should have empty geo
dt[is.na(lat) | is.na(lon), empty_geo := T]

dt[code_cnes=='0000930', lat]
dt[code_cnes=='0000930', lon]

# dt[code_cnes=='0000930', lat]
# dt[code_cnes=='0000930', lon]
#
# # replace NAs with 0
# data.table::setnafill(dt,
# type = "const",
# fill = 0,
# cols=c("lat","lon")
# )
# replace NAs with 0
data.table::setnafill(dt,
type = "const",
fill = 0,
cols=c("lat","lon")
)



Expand All @@ -107,18 +110,25 @@ update_health_facilities <- function(){
crs = "+proj=longlat +datum=WGS84")


# convert to point empty
# solution from: https://gis.stackexchange.com/questions/459239/how-to-set-a-geometry-to-na-empty-for-some-features-of-an-sf-dataframe-in-r
temp_sf$geometry[temp_sf$empty_geo == T] = sf::st_point()

subset(temp_sf, code_cnes=='0000930')


# Change CRS to SIRGAS Geodetic reference system "SIRGAS2000" , CRS(4674).
temp_sf <- harmonize_projection(temp_sf)


# create folder to save the data
dest_dir <- paste0('./data/health_facilities/', geobr_date)
dest_dir <- paste0('./data/health_facilities/', geobr_date,'/')
dir.create(path = dest_dir, recursive = TRUE, showWarnings = FALSE)


# Save raw file in sf format
sf::st_write(temp_sf,
dsn= paste0(dest_dir, 'cnes_', date_update,".gpkg"),
dsn= paste0(dest_dir, 'cnes_', geobr_date,".gpkg"),
overwrite = TRUE,
append = FALSE,
delete_dsn = T,
Expand Down
130 changes: 130 additions & 0 deletions data_prep/R/schools.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
#> DATASET: schools 2020
#> Source: INEP -
#> https://www.gov.br/inep/pt-br/acesso-a-informacao/dados-abertos/inep-data/catalogo-de-escolas
#>
#: scale
#> Metadata:
# Titulo: schools
#' Frequencia de atualizacao: anual
#'
#' Forma de apresentação: Shape
#' Linguagem: Pt-BR
#' Character set: Utf-8
#'
#' Resumo: Pontos com coordenadas geográficas das escolas do censo escolar
#' Informações adicionais: Dados produzidos pelo INEP. Os dados de escolas e sua
#' geolocalização são atualizados pelo INEP continuamente. Para finalidade do geobr,
#' esses dados precisam ser baixados uma vez ao ano




#' Build the geobr schools data set from the INEP school catalogue
#'
#' Reads the manually downloaded INEP catalogue (CSV), renames its columns to
#' the names used by geobr, turns the longitude/latitude columns into point
#' geometries (schools without coordinates get an empty point), passes the
#' result through `harmonize_projection()` and writes it to
#' `./data/schools/<update>/schools_<update>.gpkg`.
#'
#' @param input_file Path to the CSV downloaded manually from
#'   https://www.gov.br/inep/pt-br/acesso-a-informacao/dados-abertos/inep-data/catalogo-de-escolas
#'   Defaults to the location used when this script was first written.
#' @param update Reference year of the data. Used to name the output folder
#'   and the output file.
#'
#' @return Whatever `sf::st_write()` returns; the function is called for its
#'   side effect of writing the geopackage.
update_schools <- function(
    input_file = 'C:/Users/r1701707/Downloads/Análise - Tabela da lista das escolas - Detalhado.csv',
    update = 2023) {

  # fail early with a clear message instead of a low-level reader error
  if (!file.exists(input_file)) {
    stop("Input file not found: ", input_file, call. = FALSE)
  }

  # day on which this script is run, stored as text
  run_date <- as.character(Sys.Date())

  # download manual
  # https://www.gov.br/inep/pt-br/acesso-a-informacao/dados-abertos/inep-data/catalogo-de-escolas
  dt <- data.table::fread(input_file, encoding = 'UTF-8')

  # The column selection below asks for a `date_update` column. If the input
  # file does not provide one, create it from the run date so that the
  # selection does not fail on a missing column.
  # NOTE(review): assumes the INEP CSV has no `date_update` column of its
  # own; if it does, that column is kept untouched here.
  if (!"date_update" %in% names(dt)) {
    data.table::set(dt, j = "date_update", value = run_date)
  }


  ##### Rename columns -------------------------
  df <- dplyr::select(
    dt,
    abbrev_state = 'UF',
    name_muni = 'Município',
    code_school = 'Código INEP',
    name_school = 'Escola',
    education_level = 'Etapas e Modalidade de Ensino Oferecidas',
    education_level_others = 'Outras Ofertas Educacionais',
    admin_category = 'Categoria Administrativa',
    address = 'Endereço',
    phone_number = 'Telefone',
    government_level = 'Dependência Administrativa',
    private_school_type = 'Categoria Escola Privada',
    private_government_partnership = 'Conveniada Poder Público',
    regulated_education_council = 'Regulamentação pelo Conselho de Educação',
    service_restriction = 'Restrição de Atendimento',
    size = 'Porte da Escola',
    urban = 'Localização',
    location_type = 'Localidade Diferenciada',
    date_update = 'date_update',
    y = 'Latitude',
    x = 'Longitude'
  )

  # re-register the object as a data.table so that the `:=` assignments
  # below operate on a valid data.table after the dplyr call
  data.table::setDT(df)

  # store the update date as text
  df[, date_update := as.character(date_update)]


  # flag the points with missing coordinates: they must end up with an empty
  # geometry (rows with coordinates keep NA in this column)
  df[is.na(x) | is.na(y), empty_geo := TRUE]

  # replace NAs with 0 so that the conversion to sf below accepts every row;
  # the placeholder coordinates are replaced by empty points afterwards
  data.table::setnafill(
    df,
    type = "const",
    fill = 0,
    cols = c("x", "y")
  )


  # Convert original data frame into sf
  temp_sf <- sf::st_as_sf(
    x = df,
    coords = c("x", "y"),
    crs = "+proj=longlat +datum=WGS84"
  )


  # convert flagged rows to empty points
  # solution from: https://gis.stackexchange.com/questions/459239/how-to-set-a-geometry-to-na-empty-for-some-features-of-an-sf-dataframe-in-r
  # `%in% TRUE` yields a plain TRUE/FALSE mask: NA flags count as FALSE
  is_empty <- temp_sf$empty_geo %in% TRUE
  temp_sf$geometry[is_empty] <- sf::st_point()


  # Change CRS to SIRGAS Geodetic reference system "SIRGAS2000" , CRS(4674).
  # (harmonize_projection() is a project helper defined outside this function)
  temp_sf <- harmonize_projection(temp_sf)


  # create folder to save the data
  dest_dir <- paste0('./data/schools/', update, '/')
  dir.create(path = dest_dir, recursive = TRUE, showWarnings = FALSE)


  # Save raw file in sf format
  sf::st_write(
    temp_sf,
    dsn = paste0(dest_dir, 'schools_', update, ".gpkg"),
    overwrite = TRUE,
    append = FALSE,
    delete_dsn = TRUE,
    delete_layer = TRUE,
    quiet = TRUE
  )

}
6 changes: 3 additions & 3 deletions data_prep/R/support_fun.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#### Support functions to use in the preprocessing of the data

# library(dplyr)
# library(data.table)
library(dplyr)
library(data.table)
# library(mapview)
# mapviewOptions(platform = 'deckgl')

Expand Down Expand Up @@ -146,7 +146,7 @@ add_region_info <- function(temp_sf, column){
code_region==2, 'Nordeste',
code_region==3, 'Sudeste',
code_region==4, 'Sul',
code_region==5, 'Centro Oeste',
code_region==5, 'Centro-Oeste',
default = NA))
return(temp_sf)
}
Expand Down
1 change: 1 addition & 0 deletions r-package/DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ Suggests:
rmarkdown,
scales,
testthat
RoxygenNote: 7.3.1
RoxygenNote: 7.2.3
Roxygen: list(markdown = TRUE)
VignetteBuilder: knitr
12 changes: 12 additions & 0 deletions r-package/NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,15 @@
# geobr v1.9.0

**Major changes**

- Function `read_health_facilities()` now has a new parameter `date`, which will allow users to access data for different dates of reference. The plan is to have at least one update of this data set per year.


**New data**
- schools for 2023
- health facilities for 202303


# geobr v1.8.2

**CRAN request**
Expand Down
9 changes: 6 additions & 3 deletions r-package/R/read_health_facilities.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@
#' update is registered in the database in the columns `date_update` and
#' `year_update`. More information in the CNES data set available at \url{https://dados.gov.br/}.
#' These data use Geodetic reference system "SIRGAS2000" and CRS(4674).
#'
#' @param date Numeric. Date of the data in YYYYMM format. Defaults to `202303`,
#' which was the latest data available by the time of this update.
#' @template showProgress
#'
#' @return An `"sf" "data.frame"` object
Expand All @@ -25,12 +28,12 @@
#'
#' @examplesIf identical(tolower(Sys.getenv("NOT_CRAN")), "true")
#' # Read all health facilities of the whole country
#' h <- read_health_facilities()
#' h <- read_health_facilities( date = 202303)
#'
read_health_facilities <- function( showProgress=TRUE ){
read_health_facilities <- function(date = 202303, showProgress = TRUE){

# Get metadata with data url addresses
temp_meta <- select_metadata(geography="health_facilities", year=2015, simplified=F)
temp_meta <- select_metadata(geography="health_facilities", year=date, simplified=F)

# list paths of files to download
file_url <- as.character(temp_meta$download_path)
Expand Down
6 changes: 3 additions & 3 deletions r-package/R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -38,16 +38,16 @@ select_data_type <- function(temp_meta, simplified=NULL){
select_year_input <- function(temp_meta, y=year){

# NULL
if (is.null(y)){ stop(paste0("Error: Invalid Value to argument 'year'. It must be one of the following: ",
if (is.null(y)){ stop(paste0("Error: Invalid Value to argument 'year/date'. It must be one of the following: ",
paste(unique(temp_meta$year),collapse = " "))) }

# valid input
else if (y %in% temp_meta$year){ message(paste0("Using year ", y))
else if (y %in% temp_meta$year){ message(paste0("Using year/date ", y))
temp_meta <- subset(temp_meta, year == y)
return(temp_meta) }

# invalid input
else { stop(paste0("Error: Invalid Value to argument 'year'. It must be one of the following: ",
else { stop(paste0("Error: Invalid Value to argument 'year/date'. It must be one of the following: ",
paste(unique(temp_meta$year), collapse = " ")))
}
}
Expand Down
1 change: 0 additions & 1 deletion r-package/man/geobr.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 5 additions & 2 deletions r-package/man/read_health_facilities.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 173a7b1

Please sign in to comment.