Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PR for feature #58 (access private datasets and be able to upsert and full replace datasets) #64

Closed
Closed
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ export(ls.socrata)
export(read.socrata)
export(read.socrataGEO)
export(validateUrl)
export(write.socrata)
importFrom(geojsonio,geojson_read)
importFrom(httr,GET)
importFrom(httr,add_headers)
Expand Down
15 changes: 11 additions & 4 deletions R/errorHandling.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,18 @@
# but one that is not compatible with RSocrata.
# See \url{https://github.com/Chicago/RSocrata/issues/16}
#
# @param url - SOPA url
# @param url - SODA url
# @param email - (optional) The email for the Socrata account with read access to the dataset
# @param password - (optional) The password associated with the email for the Socrata account
#' @importFrom httr stop_for_status GET add_headers
errorHandling <- function(url = "", app_token = NULL) {
rsp <- httr::GET(url, httr::add_headers("X-App-Token" = app_token))

errorHandling <- function(url = "", app_token = NULL, email = NULL, password = NULL) {

if(is.null(email) && is.null(password)){
rsp <- httr::GET(url, httr::add_headers("X-App-Token" = app_token))
} else { # email and password are not NULL
rsp <- httr::GET(url, httr::add_headers("X-App-Token" = app_token), httr::authenticate(email, password))
}

if (rsp$status_code == 200) {
invisible("OK. Your request was successful.")

Expand Down
19 changes: 14 additions & 5 deletions R/metadata.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
#' \code{http://DOMAIN/api/views/FOUR-FOUR/columns.json}, which is used here.
#'
#' @param url - A Socrata resource URL, or a Socrata "human-friendly" URL!
#' @param email - (optional) The email for the Socrata account with read access to the dataset
#' @param password - (optional) The password associated with the email for the Socrata account
#'
#' @source \url{http://stackoverflow.com/a/29782941}
#'
Expand All @@ -24,13 +26,13 @@
#' @author John Malc \email{cincenko@@outlook.com}
#'
#' @export
getMetadata <- function(url = "") {
getMetadata <- function(url = "", email = NULL, password = NULL) {

urlParsedBase <- httr::parse_url(url)
mimeType <- mime::guess_type(urlParsedBase$path)

# use function below to get them using =COUNT(*) SODA query
gQRC <- getQueryRowCount(urlParsedBase, mimeType)
gQRC <- getQueryRowCount(urlParsedBase, mimeType, email, password)

# create URL for metadata data frame
fourByFour <- substr(basename(urlParsedBase$path), 1, 9)
Expand All @@ -39,8 +41,15 @@ getMetadata <- function(url = "") {

# execute it
URL <- httr::build_url(urlParsed)
df <- jsonlite::fromJSON(URL)

if(is.null(email) && is.null(password)){
df <- jsonlite::fromJSON(URL)
} else { # email and password are not NULL
response <- httr::GET(URL, httr::authenticate(email, password))
response_content <- httr::content(response, as="text")
df <- jsonlite::fromJSON(response_content)
}

# number of rows can be sometimes "cached". If yes, then below we calculate the maximum number of
# rows from all non-null and null fields.
# If not, then it uses "getQueryRowCount" fnct with SODA =COUNT(*) SODA query.
Expand All @@ -62,7 +71,7 @@ getMetadata <- function(url = "") {
# @author Gene Leynes \email{gleynes@@gmail.com}
#
#' @importFrom httr GET build_url content
getQueryRowCount <- function(urlParsed, mimeType) {
getQueryRowCount <- function(urlParsed, mimeType, email = NULL, password = NULL) {
## Construct the count query based on the URL,
if (is.null(urlParsed[['query']])) {
## If there is no query at all, create a simple count
Expand All @@ -81,7 +90,7 @@ getQueryRowCount <- function(urlParsed, mimeType) {
urlParsed[[c('path')]], cntQueryText)

## Execute the query to count the rows
totalRowsResult <- errorHandling(cntUrl, app_token = NULL)
totalRowsResult <- errorHandling(cntUrl, app_token = NULL, email, password)

## Parsing the result depends on the mime type
if (mimeType == "application/json") {
Expand Down
10 changes: 6 additions & 4 deletions R/returnData.R
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,8 @@ getContentAsDataFrame <- function(response) {
#' @param limit - defaults to the max of 50000. See \url{http://dev.socrata.com/docs/paging.html}.
#' @param domain - A Socrata domain, e.g \url{http://data.cityofchicago.org}
#' @param fourByFour - a unique 4x4 identifier, e.g. "ydr8-5enu". See more \code{\link{isFourByFour}}
#' @param email - (optional) The email for the Socrata account with read access to the dataset
#' @param password - (optional) The password associated with the email for the Socrata account
#'
#' @author Hugh J. Devlin, Ph. D. \email{Hugh.Devlin@@cityofchicago.org}
#'
Expand All @@ -96,7 +98,7 @@ getContentAsDataFrame <- function(response) {
#'
#' @export
read.socrata <- function(url = NULL, app_token = NULL, limit = 50000, domain = NULL, fourByFour = NULL,
query = NULL) {
query = NULL, email = NULL, password = NULL) {

if (is.null(url) == TRUE) {
buildUrl <- paste0(domain, "/resource/", fourByFour, ".json")
Expand All @@ -107,16 +109,16 @@ read.socrata <- function(url = NULL, app_token = NULL, limit = 50000, domain = N
validUrl <- validateUrl(url)
parsedUrl <- httr::parse_url(validUrl)

response <- errorHandling(validUrl, app_token)
response <- errorHandling(validUrl, app_token, email, password)
results <- getContentAsDataFrame(response)
dataTypes <- getSodaTypes(response)

rowCount <- as.numeric(getMetadata(cleanQuest(validUrl))[1])
rowCount <- as.numeric(getMetadata(cleanQuest(validUrl), email, password)[1])

## More to come? Loop over pages implicitly
while (nrow(results) < rowCount) {
query_url <- paste0(validUrl, ifelse(is.null(parsedUrl$query), "?", "&"), "$offset=", nrow(results), "&$limit=", limit)
response <- errorHandling(query_url, app_token)
response <- errorHandling(query_url, app_token, email, password)
page <- getContentAsDataFrame(response)
results <- plyr::rbind.fill(results, page) # accumulate data
}
Expand Down
74 changes: 74 additions & 0 deletions R/writeData.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
#' Methods required for uploading data to Socrata

#' Send JSON data to a Socrata endpoint with an authenticated POST or PUT
#'
#' Dispatches to \code{httr::POST} or \code{httr::PUT} according to
#' \code{http_verb}, sending the payload with HTTP basic authentication and a
#' JSON content type. The response is returned as-is; HTTP error statuses are
#' not inspected here (see the TODO in the body).
#'
#' @param json_data_to_upload - JSON encoded data to update your SODA endpoint with
#' @param url - Socrata Open Data Application Program Interface (SODA) endpoint (JSON only for now)
#' @param http_verb - "POST" or "PUT" depending on update mode; any other value is an error
#' @param email - email associated with Socrata account (will need write access to dataset)
#' @param password - password associated with Socrata account (will need write access to dataset)
#' @param app_token - optional app_token associated with Socrata account
#' @return an httr response object
#' @importFrom httr POST PUT add_headers authenticate
#'
#' @noRd
checkUpdateResponse <- function(json_data_to_upload, url, http_verb, email, password, app_token = NULL) {
  # Reject unsupported verbs before touching the network. Previously an
  # unknown verb skipped both branches and failed later with the unhelpful
  # "object 'response' not found".
  is_single_string <- is.character(http_verb) &&
    length(http_verb) == 1L &&
    !is.na(http_verb)
  if (!is_single_string || !(http_verb %in% c("POST", "PUT"))) {
    stop("http_verb must be \"POST\" or \"PUT\"", call. = FALSE)
  }

  # Both verbs take the same arguments, so pick the function once.
  send_request <- if (http_verb == "POST") httr::POST else httr::PUT

  response <- send_request(
    url,
    body = json_data_to_upload,
    httr::authenticate(email, password),
    httr::add_headers(
      "X-App-Token" = app_token,
      "Content-Type" = "application/json"
    )
  )

  # TODO: error handling
  # errorHandling(response)

  response
}


#' Write a data frame to a Socrata dataset
#'
#' @description Method for updating Socrata datasets
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

title is missing

#'
#' @param dataframe - dataframe to upload to Socrata
#' @param dataset_json_endpoint - Socrata Open Data Application Program Interface (SODA) endpoint (JSON only for now)
#' @param update_mode - "UPSERT" or "REPLACE"; consult http://dev.socrata.com/publishers/getting-started.html
#' @param email - The email for the Socrata account with write access to the dataset
#' @param password - The password associated with the email to the Socrata account
#' @param app_token - a (non-required) string; SODA API token can be used to query the data
#' portal \url{http://dev.socrata.com/consumers/getting-started.html}
#'
#' @author Mark Silverberg \email{mark.silverberg@@socrata.com}
#'
#' @importFrom httr parse_url build_url
#' @importFrom plyr rbind.fill
#'
#' @export
write.socrata <- function(dataframe, dataset_json_endpoint, update_mode, email, password, app_token = NULL) {
  # Uploads `dataframe` to `dataset_json_endpoint` as JSON and returns the
  # response object produced by checkUpdateResponse().
  #   update_mode "UPSERT"  -> HTTP POST
  #   update_mode "REPLACE" -> HTTP PUT
  # Any other update_mode stops with an error before anything is sent.

  # Guard against NULL / NA / vector input, which would otherwise fail inside
  # the comparison with an unrelated error message.
  is_single_string <- is.character(update_mode) &&
    length(update_mode) == 1L &&
    !is.na(update_mode)
  if (!is_single_string) {
    stop("update_mode must be UPSERT or REPLACE", call. = FALSE)
  }

  # translate update_mode to http_verbs
  http_verb <- switch(update_mode,
    UPSERT = "POST",
    REPLACE = "PUT",
    stop("update_mode must be UPSERT or REPLACE", call. = FALSE)
  )

  # convert dataframe to JSON; digits = NA keeps full numeric precision
  # (the toJSON default of 4 decimal digits would silently round the data
  # before it is uploaded)
  dataframe_as_json_string <- jsonlite::toJSON(dataframe, digits = NA)

  # do the actual upload
  response <- checkUpdateResponse(
    dataframe_as_json_string,
    dataset_json_endpoint,
    http_verb,
    email,
    password,
    app_token
  )

  response
}
6 changes: 5 additions & 1 deletion man/getMetadata.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,14 @@
\url{http://stackoverflow.com/a/29782941}
}
\usage{
getMetadata(url = "")
getMetadata(url = "", email = NULL, password = NULL)
}
\arguments{
\item{url}{- A Socrata resource URL, or a Socrata "human-friendly" URL!}

\item{email}{- (optional) The email for the Socrata account with read access to the dataset}

\item{password}{- (optional) The password associated with the email for the Socrata account}
}
\value{
a list (!) containing a number of rows & columns and a data frame of metadata
Expand Down
8 changes: 7 additions & 1 deletion man/read.socrata.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
\title{Get a full Socrata data set as an R data frame}
\usage{
read.socrata(url = NULL, app_token = NULL, limit = 50000, domain = NULL,
fourByFour = NULL, query = NULL)
fourByFour = NULL, query = NULL, email = NULL, password = NULL)
}
\arguments{
\item{url}{- A Socrata resource URL, or a Socrata "human-friendly" URL,
Expand All @@ -26,15 +26,21 @@ portal \url{http://dev.socrata.com/consumers/getting-started.html}}

\item{query}{- Based on query language called the "Socrata Query Language" ("SoQL"), see
\url{http://dev.socrata.com/docs/queries.html}.}

\item{email}{- (optional) The email for the Socrata account with read access to the dataset}

\item{password}{- (optional) The password associated with the email for the Socrata account}
}
\description{
Manages throttling and POSIX date-time conversions. We support only .json suffix.
}
\examples{
\dontrun{
df_1 <- read.socrata(url = "http://soda.demo.socrata.com/resource/4334-bgaj.csv")
df_2 <- read.socrata(domain = "http://data.cityofchicago.org/", fourByFour = "ydr8-5enu")
df_3 <- read.socrata(url = "http://data.cityofchicago.org/resource/ydr8-5enu.json")
}
}
\author{
Hugh J. Devlin, Ph. D. \email{[email protected]}
}
Expand Down
29 changes: 29 additions & 0 deletions man/write.socrata.Rd
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
% Generated by roxygen2 (4.1.1): do not edit by hand
% Please edit documentation in R/writeData.R
\name{write.socrata}
\alias{write.socrata}
\usage{
write.socrata(dataframe, dataset_json_endpoint, update_mode, email, password,
app_token = NULL)
}
\arguments{
\item{dataframe}{- dataframe to upload to Socrata}

\item{dataset_json_endpoint}{- Socrata Open Data Application Program Interface (SODA) endpoint (JSON only for now)}

\item{update_mode}{- "UPSERT" or "REPLACE"; consult http://dev.socrata.com/publishers/getting-started.html}

\item{email}{- The email for the Socrata account with write access to the dataset}

\item{password}{- The password associated with the email to the Socrata account}

\item{app_token}{- a (non-required) string; SODA API token can be used to query the data
portal \url{http://dev.socrata.com/consumers/getting-started.html}}
}
\description{
Method for updating Socrata datasets
}
\author{
Mark Silverberg \email{[email protected]}
}

26 changes: 26 additions & 0 deletions tests/testthat/test-readPrivateDataset.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
context("Test reading private Socrata dataset with email and password")

privateResourceToReadCsvUrl <- "https://soda.demo.socrata.com/resource/a9g2-feh2.csv"
privateResourceToReadJsonUrl <- "https://soda.demo.socrata.com/resource/a9g2-feh2.json"
socrataEmail <- Sys.getenv("SOCRATA_EMAIL", "")
socrataPassword <- Sys.getenv("SOCRATA_PASSWORD", "")

test_that("read Socrata CSV that requires a login", {
  # should error when no email and password are sent with the request
  expect_error(read.socrata(url = privateResourceToReadCsvUrl))

  # The authenticated half needs real credentials from the environment;
  # skip it instead of failing when they are not configured.
  if (socrataEmail == "" || socrataPassword == "") {
    skip("SOCRATA_EMAIL / SOCRATA_PASSWORD are not set")
  }

  # try again, this time with email and password in the request
  df <- read.socrata(
    url = privateResourceToReadCsvUrl,
    email = socrataEmail,
    password = socrataPassword
  )

  # tests (actual value first, expected value second)
  expect_equal(ncol(df), 2, label = "columns")
  expect_equal(nrow(df), 3, label = "rows")
})

test_that("read Socrata JSON that requires a login", {
  # should error when no email and password are sent with the request
  expect_error(read.socrata(url = privateResourceToReadJsonUrl))

  # The authenticated half needs real credentials from the environment;
  # skip it instead of failing when they are not configured.
  if (socrataEmail == "" || socrataPassword == "") {
    skip("SOCRATA_EMAIL / SOCRATA_PASSWORD are not set")
  }

  # try again, this time with email and password in the request
  df <- read.socrata(
    url = privateResourceToReadJsonUrl,
    email = socrataEmail,
    password = socrataPassword
  )

  # tests (actual value first, expected value second)
  expect_equal(ncol(df), 2, label = "columns")
  expect_equal(nrow(df), 3, label = "rows")
})
44 changes: 44 additions & 0 deletions tests/testthat/test-writeData.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
context("write Socrata datasets")

socrataEmail <- Sys.getenv("SOCRATA_EMAIL", "")
socrataPassword <- Sys.getenv("SOCRATA_PASSWORD", "")

test_that("add a row to a dataset", {
  # Writing and reading back both require real credentials from the
  # environment; skip instead of failing when they are not configured.
  if (socrataEmail == "" || socrataPassword == "") {
    skip("SOCRATA_EMAIL / SOCRATA_PASSWORD are not set")
  }

  datasetToAddToUrl <- "https://soda.demo.socrata.com/resource/xh6g-yugi.json"

  # populate df_in with two columns, each with a random number
  x <- sample(-1000:1000, 1)
  y <- sample(-1000:1000, 1)
  df_in <- data.frame(x, y)

  # write to dataset
  write.socrata(df_in, datasetToAddToUrl, "UPSERT", socrataEmail, socrataPassword)

  # read from dataset and store last (most recent) row for comparisons / tests
  # NOTE(review): assumes the endpoint returns rows in insertion order -- confirm
  df_out <- read.socrata(
    url = datasetToAddToUrl,
    email = socrataEmail,
    password = socrataPassword
  )
  df_out_last_row <- tail(df_out, n = 1)

  expect_equal(df_in$x, as.numeric(df_out_last_row$x), label = "x value")
  expect_equal(df_in$y, as.numeric(df_out_last_row$y), label = "y value")
})


test_that("fully replace a dataset", {
  # Writing and reading back both require real credentials from the
  # environment; skip instead of failing when they are not configured.
  if (socrataEmail == "" || socrataPassword == "") {
    skip("SOCRATA_EMAIL / SOCRATA_PASSWORD are not set")
  }

  datasetToReplaceUrl <- "https://soda.demo.socrata.com/resource/kc76-ybeq.json"

  # populate df_in with two columns of random numbers
  x <- sample(-1000:1000, 5)
  y <- sample(-1000:1000, 5)
  df_in <- data.frame(x, y)

  # write to dataset
  write.socrata(df_in, datasetToReplaceUrl, "REPLACE", socrataEmail, socrataPassword)

  # read from dataset for comparisons / tests
  df_out <- read.socrata(
    url = datasetToReplaceUrl,
    email = socrataEmail,
    password = socrataPassword
  )

  expect_equal(ncol(df_in), ncol(df_out), label = "columns")
  expect_equal(nrow(df_in), nrow(df_out), label = "rows")
  expect_equal(df_in$x, as.numeric(df_out$x), label = "x values")
  expect_equal(df_in$y, as.numeric(df_out$y), label = "y values")
})