Skip to content

Commit

Permalink
Merge pull request #60 from atorus-research/57-mike-updates
Browse files Browse the repository at this point in the history
57 mike updates
  • Loading branch information
nicholas-masel authored Jan 24, 2025
2 parents a1b81f4 + 5477591 commit 9d29df3
Show file tree
Hide file tree
Showing 32 changed files with 1,349 additions and 144 deletions.
6 changes: 4 additions & 2 deletions .github/workflows/R-CMD-check.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,10 @@ jobs:
config:
- {os: windows-latest, r: 'release'}
- {os: macOS-latest, r: 'release'}
- {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"}
- {os: ubuntu-20.04, r: 'devel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"}
- {os: ubuntu-22.04, r: 'release', rspm: "https://packagemanager.posit.co/cran/__linux__/jammy/latest"}
- {os: ubuntu-22.04, r: 'devel', rspm: "https://packagemanager.posit.co/cran/__linux__/jammy/latest"}
- {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'}
- {os: ubuntu-latest, r: 'release'}

env:
R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
Expand Down
14 changes: 10 additions & 4 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: datasetjson
Type: Package
Title: Read and Write CDISC Dataset JSON Files
Version: 0.2.0
Version: 0.3.0
Authors@R: c(
person(given = "Mike",
family = "Stackhouse",
Expand All @@ -14,7 +14,7 @@ Authors@R: c(
role = "aut")
)
Description: Read, construct and write CDISC (Clinical Data Interchange Standards Consortium) Dataset JSON (JavaScript Object Notation) files, while validating per the Dataset JSON schema file, as described in CDISC (2023) <https://www.cdisc.org/dataset-json>.
URL: https://github.com/atorus-research/datasetjson
URL: https://atorus-research.github.io/datasetjson
BugReports: https://github.com/atorus-research/datasetjson/issues
Encoding: UTF-8
Language: en-US
Expand All @@ -25,13 +25,19 @@ RoxygenNote: 7.3.2
Depends: R (>= 4.0)
Imports:
yyjsonr (>= 0.1.18),
jsonvalidate (>= 1.3.1)
jsonvalidate (>= 1.3.1),
lubridate
Suggests:
testthat (>= 2.1.0),
jsonlite (>= 1.8.0),
knitr,
haven,
rmarkdown,
withr
withr,
purrr,
tibble,
dplyr,
hms,
data.table
VignetteBuilder: knitr
Config/testthat/edition: 3
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Generated by roxygen2: do not edit by hand

export(dataset_json)
export(get_column_metadata)
export(read_dataset_json)
export(set_dataset_label)
export(set_dataset_name)
Expand All @@ -12,9 +13,11 @@ export(set_metadata_version)
export(set_originator)
export(set_source_system)
export(set_study_oid)
export(set_variable_attributes)
export(validate_dataset_json)
export(write_dataset_json)
importFrom(jsonvalidate,json_validate)
importFrom(lubridate,hms)
importFrom(tools,file_path_sans_ext)
importFrom(utils,tail)
importFrom(yyjsonr,opts_read_json)
Expand Down
95 changes: 95 additions & 0 deletions R/helpers.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
#' Extract column metadata to data frame
#'
#' This function pulls out the column metadata from the `datasetjson` object
#' attributes into a more user-friendly data.frame.
#'
#' @param x A datasetjson object
#'
#' @returns A data frame containing the columns metadata
#' @export
#'
#' @examples
#'
#' ds_json <- dataset_json(
#' iris,
#' item_oid = "IG.IRIS",
#' name = "IRIS",
#' dataset_label = "Iris",
#' columns = iris_items
#' )
#'
#' get_column_metadata(ds_json)
get_column_metadata <- function(x) {
stopifnot_datasetjson(x)

cols <- attributes(x)$columns

do.call(rbind, lapply(cols, cols_list_to_df))
}

#' Convert list input from Dataset JSON columns element to a dataframe.
#'
#' @param clist Dataset JSON columns element, provided as a named list
#'
#' @returns List converted to dataframe
#' @noRd
cols_list_to_df <- function(clist) {
x <- list(
itemOID = NULL,
name = NULL,
label = NULL,
dataType = NULL,
targetDataType = NA_character_,
length = NA_integer_,
displayFormat = NA_character_,
keySequence = NA_integer_
)

# Fill in the blanks
missing_names <- setdiff(names(x), names(clist))
for (n in missing_names) {
clist[n] <- x[n]
}

as.data.frame(clist)
}

#' Assign Dataset JSON attributes to data frame columns
#'
#' Using the `columns` element of the Dataset JSON file, assign the available
#' metadata to individual columns
#'
#' @param x A datasetjson object
#'
#' @returns A datasetjson object with attributes assigned to individual
#' variables
#' @export
#'
#' @examples
#'
#' ds_json <- dataset_json(
#' iris,
#' item_oid = "IG.IRIS",
#' name = "IRIS",
#' dataset_label = "Iris",
#' columns = iris_items
#' )
#'
#' ds_json <- set_variable_attributes(ds_json)
set_variable_attributes <- function(x) {
stopifnot_datasetjson(x)
cols <- attributes(x)$columns

for (l in cols) {
# Pop the name
n <- l$name
l$name <- NULL

# Loop and set the attrs
for (a in names(l)) {
attr(x[[n]], a) <- l[[a]]
}

}
x
}
14 changes: 6 additions & 8 deletions R/read_dataset_json.R
Original file line number Diff line number Diff line change
Expand Up @@ -86,19 +86,16 @@ read_dataset_json <- function(file) {
colnames(d) <- items$name

# Process type conversions
tt <- items$dataType
dt <- items$dataType
tdt <- items$targetDataType
int_cols <- tt == "integer"
dbl_cols <- tt %in% c("float", "double", "decimal")
bool_cols <- tt == "boolean"
int_cols <- dt == "integer"
dbl_cols <- dt %in% c("float", "double", "decimal")
bool_cols <- dt == "boolean"
d[int_cols] <- lapply(d[int_cols], as.integer)
d[dbl_cols] <- lapply(d[dbl_cols], as.double)
d[bool_cols] <- lapply(d[bool_cols], as.logical)

date_cols <- tt %in% c("date") & tdt %in% "integer"
datetime_cols <- tt %in% c("datetime", "time") & tdt %in% "integer"
d[date_cols] <- lapply(d[date_cols], as.Date)
d[datetime_cols] <- lapply(d[datetime_cols], as.POSIXct)
d <- date_time_conversions(d, dt, tdt)

# Apply variable labels
d[names(d)] <- lapply(items$name, set_col_attr, d, 'label', items)
Expand Down Expand Up @@ -126,6 +123,7 @@ read_dataset_json <- function(file) {
}

attr(ds_attr, 'records') <- ds_json$records
attr(ds_attr, 'datasetJSONCreationDateTime') <- ds_json$datasetJSONCreationDateTime

ds_attr
}
25 changes: 25 additions & 0 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -106,3 +106,28 @@ df_to_list_rows <- function(x) {
})
}

#' Convert date, datetime and time
#'
#' The variable attributes are stored as named lists within the output
#' JSON file, so to write them out the dataframe needs to be a named
#' list of rows
#'
#' @param d A data.frame
#' @param dt A character vector of dataTypes
#' @param tdt A character vector of targetDataTypes
#'
#' @return A data.frame with converted columns
#' @noRd
date_time_conversions <- function(d, dt, tdt){
date_cols <- dt %in% c("date") & tdt %in% "integer"
datetime_cols <- dt %in% c("datetime") & tdt %in% "integer"
time_cols <- dt %in% c("time") & tdt %in% "integer"
d[date_cols] <- lapply(d[date_cols], as.Date, tz = "UTC")
d[datetime_cols] <- lapply(d[datetime_cols],
as.POSIXct,
tz = "UTC",
tryFormats = "%Y-%m-%dT%H:%M:%S")
d[time_cols] <- lapply(d[time_cols], hms)
d
}

8 changes: 7 additions & 1 deletion R/validate_dataset_json.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,13 @@
#' validate_dataset_json('https://www.somesite.com/file.json')
#' }
#'
#' ds_json <- dataset_json(iris, "IG.IRIS", "IRIS", "Iris", iris_items)
#' ds_json <- dataset_json(
#' iris,
#' item_oid = "IG.IRIS",
#' name = "IRIS",
#' dataset_label = "Iris",
#' columns = iris_items
#' )
#' js <- write_dataset_json(ds_json)
#'
#' validate_dataset_json(js)
Expand Down
62 changes: 60 additions & 2 deletions R/write_dataset_json.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,14 @@
#'
#' @examples
#' # Write to character object
#' ds_json <- dataset_json(iris, "IG.IRIS", "IRIS", "Iris", iris_items)
#' js <- write_dataset_json(ds_json, iris_items)
#' ds_json <- dataset_json(
#' iris,
#' item_oid = "IG.IRIS",
#' name = "IRIS",
#' dataset_label = "Iris",
#' columns = iris_items
#' )
#' js <- write_dataset_json(ds_json)
#'
#' # Write to disk
#' \dontrun{
Expand All @@ -21,6 +27,47 @@
write_dataset_json <- function(x, file, pretty=FALSE) {
stopifnot_datasetjson(x)

# Find all date, datetime and time columns and convert to character
for (y in attr(x,'columns')) {

# Make sure metadata is compliant
if (y$dataType %in% c("date", "datetime", "time") & !("targetDataType" %in% names(y))) {
if (!inherits(x[[y$name]], "character")) {
stop_write_error(
y$name,
"If dataType is date, time, or datetime and targetDataType is null, the input variable type must be character"
)
}
}

if(y$dataType %in% c("date", "datetime", "time") & (!is.null(y$targetDataType) && y$targetDataType == "integer")) {
# Convert date
if (y$dataType == "date") {
x[y$name] <- format(x[[y$name]], "%Y-%m-%d", tz='UTC')
}

# Convert datetime
if (y$dataType == "datetime") {
# Ensure type and timezone is right.
if (!inherits(x[[y$name]], "POSIXt") || !("UTC" %in% attr(x[[y$name]], 'tzone'))){
stop_write_error(y$name, "Date time variable must be provided as POSIXlt type with timezone set to UTC.")
}
x[y$name] <- strftime(x[[y$name]], "%Y-%m-%dT%H:%M:%S", tz='UTC')
}

# Convert time
if (y$dataType == "time") {
if (y$dataType == "time" & !inherits(x[[y$name]], c("Period", "difftime", "ITime"))) {
stop_write_error(
y$name,
"If dataType is time and targetDataType is integer, the input variable type must be a lubridate Period, an hms difftime, or a data.table ITime object"
)
}
x[y$name] <- strftime(as.numeric(x[[y$name]]), "%H:%M:%S", tz='UTC')
}
}
}

# Populate the creation datetime
attr(x, 'datasetJSONCreationDateTime') <- get_datetime()

Expand Down Expand Up @@ -64,6 +111,7 @@ write_dataset_json <- function(x, file, pretty=FALSE) {
auto_unbox = TRUE,
)


if (!missing(file)) {
# Write file to disk
yyjsonr::write_json_file(
Expand All @@ -79,3 +127,13 @@ write_dataset_json <- function(x, file, pretty=FALSE) {
)
}
}

stop_write_error <- function(varname, msg){
stop(
sprintf(paste(
"Please check the variable %s.",
msg,
sep="\n "),
varname)
)
}
1 change: 1 addition & 0 deletions R/zzz.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#' @importFrom yyjsonr opts_write_json opts_read_json write_json_file write_json_str read_json_str read_json_file
#' @importFrom tools file_path_sans_ext
#' @importFrom utils tail
#' @importFrom lubridate hms
NULL

#' @keywords internal
Expand Down
Loading

0 comments on commit 9d29df3

Please sign in to comment.