diff --git a/DESCRIPTION b/DESCRIPTION index 3c1bd562..cfbbe56d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -27,6 +27,7 @@ Suggests: decor, knitr, dust, + mockery, mvtnorm, numDeriv, pkgload, diff --git a/NAMESPACE b/NAMESPACE index 7978ad0b..12bbf48e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,8 +1,10 @@ # Generated by roxygen2: do not edit by hand S3method("+",mcstate_model) +S3method(cnd_footer,mcstate2_parse_error) export(mcstate_domain_expand) export(mcstate_dsl) +export(mcstate_dsl_error_explain) export(mcstate_dsl_parse_distribution) export(mcstate_model) export(mcstate_model_combine) @@ -24,4 +26,5 @@ export(mcstate_sampler_nested_random_walk) export(mcstate_sampler_random_walk) export(with_trace_random) importFrom(R6,R6Class) +importFrom(rlang,cnd_footer) useDynLib(mcstate2, .registration = TRUE) diff --git a/R/dsl-parse-error.R b/R/dsl-parse-error.R new file mode 100644 index 00000000..fbe0eb10 --- /dev/null +++ b/R/dsl-parse-error.R @@ -0,0 +1,97 @@ +##' Explain error codes produced by mcstate2. This is a work in progress, +##' and we would like feedback on what is useful as we improve it. +##' The idea is that if you see an error you can link through to get +##' more information on what it means and how to resolve it. The +##' current implementation of this will send you to the rendered +##' vignettes, but in future we will arrange for offline rendering +##' too. +##' +##' @title Explain mcstate2 error +##' +##' @param code The error code, as a string, in the form `Exxx` (a +##' capital "E" followed by three numbers) +##' +##' @return Nothing, this is called for its side effect only +##' +##' @export +mcstate_dsl_error_explain <- function(code) { + ## See odin2 for the canonical implementation of this, we're just + ## shaddowing it here really as our dsl is much simpler. Note that + ## error codes from mcstate are three numbers long, whereas they are + ## four long in odin so that it's easy (for us) to tell who the + ## error belongs to. + assert_scalar_character(code) + if (!grepl("^E[0-9]{3}$", code)) { + cli::cli_abort("Invalid code '{code}', should match 'Exxx'", + arg = "code") + } + txt <- dsl_errors[[code]] + if (is.null(txt)) { + cli::cli_abort( + c("Error '{code}' is undocumented", + i = paste("If you were directed here from an error message, please", + "let us know (e.g., file an issue or send us a message)")), + arg = "code") + } + url <- sprintf( + "https://mrc-ide.github.io/mcstate2/articles/dsl-errors.html#%s", + tolower(code)) + utils::browseURL(url) +} + + +dsl_parse_error <- function(msg, code, src, call, ..., + .envir = parent.frame()) { + stopifnot(grepl("^E[0-9]{3}$", code)) + cli::cli_abort(msg, + class = "mcstate2_parse_error", + code = code, + src = src, + call = call, + ..., + .envir = .envir) +} + + +##' @importFrom rlang cnd_footer +##' @export +cnd_footer.mcstate2_parse_error <- function(cnd, ...) { + detail <- c(">" = "In expression", + format_error_src(cnd$src)) + for (i in seq_along(cnd$context)) { + detail <- c(detail, + "", + i = names(cnd$context)[[i]], + format_error_src(cnd$context[[i]])) + } + ## Annoyingly, there's no way of marking text as whitespace + ## preserving within cli, so we need to do a substitution here for + ## "nonbreaking space" which does ok. We should also convert tabs + ## to some number of spaces, probably. + detail <- gsub(" ", "\u00a0", detail) + + + code <- cnd$code + ## See https://cli.r-lib.org/reference/links.html#click-to-run-code + ## RStudio will only run code in namespaced form + explain <- cli::format_inline( + paste("For more information, run", + "{.run mcstate2::mcstate2_dsl_error_explain(\"{code}\")}")) + c(detail, i = explain) +} + + +## TODO: this is nicer than what we do in odin. I wonder if it might +## make sense to try and have odin2 depend on the code here? +format_error_src <- function(src) { + str <- attr(src, "str", exact = TRUE) + if (is.null(str)) { + detail <- deparse(src) + } else { + ## We can adjust the formatting here later, but this will + ## hopefully be fairly nice for users. + lines <- seq(attr(src, "line"), length.out = length(str)) + detail <- sprintf("%s| %s", cli::col_grey(format(lines, width = 3)), str) + } + detail +} diff --git a/R/dsl-parse.R b/R/dsl-parse.R index 20c90a90..c43fce09 100644 --- a/R/dsl-parse.R +++ b/R/dsl-parse.R @@ -1,8 +1,8 @@ -dsl_parse <- function(exprs) { - exprs <- lapply(exprs, dsl_parse_expr) +dsl_parse <- function(exprs, call) { + exprs <- lapply(exprs, dsl_parse_expr, call) - dsl_parse_check_duplicates(exprs) - dsl_parse_check_usage(exprs) + dsl_parse_check_duplicates(exprs, call) + dsl_parse_check_usage(exprs, call) name <- vcapply(exprs, "[[", "name") parameters <- name[vcapply(exprs, "[[", "type") == "stochastic"] @@ -11,32 +11,33 @@ dsl_parse <- function(exprs) { } -dsl_parse_expr <- function(expr) { +dsl_parse_expr <- function(expr, call) { if (rlang::is_call(expr, "~")) { - dsl_parse_expr_stochastic(expr) + dsl_parse_expr_stochastic(expr, call) } else if (rlang::is_call(expr, c("<-", "="))) { - dsl_parse_expr_assignment(expr) + dsl_parse_expr_assignment(expr, call) } else { dsl_parse_error( "Unhandled expression; expected something involving '~' or '<-'", - expr) + "E101", expr, call) } } -dsl_parse_expr_stochastic <- function(expr) { +dsl_parse_expr_stochastic <- function(expr, call) { lhs <- expr[[2]] if (!rlang::is_symbol(lhs)) { ## TODO: once we support array expressions this will be relaxed a ## little to allow lhs to be 'symbol[index]' - dsl_parse_error("Expected lhs of '~' relationship to be a symbol", expr) + dsl_parse_error("Expected lhs of '~' relationship to be a symbol", + "E102", expr, call) } rhs <- expr[[3]] res <- mcstate_dsl_parse_distribution(rhs) if (!res$success) { - dsl_parse_error(res$error, expr) + dsl_parse_error(res$error, "E103", expr, call) } ## This probably requires a little more care in order to know that @@ -55,12 +56,13 @@ dsl_parse_expr_stochastic <- function(expr) { } -dsl_parse_expr_assignment <- function(expr) { +dsl_parse_expr_assignment <- function(expr, call) { lhs <- expr[[2]] if (!rlang::is_symbol(lhs)) { ## TODO: once we support array expressions this will be relaxed a ## little to allow lhs to be 'symbol[index]' - dsl_parse_error("Expected lhs of assignment to be a symbol", expr) + dsl_parse_error("Expected lhs of assignment to be a symbol", + "E104", expr, call) } rhs <- expr[[3]] ## I suspect we'll need to be quite restrictive about what @@ -74,50 +76,7 @@ dsl_parse_expr_assignment <- function(expr) { } -dsl_parse_error <- function(msg, expr, ..., envir = parent.frame(), - call = NULL) { - cli::cli_abort(msg, - ..., - expr = expr, - footer = dsl_parse_error_show_context, - .envir = envir, - class = "mcstate2_parse_error", - call = call) -} - - -dsl_parse_error_show_context <- function(cnd, ...) { - detail <- c(">" = "In expression", - format_error_expr(cnd$expr)) - for (i in seq_along(cnd$context)) { - detail <- c(detail, - "", - i = names(cnd$context)[[i]], - format_error_expr(cnd$context[[i]])) - } - ## Annoyingly, there's no way of marking text as whitespace - ## preserving within cli, so we need to do a substitution here for - ## "nonbreaking space" which does ok. We should also convert tabs - ## to some number of spaces, probably. - gsub(" ", "\u00a0", detail) -} - - -format_error_expr <- function(expr) { - str <- attr(expr, "str", exact = TRUE) - if (is.null(str)) { - detail <- deparse(expr) - } else { - ## We can adjust the formatting here later, but this will - ## hopefully be fairly nice for users. - lines <- seq(attr(expr, "line"), length.out = length(str)) - detail <- sprintf("%s| %s", cli::col_grey(format(lines, width = 3)), str) - } - detail -} - - -dsl_parse_check_duplicates <- function(exprs) { +dsl_parse_check_duplicates <- function(exprs, call) { type <- vcapply(exprs, "[[", "type") name <- vcapply(exprs, "[[", "name") i_err <- anyDuplicated(name) @@ -127,23 +86,27 @@ dsl_parse_check_duplicates <- function(exprs) { if (type[[i_err]] == type[[i_prev]]) { if (type[[i_err]] == "stochastic") { msg <- "Duplicated relationship '{name_err}'" + code <- "E201" } else { msg <- "Duplicated assignment '{name_err}'" + code <- "E202" } } else { if (type[[i_err]] == "stochastic") { msg <- "Relationship '{name_err}' shadows previous assignment" + code <- "E203" } else { msg <- "Assignment '{name_err}' shadows previous relationship" + code <- "E204" } } context <- list("Previous definition" = exprs[[i_prev]]$expr) - dsl_parse_error(msg, exprs[[i_err]]$expr, context = context) + dsl_parse_error(msg, code, exprs[[i_err]]$expr, call, context = context) } } -dsl_parse_check_usage <- function(exprs) { +dsl_parse_check_usage <- function(exprs, call) { name <- vcapply(exprs, "[[", "name") for (i in seq_along(exprs)) { e <- exprs[[i]] @@ -165,7 +128,7 @@ dsl_parse_check_usage <- function(exprs) { ## probably something rlang can do for us as it does that with ## the 'arg' argument to rlang::abort already? dsl_parse_error("Invalid use of variable{?s} {squote(err)}", - e$expr, context = context) + "E205", e$expr, call, context = context) } } } diff --git a/R/dsl-preprocess.R b/R/dsl-preprocess.R index 2156f08c..6d8cbf6e 100644 --- a/R/dsl-preprocess.R +++ b/R/dsl-preprocess.R @@ -6,16 +6,17 @@ ## able to point at individual errors _within_ the tree (so specific ## arguments etc) but that's quite hard so for now we will just hold a ## deparsed expression as an attribute of each expression. -dsl_preprocess <- function(x, type = NULL) { +dsl_preprocess <- function(x, type = NULL, call = NULL) { if (rlang::is_call(x, "quote")) { given <- rlang::expr_deparse(x) alt <- rlang::expr_deparse(x[[2]]) cli::cli_abort( c("You have an extra layer of quote() around 'x'", - i = "You passed '{given}' but probably meant to pass '{alt}'")) + i = "You passed '{given}' but probably meant to pass '{alt}'"), + call = call) } - type <- preprocess_detect(x, type) + type <- preprocess_detect(x, type, call) if (type == "expression") { if (inherits(x, "{")) { exprs <- as.list(x[-1L]) @@ -81,7 +82,7 @@ preprocess_detect <- function(x, type, call = NULL) { call = call) } } else { - cli::cli_abort("Invalid input for 'x'") + cli::cli_abort("Invalid input for 'x'", call = call) } as } diff --git a/R/dsl.R b/R/dsl.R index d02d18d4..af7e891f 100644 --- a/R/dsl.R +++ b/R/dsl.R @@ -38,13 +38,15 @@ mcstate_dsl <- function(x, type = NULL) { } else { x <- rlang::quo_get_expr(quo) } - exprs <- dsl_preprocess(x, type) - dat <- dsl_parse(exprs) + call <- environment() + exprs <- dsl_preprocess(x, type, call) + dat <- dsl_parse(exprs, call) dsl_generate(dat) } mcstate_dsl_parse <- function(x, type = NULL) { + call <- environment() quo <- rlang::enquo(x) if (rlang::quo_is_symbol(quo)) { x <- rlang::eval_tidy(quo) @@ -52,7 +54,7 @@ mcstate_dsl_parse <- function(x, type = NULL) { x <- rlang::quo_get_expr(quo) } exprs <- dsl_preprocess(x, type) - dsl_parse(exprs) + dsl_parse(exprs, call) } diff --git a/R/sysdata.rda b/R/sysdata.rda new file mode 100644 index 00000000..2105b8e2 Binary files /dev/null and b/R/sysdata.rda differ diff --git a/_pkgdown.yml b/_pkgdown.yml index 5680e7de..5589264d 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -13,6 +13,7 @@ reference: - title: Domain specific language contents: - mcstate_dsl + - mcstate_dsl_error_explain - dsl-distributions - subtitle: Advanced diff --git a/man/mcstate_dsl_error_explain.Rd b/man/mcstate_dsl_error_explain.Rd new file mode 100644 index 00000000..ac81d1db --- /dev/null +++ b/man/mcstate_dsl_error_explain.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dsl-parse-error.R +\name{mcstate_dsl_error_explain} +\alias{mcstate_dsl_error_explain} +\title{Explain mcstate2 error} +\usage{ +mcstate_dsl_error_explain(code) +} +\arguments{ +\item{code}{The error code, as a string, in the form \code{Exxxx} (a +capital "E" followed by four numbers)} +} +\value{ +Nothing, this is called for its side effect only +} +\description{ +Explain error codes produced by mcstate2. This is a work in progress, +and we would like feedback on what is useful as we improve it. +The idea is that if you see an error you can link through to get +more information on what it means and how to resolve it. The +current implementation of this will send you to the rendered +vignettes, but in future we will arrange for offline rendering +too. +} diff --git a/scripts/update_parse_errors b/scripts/update_parse_errors new file mode 100755 index 00000000..8af5d42c --- /dev/null +++ b/scripts/update_parse_errors @@ -0,0 +1,34 @@ +#!/usr/bin/env Rscript + +read_errors <- function() { + path_rmd <- "vignettes/dsl-errors.Rmd" + txt <- readLines(path_rmd) + + re <- "^# `(E[0-9]{3})`$" + i <- grep(re, txt) + if (length(setdiff(grep("^# ", txt), i)) > 0) { + stop("Some headings don't match expected pattern") + } + + f <- function(from, to) { + ret <- txt[from:to] + while (ret[[1]] == "") { + ret <- ret[-1] + } + while (ret[[length(ret)]] == "") { + ret <- ret[-length(ret)] + } + ret + } + + ret <- Map(f, i + 1, c(i[-1] - 1, length(txt))) + names(ret) <- sub(re, "\\1", txt[i]) + ret +} + + +## We might parse this further, e.g., with commonmark, so that we can +## render this nicely to the console as cli would make this pretty +## easy really. +dsl_errors <- read_errors() +save(list = "dsl_errors", file = file.path("R/sysdata.rda"), version = 2) diff --git a/tests/testthat/test-dsl-parse.R b/tests/testthat/test-dsl-parse.R index a7c11225..5b43060b 100644 --- a/tests/testthat/test-dsl-parse.R +++ b/tests/testthat/test-dsl-parse.R @@ -1,8 +1,8 @@ test_that("can throw sensible errors with expression information", { expr <- structure(quote(a + b), line = 10, str = "a+b") msg <- "some error message" - err <- expect_error(dsl_parse_error(msg, expr), "some error message") - expect_equal(err$expr, expr) + err <- expect_error(dsl_parse_error(msg, "E001", expr), "some error message") + expect_equal(err$src, expr) expect_match(cli::ansi_strip(conditionMessage(err)), "In expression\n 10| a+b", fixed = TRUE) }) @@ -11,8 +11,8 @@ test_that("can throw sensible errors with expression information", { test_that("can throw sensible errors without expression information", { expr <- quote(a + b) msg <- "some error message" - err <- expect_error(dsl_parse_error(msg, expr), "some error message") - expect_equal(err$expr, expr) + err <- expect_error(dsl_parse_error(msg, "E100", expr), "some error message") + expect_equal(err$src, expr) }) @@ -55,7 +55,7 @@ test_that("prevent models that imply duplicated parameters", { res <- expect_error( mcstate_dsl_parse("a~Normal(0,1)\na ~ Uniform( 0, 1 )"), "Duplicated relationship 'a'") - expect_equal(res$expr, + expect_equal(res$src, structure(quote(a ~ Uniform(0, 1)), line = 2, str = "a ~ Uniform( 0, 1 )")) expect_equal(names(res$context), "Previous definition") @@ -89,7 +89,7 @@ test_that("variables are not used out of order", { a <- Normal(0, 1) }), "Invalid use of variable 'a'") - expect_equal(res$expr, quote(b <- Normal(a, 1))) + expect_equal(res$src, quote(b <- Normal(a, 1))) expect_equal(res$context, list("'a' is defined later:" = quote(a <- Normal(0, 1)))) }) @@ -102,7 +102,7 @@ test_that("variables must be defined somewhere", { b <- Normal(a, sd) }), "Invalid use of variable 'sd'") - expect_equal(res$expr, quote(b <- Normal(a, sd))) + expect_equal(res$src, quote(b <- Normal(a, sd))) }) @@ -198,3 +198,35 @@ test_that("report back invalid distribution calls", { "i" = "Call should match:", "*" = "mean, sd")) }) + + +test_that("can explain an error", { + skip_if_not_installed("mockery") + mock_browse <- mockery::mock() + mockery::stub(mcstate_dsl_error_explain, "utils::browseURL", mock_browse) + mcstate_dsl_error_explain("E101") + mockery::expect_called(mock_browse, 1) + expect_equal( + mockery::mock_args(mock_browse)[[1]], + list("https://mrc-ide.github.io/mcstate2/articles/dsl-errors.html#e101")) +}) + + +test_that("error if given invalid code", { + msg <- "Invalid code 'E01', should match 'Exxx'" + expect_error(mcstate_dsl_error_explain("E01"), + "Invalid code 'E01', should match 'Exxx'") + expect_error(mcstate_dsl_error_explain("e0001"), + "Invalid code 'e0001', should match 'Exxx'") + expect_error(mcstate_dsl_error_explain("E0001"), + "Invalid code 'E0001', should match 'Exxx'") + expect_error(mcstate_dsl_error_explain("anything"), + "Invalid code 'anything', should match 'Exxx'") +}) + + +test_that("error if given unknown code", { + expect_error( + mcstate_dsl_error_explain("E999"), + "Error 'E999' is undocumented") +}) diff --git a/vignettes/dsl-errors.Rmd b/vignettes/dsl-errors.Rmd new file mode 100644 index 00000000..13eb49f1 --- /dev/null +++ b/vignettes/dsl-errors.Rmd @@ -0,0 +1,123 @@ +--- +title: "DSL parse errors" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{DSL parse errors} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + + + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +This vignette outlines errors that might be generated when parsing mcstate2 DSL code, with more explanation about the error and how they can be avoided. Don't read this top to bottom as it's quite boring! However, if we get errors that benefit from more explanation about why they've been thrown then we'll expand on the contents here and arrange for these to be linked from the thrown error directly. + +The error numbers are arbitrary after the first digit. The first digit will correspond to different phases of the parsing: + +* `E1xx` - errors during parsing of individual expressions +* `E2xx` - errors when considering the system as a whole + +# `E101` + +We found an expression that is neither an assignment (with `<-`) or a stochastic relationship (with `~`) + +Example: + +``` +a + 1 +``` + +# `E102` + +Invalid left hand side of a relationship operator (`~`). Currently the left hand side must be symbol, though this will be relaxed in future once we support an array syntax. However, you may not use things like numbers or function calls on the left hand side. + +Example: + +``` +1 <- 2 +f(x) <- g(y) +``` + +# `E103` + +Your distribution call failed to parse. This can fail for many reasons, and the details of the failure come from `mcstate2::mcstate_dsl_parse_distribution` + +Example reasons for failure include the the rhs being: + +* not a call (e.g., `compare(x) ~ 1` +* not a call to distribution function (e.g., `compare(x) ~ sqrt(2)`) +* an invalid call (e.g., `compare(x) ~ Normal(0, 1, 2)`) + +The details for the failure will be included in the body of the error message. + +# `E104` + +Invalid left hand side of an assignment with `<-`; this is similar to `E102`. + +# `E201` + +Duplicated relationships (with `~`). + +Example: + +``` +a ~ Normal(0, 1) +b ~ Uniform(0, 1) +a ~ Exponential(1) # <= error here +``` + +Relationships must be unique becaue each represents a parameter, and a parameter can't be represented by two different distributions. + +# `E202` + +Duplicated assigments (with `<-`). This is similar to `E201` + +Example: + +``` +a <- 1 +b <- 2 +a <- 3 # <= error here +``` + +Assignments must be unique within the dsl code because makes it straightforward to trace usage through the dependency graph and from this create a gradient function using automatic differentation. + +This restriction means that you cannot reassign a value either, so this is an error: + +``` +a <- 1 +b <- 10 +a <- a + b # <= error here +``` + +# `E203` + +A relationship (with `~`) is shaddowing a previous assignment. So after assigning to a variable you have declared that the same symbol refers to a parameter. + +Example: + +``` +a <- 1 +a ~ Normal(0, 1) # <= error here +``` + +# `E204` + +An assignment (with `<-`) is shaddowing a previous relationship. + +Example: + +``` +a ~ Normal(0, 1) +a <- 10 +``` + +# `E205` + +Variables are used out of order. If you are using odin this is a big departure - at the moment you must declare your expressions (assignments and relationships) in order. However, because we forbid multiple assignment we may relax this in the future, but no existing programmes should be changed.