Updated news and made some QOL improvements

ncn-foreigners · Jan 19, 2024 · 838f011 · 838f011
1 parent b7fd415
commit 838f011
Show file tree

Hide file tree

Showing 12 changed files with 141 additions and 112 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -28,7 +28,8 @@ Imports:
     lamW,
     mathjaxr,
     sandwich,
-    doParallel,
+    doSNOW,
+    progress,
     foreach,
     parallel
 Suggests: 

diff --git a/NAMESPACE b/NAMESPACE
@@ -66,7 +66,7 @@ export(ztoinegbin)
 export(ztoipoisson)
 export(ztpoisson)
 import(mathjaxr)
-importFrom(doParallel,registerDoParallel)
+importFrom(doSNOW,registerDoSNOW)
 importFrom(foreach,"%dopar%")
 importFrom(foreach,foreach)
 importFrom(graphics,abline)

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,9 @@
+# singleRcapture 0.2.1.2
+
+* Bugfix for interaction terms in formula not being considered
+* Switched parallel backed to `doSNOW` form `doParallel`
+* Introduced progress bars to parallelized parts of code
+
 # singleRcapture 0.2.1.1
 
 * Bugfix for tests failing with `noLongDouble`

diff --git a/R/Internals.R b/R/Internals.R
@@ -89,7 +89,8 @@ singleRcaptureinternalpopulationEstimate <- function(y, X, grad, # check if some
         method = control$fittingMethod,
         controlBootstrapMethod = control$bootstrapFitcontrol,
         N = N, Xvlm = Xvlm, modelFrame = modelFrame,
-        offset = offset, weightsFlag = weightsFlag
+        offset = offset, weightsFlag = weightsFlag,
+        visT = control$bootstrapVisualTrace
       )
     } else {
       funBoot <- switch(

diff --git a/R/Methods.R b/R/Methods.R
@@ -645,7 +645,7 @@ hatvalues.singleRStaticCountData <- function(model, ...) {
 #' @importFrom foreach foreach
 #' @importFrom parallel makeCluster
 #' @importFrom parallel stopCluster
-#' @importFrom doParallel registerDoParallel
+#' @importFrom doSNOW registerDoSNOW
 #' @rdname regDiagSingleR
 #' @exportS3Method 
 dfbeta.singleRStaticCountData <- function(model,
@@ -665,13 +665,17 @@ dfbeta.singleRStaticCountData <- function(model,
 
   if (cores > 1) {
     cl <- parallel::makeCluster(cores)
-    doParallel::registerDoParallel(cl)
+    pb <- progress::progress_bar$new(total = NROW(X))
+
+    opts <- if (trace) list(progress = \(n) pb$tick()) else NULL
+    doSNOW::registerDoSNOW(cl)
     on.exit(parallel::stopCluster(cl))
     #parallel::clusterExport(cl, c("singleRinternalGetXvlmMatrix", "cf", "y", "X", "maxitNew", "model", "pw", "offset", "eta"), envir = environment())
 
     if (isFALSE(model$control$controlModel$weightsAsCounts)) {
       res <- foreach::`%dopar%`(
-        obj = foreach::foreach(k = 1:NROW(X), .combine = rbind),
+        obj = foreach::foreach(k = 1:NROW(X), .combine = rbind,
+                               .options.snow = opts),
         ex = {
           c(cf - estimatePopsizeFit(
             control = controlMethod(
@@ -696,7 +700,8 @@ dfbeta.singleRStaticCountData <- function(model,
       )
     } else {
       res <- foreach::`%dopar%`(
-        obj = foreach::foreach(k = 1:NROW(X), .combine = rbind),
+        obj = foreach::foreach(k = 1:NROW(X), .combine = rbind,
+                               .options.snow = opts),
         ex = {
           if (isFALSE(pw[k] - 1 > 0)) {
             c(cf - estimatePopsizeFit(

diff --git a/R/bootstraps.R b/R/bootstraps.R
@@ -131,11 +131,11 @@ noparBoot <- function(family, formulas, y, X, modelFrame,
 #' @importFrom foreach foreach
 #' @importFrom parallel makeCluster
 #' @importFrom parallel stopCluster
-#' @importFrom doParallel registerDoParallel
+#' @importFrom doSNOW registerDoSNOW
 noparBootMultiCore <- function(family, formulas, y, X, modelFrame,
                                 beta, weights, trcount, numboot,
                                 eta, cores, controlBootstrapMethod = NULL,
-                                method, N, offset, weightsFlag, ...) {
+                                method, N, offset, weightsFlag, visT, ...) {
   n <- length(y)
   if (isTRUE(weightsFlag)) {
     n <- sum(weights)
@@ -147,14 +147,18 @@ noparBootMultiCore <- function(family, formulas, y, X, modelFrame,
   }
 
   cl <- parallel::makeCluster(cores)
-  doParallel::registerDoParallel(cl)
+  pb <- progress::progress_bar$new(total = numboot)
+
+  opts <- if (visT) list(progress = \(n) pb$tick()) else NULL
+  doSNOW::registerDoSNOW(cl)
   on.exit(parallel::stopCluster(cl))
 
 
   ### TODO:: This gives a different results for family = "chao" and "zelterman"
   ### when compared to non paralelized version
   strappedStatistic <- foreach::`%dopar%`(
-    obj = foreach::foreach(k = 1:numboot, .combine = c),
+    obj = foreach::foreach(k = 1:numboot, .combine = c,
+                           .options.snow = opts),
     #obj = foreach::foreach(k = 1:numboot, .export = "singleRcaptureinternalIRLSmultipar"),
     ex = {
       theta <- NULL
@@ -320,11 +324,11 @@ semparBoot <- function(family, formulas, y, X, beta,
 #' @importFrom foreach foreach
 #' @importFrom parallel makeCluster
 #' @importFrom parallel stopCluster
-#' @importFrom doParallel registerDoParallel
+#' @importFrom doSNOW registerDoSNOW
 semparBootMultiCore <- function(family, formulas, y, X, modelFrame,
                                beta, weights, trcount, numboot,
                                eta, cores, controlBootstrapMethod = NULL,
-                               method, N, offset, weightsFlag, ...) {
+                               method, N, offset, weightsFlag, visT, ...) {
   n <- length(y)
   if (isTRUE(weightsFlag)) {
     n <- sum(weights)
@@ -338,11 +342,15 @@ semparBootMultiCore <- function(family, formulas, y, X, modelFrame,
   N <- round(sum(N))
 
   cl <- parallel::makeCluster(cores)
-  doParallel::registerDoParallel(cl)
+  pb <- progress::progress_bar$new(total = numboot)
+
+  opts <- if (visT) list(progress = \(n) pb$tick()) else NULL
+  doSNOW::registerDoSNOW(cl)
   on.exit(parallel::stopCluster(cl))
 
   strappedStatistic <- foreach::`%dopar%`(
-    obj = foreach::foreach(k = 1:numboot, .combine = c),
+    obj = foreach::foreach(k = 1:numboot, .combine = c,
+                           .options.snow = opts),
     ex = {
       theta <- NULL
       while (is.null(theta)) {
@@ -558,12 +566,12 @@ parBoot <- function(family, formulas, y, X, beta, weights,
 #' @importFrom foreach foreach
 #' @importFrom parallel makeCluster
 #' @importFrom parallel stopCluster
-#' @importFrom doParallel registerDoParallel
+#' @importFrom doSNOW registerDoSNOW
 #' @importFrom stats rbinom
 parBootMultiCore <- function(family, formulas, y, X, modelFrame,
                              beta, weights, trcount, numboot,
                              eta, cores, controlBootstrapMethod = NULL,
-                             method, N, offset, weightsFlag, ...) {
+                             method, N, offset, weightsFlag, visT, ...) {
   n <- length(y)
   if (isTRUE(weightsFlag)) {
     n <- sum(weights)
@@ -590,14 +598,18 @@ parBootMultiCore <- function(family, formulas, y, X, modelFrame,
   prob <- contr / N
 
   cl <- parallel::makeCluster(cores)
-  doParallel::registerDoParallel(cl)
+  pb <- progress::progress_bar$new(total = numboot)
+
+  opts <- if (visT) list(progress = \(n) pb$tick()) else NULL
+  doSNOW::registerDoSNOW(cl)
   on.exit(parallel::stopCluster(cl))
   #doRNG::registerDoRNG()
 
   ### TODO:: This gives a different results for family = "chao" and "zelterman"
   ### when compared to non paralelized version
   strappedStatistic <- foreach::`%dopar%`(
-    obj = foreach::foreach(k = 1:numboot, .combine = c),
+    obj = foreach::foreach(k = 1:numboot, .combine = c,
+                           .options.snow = opts),
     ex = {
       theta <- NULL
       while (is.null(theta)) {

diff --git a/R/control.R b/R/control.R
@@ -198,7 +198,7 @@ controlModel <- function(weightsAsCounts = FALSE,
 #' respective standard error and variance estimation.
 #'
 #' @param alpha significance level, 0.05 used by default.
-#' @param cores For bootstrap only, number of processor cores to be used,
+#' @param cores for bootstrap only, number of processor cores to be used,
 #' any number greater than 1 activates code designed with \code{doParallel}, 
 #' \code{foreach} and \code{parallel} packages. Note that for now using parallel
 #' computing makes tracing impossible so \code{traceBootstrapSize} and 
@@ -214,7 +214,8 @@ controlModel <- function(weightsAsCounts = FALSE,
 #' @param traceBootstrapSize boolean value indicating whether to print size of 
 #' bootstrapped sample after truncation for semi- and fully parametric bootstraps.
 #' @param bootstrapVisualTrace boolean value indicating whether to plot bootstrap 
-#' statistics in real time.
+#' statistics in real time if \code{cores = 1} if \code{cores > 1} it instead
+#' indicates whether to make progress bar.
 #' @param fittingMethod method used for fitting models from bootstrap samples.
 #' @param bootstrapFitcontrol control parameters for each regression works exactly 
 #' like \code{controlMethod} but for fitting models from bootstrap samples.

diff --git a/R/documentationFiles.R b/R/documentationFiles.R
@@ -269,7 +269,8 @@ NULL
 #' \code{foreach} and \code{parallel} packages. Note that for now using parallel 
 #' computing makes tracing impossible so \code{trace} parameter is ignored in this case.
 #' @param type a type of residual to return.
-#' @param trace logical value specifying whether to tracking results.
+#' @param trace logical value specifying whether to tracking results when
+#' \code{cores > 1} it will result in a progress bar being created.
 #' @param maxitNew maximal number of iterations for regressions with starting 
 #' points \mjseqn{\hat{\boldsymbol{\beta}}} on data 
 #' specified at call for \code{model} after the removal of k'th row. By default 1.

diff --git a/R/estimatePopsize.R b/R/estimatePopsize.R
@@ -758,91 +758,91 @@ estimatePopsize.default <- function(formula,
     )
   } else {
     stop("Ratio regression is not yet implemented")
-    ff <- formula
-    # TODO make this inherit from family
-    a <- function(x) {x+1}
-    if (length(ff) == 3) {ff[[3]] <- 1}
-    modelFrame <- stats::model.frame(ff, data, ...)
-
-    observed <- modelFrame |>
-      model.response() |>
-      as.vector() # dropping names, won't be needed
-
-    delete <- modelFrame |>
-      attr("names")
-
-    ff <- log(r) ~ 1
-    ff[[3]] <- formula[[3]]
-
-    counts <- table(observed)
-
-    if (TRUE) {
-      r <- sapply(1:(max(observed)-1), function(x) {
-        family$ratioFunc(x) * counts[as.character(x+1)] / counts[as.character(x)]
-      }) |> as.vector()
-
-      if (!is.null(weights)) {
-        priorWeights <- as.numeric(weights)
-      } else {
-        priorWeights <- rep(1, nrow(modelFrame))
-      }
-
-      if (TRUE) {
-        weights <- (1/counts[1:(max(observed)-1)] + 1/counts[2:max(observed)]) ^ -1
-        #weights <- priorWeights * weights
-      } else {
-        # weighting outgh to be optional
-      }
-
-      # maybe include data here
-      linearModel <- lm(ff, data = data.frame(
-                        r = r, x = 1:(max(observed) - 1)
-                        ), weights = weights, ...)
-
-      fitRatio <- predict(linearModel, data.frame(x = 0:(max(observed)-1)))
-      fitRatio <- exp(fitRatio)
-
-      # first est for N
-      N <- sum(counts) / (1 - 1 / sum(c(1, cumprod(fitRatio))))
-      # second est for N
-      N <- c(N, sum(counts) + family$ratioFunc(0)*counts["1"] / fitRatio[1])
-      names(N) <- c("ht", "reg")
-      f0 <- N - sum(counts)
-
-      # TODO:: idk if this applies to HT estimate
-      variation <- model.matrix(ff, model.frame(ff, data.frame(x = 0, r = 0)))
-      variation <- f0^2 * as.vector(variation %*% vcov(linearModel) %*% t(variation))
-      ## TODO -- this is an approximation
-      # we're assuming var(counts["1"]) ~~ counts["1"]
-      # this can be made better
-      variation <- variation + counts["1"] * (a(0) ^ 2) * 
-        exp(-predict(linearModel, data.frame(x = 0))) ^ 2
-
-      sd <- sqrt(variation)
-      sc <- qnorm(p = 1 - .05 / 2)
-
-
-      confidenceInterval <- 
-        data.frame(lowerBound = pmax(N - sc * sd, sum(counts)), 
-                   upperBound = N + sc * sd)
-      print(N)
-      print(confidenceInterval)
-
-    } else {
-      ### TODO
-      # put observed likelihood method here
-    }
-
-    structure(
-      list(
-        y                = if(isTRUE(returnElements[[1]])) as.numeric(observed) else NULL, # drop names
-        X                = if(isTRUE(returnElements[[2]])) variables else NULL,
-        modelFrame       = if (isTRUE(returnElements[[3]])) modelFrame else NULL,
-        formula          = formula,
-        call             = match.call(),
-        coefficients     = coefficients
-      ),
-      class = c("singleRRatioReg", "singleR", "glm", "lm")
-    )
+    # ff <- formula
+    # # TODO make this inherit from family
+    # a <- function(x) {x+1}
+    # if (length(ff) == 3) {ff[[3]] <- 1}
+    # modelFrame <- stats::model.frame(ff, data, ...)
+    # 
+    # observed <- modelFrame |>
+    #   model.response() |>
+    #   as.vector() # dropping names, won't be needed
+    # 
+    # delete <- modelFrame |>
+    #   attr("names")
+    # 
+    # ff <- log(r) ~ 1
+    # ff[[3]] <- formula[[3]]
+    # 
+    # counts <- table(observed)
+    # 
+    # if (TRUE) {
+    #   r <- sapply(1:(max(observed)-1), function(x) {
+    #     family$ratioFunc(x) * counts[as.character(x+1)] / counts[as.character(x)]
+    #   }) |> as.vector()
+    #   
+    #   if (!is.null(weights)) {
+    #     priorWeights <- as.numeric(weights)
+    #   } else {
+    #     priorWeights <- rep(1, nrow(modelFrame))
+    #   }
+    #   
+    #   if (TRUE) {
+    #     weights <- (1/counts[1:(max(observed)-1)] + 1/counts[2:max(observed)]) ^ -1
+    #     #weights <- priorWeights * weights
+    #   } else {
+    #     # weighting outgh to be optional
+    #   }
+    #   
+    #   # maybe include data here
+    #   linearModel <- lm(ff, data = data.frame(
+    #                     r = r, x = 1:(max(observed) - 1)
+    #                     ), weights = weights, ...)
+    #   
+    #   fitRatio <- predict(linearModel, data.frame(x = 0:(max(observed)-1)))
+    #   fitRatio <- exp(fitRatio)
+    #   
+    #   # first est for N
+    #   N <- sum(counts) / (1 - 1 / sum(c(1, cumprod(fitRatio))))
+    #   # second est for N
+    #   N <- c(N, sum(counts) + family$ratioFunc(0)*counts["1"] / fitRatio[1])
+    #   names(N) <- c("ht", "reg")
+    #   f0 <- N - sum(counts)
+    #   
+    #   # TODO:: idk if this applies to HT estimate
+    #   variation <- model.matrix(ff, model.frame(ff, data.frame(x = 0, r = 0)))
+    #   variation <- f0^2 * as.vector(variation %*% vcov(linearModel) %*% t(variation))
+    #   ## TODO -- this is an approximation
+    #   # we're assuming var(counts["1"]) ~~ counts["1"]
+    #   # this can be made better
+    #   variation <- variation + counts["1"] * (a(0) ^ 2) * 
+    #     exp(-predict(linearModel, data.frame(x = 0))) ^ 2
+    #   
+    #   sd <- sqrt(variation)
+    #   sc <- qnorm(p = 1 - .05 / 2)
+    #   
+    #   
+    #   confidenceInterval <- 
+    #     data.frame(lowerBound = pmax(N - sc * sd, sum(counts)), 
+    #                upperBound = N + sc * sd)
+    #   print(N)
+    #   print(confidenceInterval)
+    #   
+    # } else {
+    #   ### TODO
+    #   # put observed likelihood method here
+    # }
+    # 
+    # structure(
+    #   list(
+    #     y                = if(isTRUE(returnElements[[1]])) as.numeric(observed) else NULL, # drop names
+    #     X                = if(isTRUE(returnElements[[2]])) variables else NULL,
+    #     modelFrame       = if (isTRUE(returnElements[[3]])) modelFrame else NULL,
+    #     formula          = formula,
+    #     call             = match.call(),
+    #     coefficients     = coefficients
+    #   ),
+    #   class = c("singleRRatioReg", "singleR", "glm", "lm")
+    # )
   }
 }