Skip to content

Commit

Permalink
Updated news and made some QOL improvements
Browse files Browse the repository at this point in the history
  • Loading branch information
Piotr Chlebicki committed Jan 19, 2024
1 parent b7fd415 commit 838f011
Show file tree
Hide file tree
Showing 12 changed files with 141 additions and 112 deletions.
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@ Imports:
lamW,
mathjaxr,
sandwich,
doParallel,
doSNOW,
progress,
foreach,
parallel
Suggests:
Expand Down
2 changes: 1 addition & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ export(ztoinegbin)
export(ztoipoisson)
export(ztpoisson)
import(mathjaxr)
importFrom(doParallel,registerDoParallel)
importFrom(doSNOW,registerDoSNOW)
importFrom(foreach,"%dopar%")
importFrom(foreach,foreach)
importFrom(graphics,abline)
Expand Down
6 changes: 6 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
# singleRcapture 0.2.1.2

* Bugfix for interaction terms in formula not being considered
* Switched parallel backed to `doSNOW` form `doParallel`
* Introduced progress bars to parallelized parts of code

# singleRcapture 0.2.1.1

* Bugfix for tests failing with `noLongDouble`
Expand Down
3 changes: 2 additions & 1 deletion R/Internals.R
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,8 @@ singleRcaptureinternalpopulationEstimate <- function(y, X, grad, # check if some
method = control$fittingMethod,
controlBootstrapMethod = control$bootstrapFitcontrol,
N = N, Xvlm = Xvlm, modelFrame = modelFrame,
offset = offset, weightsFlag = weightsFlag
offset = offset, weightsFlag = weightsFlag,
visT = control$bootstrapVisualTrace
)
} else {
funBoot <- switch(
Expand Down
13 changes: 9 additions & 4 deletions R/Methods.R
Original file line number Diff line number Diff line change
Expand Up @@ -645,7 +645,7 @@ hatvalues.singleRStaticCountData <- function(model, ...) {
#' @importFrom foreach foreach
#' @importFrom parallel makeCluster
#' @importFrom parallel stopCluster
#' @importFrom doParallel registerDoParallel
#' @importFrom doSNOW registerDoSNOW
#' @rdname regDiagSingleR
#' @exportS3Method
dfbeta.singleRStaticCountData <- function(model,
Expand All @@ -665,13 +665,17 @@ dfbeta.singleRStaticCountData <- function(model,

if (cores > 1) {
cl <- parallel::makeCluster(cores)
doParallel::registerDoParallel(cl)
pb <- progress::progress_bar$new(total = NROW(X))

opts <- if (trace) list(progress = \(n) pb$tick()) else NULL
doSNOW::registerDoSNOW(cl)
on.exit(parallel::stopCluster(cl))
#parallel::clusterExport(cl, c("singleRinternalGetXvlmMatrix", "cf", "y", "X", "maxitNew", "model", "pw", "offset", "eta"), envir = environment())

if (isFALSE(model$control$controlModel$weightsAsCounts)) {
res <- foreach::`%dopar%`(
obj = foreach::foreach(k = 1:NROW(X), .combine = rbind),
obj = foreach::foreach(k = 1:NROW(X), .combine = rbind,
.options.snow = opts),
ex = {
c(cf - estimatePopsizeFit(
control = controlMethod(
Expand All @@ -696,7 +700,8 @@ dfbeta.singleRStaticCountData <- function(model,
)
} else {
res <- foreach::`%dopar%`(
obj = foreach::foreach(k = 1:NROW(X), .combine = rbind),
obj = foreach::foreach(k = 1:NROW(X), .combine = rbind,
.options.snow = opts),
ex = {
if (isFALSE(pw[k] - 1 > 0)) {
c(cf - estimatePopsizeFit(
Expand Down
36 changes: 24 additions & 12 deletions R/bootstraps.R
Original file line number Diff line number Diff line change
Expand Up @@ -131,11 +131,11 @@ noparBoot <- function(family, formulas, y, X, modelFrame,
#' @importFrom foreach foreach
#' @importFrom parallel makeCluster
#' @importFrom parallel stopCluster
#' @importFrom doParallel registerDoParallel
#' @importFrom doSNOW registerDoSNOW
noparBootMultiCore <- function(family, formulas, y, X, modelFrame,
beta, weights, trcount, numboot,
eta, cores, controlBootstrapMethod = NULL,
method, N, offset, weightsFlag, ...) {
method, N, offset, weightsFlag, visT, ...) {
n <- length(y)
if (isTRUE(weightsFlag)) {
n <- sum(weights)
Expand All @@ -147,14 +147,18 @@ noparBootMultiCore <- function(family, formulas, y, X, modelFrame,
}

cl <- parallel::makeCluster(cores)
doParallel::registerDoParallel(cl)
pb <- progress::progress_bar$new(total = numboot)

opts <- if (visT) list(progress = \(n) pb$tick()) else NULL
doSNOW::registerDoSNOW(cl)
on.exit(parallel::stopCluster(cl))


### TODO:: This gives a different results for family = "chao" and "zelterman"
### when compared to non paralelized version
strappedStatistic <- foreach::`%dopar%`(
obj = foreach::foreach(k = 1:numboot, .combine = c),
obj = foreach::foreach(k = 1:numboot, .combine = c,
.options.snow = opts),
#obj = foreach::foreach(k = 1:numboot, .export = "singleRcaptureinternalIRLSmultipar"),
ex = {
theta <- NULL
Expand Down Expand Up @@ -320,11 +324,11 @@ semparBoot <- function(family, formulas, y, X, beta,
#' @importFrom foreach foreach
#' @importFrom parallel makeCluster
#' @importFrom parallel stopCluster
#' @importFrom doParallel registerDoParallel
#' @importFrom doSNOW registerDoSNOW
semparBootMultiCore <- function(family, formulas, y, X, modelFrame,
beta, weights, trcount, numboot,
eta, cores, controlBootstrapMethod = NULL,
method, N, offset, weightsFlag, ...) {
method, N, offset, weightsFlag, visT, ...) {
n <- length(y)
if (isTRUE(weightsFlag)) {
n <- sum(weights)
Expand All @@ -338,11 +342,15 @@ semparBootMultiCore <- function(family, formulas, y, X, modelFrame,
N <- round(sum(N))

cl <- parallel::makeCluster(cores)
doParallel::registerDoParallel(cl)
pb <- progress::progress_bar$new(total = numboot)

opts <- if (visT) list(progress = \(n) pb$tick()) else NULL
doSNOW::registerDoSNOW(cl)
on.exit(parallel::stopCluster(cl))

strappedStatistic <- foreach::`%dopar%`(
obj = foreach::foreach(k = 1:numboot, .combine = c),
obj = foreach::foreach(k = 1:numboot, .combine = c,
.options.snow = opts),
ex = {
theta <- NULL
while (is.null(theta)) {
Expand Down Expand Up @@ -558,12 +566,12 @@ parBoot <- function(family, formulas, y, X, beta, weights,
#' @importFrom foreach foreach
#' @importFrom parallel makeCluster
#' @importFrom parallel stopCluster
#' @importFrom doParallel registerDoParallel
#' @importFrom doSNOW registerDoSNOW
#' @importFrom stats rbinom
parBootMultiCore <- function(family, formulas, y, X, modelFrame,
beta, weights, trcount, numboot,
eta, cores, controlBootstrapMethod = NULL,
method, N, offset, weightsFlag, ...) {
method, N, offset, weightsFlag, visT, ...) {
n <- length(y)
if (isTRUE(weightsFlag)) {
n <- sum(weights)
Expand All @@ -590,14 +598,18 @@ parBootMultiCore <- function(family, formulas, y, X, modelFrame,
prob <- contr / N

cl <- parallel::makeCluster(cores)
doParallel::registerDoParallel(cl)
pb <- progress::progress_bar$new(total = numboot)

opts <- if (visT) list(progress = \(n) pb$tick()) else NULL
doSNOW::registerDoSNOW(cl)
on.exit(parallel::stopCluster(cl))
#doRNG::registerDoRNG()

### TODO:: This gives a different results for family = "chao" and "zelterman"
### when compared to non paralelized version
strappedStatistic <- foreach::`%dopar%`(
obj = foreach::foreach(k = 1:numboot, .combine = c),
obj = foreach::foreach(k = 1:numboot, .combine = c,
.options.snow = opts),
ex = {
theta <- NULL
while (is.null(theta)) {
Expand Down
5 changes: 3 additions & 2 deletions R/control.R
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,7 @@ controlModel <- function(weightsAsCounts = FALSE,
#' respective standard error and variance estimation.
#'
#' @param alpha significance level, 0.05 used by default.
#' @param cores For bootstrap only, number of processor cores to be used,
#' @param cores for bootstrap only, number of processor cores to be used,
#' any number greater than 1 activates code designed with \code{doParallel},
#' \code{foreach} and \code{parallel} packages. Note that for now using parallel
#' computing makes tracing impossible so \code{traceBootstrapSize} and
Expand All @@ -214,7 +214,8 @@ controlModel <- function(weightsAsCounts = FALSE,
#' @param traceBootstrapSize boolean value indicating whether to print size of
#' bootstrapped sample after truncation for semi- and fully parametric bootstraps.
#' @param bootstrapVisualTrace boolean value indicating whether to plot bootstrap
#' statistics in real time.
#' statistics in real time if \code{cores = 1} if \code{cores > 1} it instead
#' indicates whether to make progress bar.
#' @param fittingMethod method used for fitting models from bootstrap samples.
#' @param bootstrapFitcontrol control parameters for each regression works exactly
#' like \code{controlMethod} but for fitting models from bootstrap samples.
Expand Down
3 changes: 2 additions & 1 deletion R/documentationFiles.R
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,8 @@ NULL
#' \code{foreach} and \code{parallel} packages. Note that for now using parallel
#' computing makes tracing impossible so \code{trace} parameter is ignored in this case.
#' @param type a type of residual to return.
#' @param trace logical value specifying whether to tracking results.
#' @param trace logical value specifying whether to tracking results when
#' \code{cores > 1} it will result in a progress bar being created.
#' @param maxitNew maximal number of iterations for regressions with starting
#' points \mjseqn{\hat{\boldsymbol{\beta}}} on data
#' specified at call for \code{model} after the removal of k'th row. By default 1.
Expand Down
172 changes: 86 additions & 86 deletions R/estimatePopsize.R
Original file line number Diff line number Diff line change
Expand Up @@ -758,91 +758,91 @@ estimatePopsize.default <- function(formula,
)
} else {
stop("Ratio regression is not yet implemented")
ff <- formula
# TODO make this inherit from family
a <- function(x) {x+1}
if (length(ff) == 3) {ff[[3]] <- 1}
modelFrame <- stats::model.frame(ff, data, ...)

observed <- modelFrame |>
model.response() |>
as.vector() # dropping names, won't be needed

delete <- modelFrame |>
attr("names")

ff <- log(r) ~ 1
ff[[3]] <- formula[[3]]

counts <- table(observed)

if (TRUE) {
r <- sapply(1:(max(observed)-1), function(x) {
family$ratioFunc(x) * counts[as.character(x+1)] / counts[as.character(x)]
}) |> as.vector()

if (!is.null(weights)) {
priorWeights <- as.numeric(weights)
} else {
priorWeights <- rep(1, nrow(modelFrame))
}

if (TRUE) {
weights <- (1/counts[1:(max(observed)-1)] + 1/counts[2:max(observed)]) ^ -1
#weights <- priorWeights * weights
} else {
# weighting outgh to be optional
}

# maybe include data here
linearModel <- lm(ff, data = data.frame(
r = r, x = 1:(max(observed) - 1)
), weights = weights, ...)

fitRatio <- predict(linearModel, data.frame(x = 0:(max(observed)-1)))
fitRatio <- exp(fitRatio)

# first est for N
N <- sum(counts) / (1 - 1 / sum(c(1, cumprod(fitRatio))))
# second est for N
N <- c(N, sum(counts) + family$ratioFunc(0)*counts["1"] / fitRatio[1])
names(N) <- c("ht", "reg")
f0 <- N - sum(counts)

# TODO:: idk if this applies to HT estimate
variation <- model.matrix(ff, model.frame(ff, data.frame(x = 0, r = 0)))
variation <- f0^2 * as.vector(variation %*% vcov(linearModel) %*% t(variation))
## TODO -- this is an approximation
# we're assuming var(counts["1"]) ~~ counts["1"]
# this can be made better
variation <- variation + counts["1"] * (a(0) ^ 2) *
exp(-predict(linearModel, data.frame(x = 0))) ^ 2

sd <- sqrt(variation)
sc <- qnorm(p = 1 - .05 / 2)


confidenceInterval <-
data.frame(lowerBound = pmax(N - sc * sd, sum(counts)),
upperBound = N + sc * sd)
print(N)
print(confidenceInterval)

} else {
### TODO
# put observed likelihood method here
}

structure(
list(
y = if(isTRUE(returnElements[[1]])) as.numeric(observed) else NULL, # drop names
X = if(isTRUE(returnElements[[2]])) variables else NULL,
modelFrame = if (isTRUE(returnElements[[3]])) modelFrame else NULL,
formula = formula,
call = match.call(),
coefficients = coefficients
),
class = c("singleRRatioReg", "singleR", "glm", "lm")
)
# ff <- formula
# # TODO make this inherit from family
# a <- function(x) {x+1}
# if (length(ff) == 3) {ff[[3]] <- 1}
# modelFrame <- stats::model.frame(ff, data, ...)
#
# observed <- modelFrame |>
# model.response() |>
# as.vector() # dropping names, won't be needed
#
# delete <- modelFrame |>
# attr("names")
#
# ff <- log(r) ~ 1
# ff[[3]] <- formula[[3]]
#
# counts <- table(observed)
#
# if (TRUE) {
# r <- sapply(1:(max(observed)-1), function(x) {
# family$ratioFunc(x) * counts[as.character(x+1)] / counts[as.character(x)]
# }) |> as.vector()
#
# if (!is.null(weights)) {
# priorWeights <- as.numeric(weights)
# } else {
# priorWeights <- rep(1, nrow(modelFrame))
# }
#
# if (TRUE) {
# weights <- (1/counts[1:(max(observed)-1)] + 1/counts[2:max(observed)]) ^ -1
# #weights <- priorWeights * weights
# } else {
# # weighting outgh to be optional
# }
#
# # maybe include data here
# linearModel <- lm(ff, data = data.frame(
# r = r, x = 1:(max(observed) - 1)
# ), weights = weights, ...)
#
# fitRatio <- predict(linearModel, data.frame(x = 0:(max(observed)-1)))
# fitRatio <- exp(fitRatio)
#
# # first est for N
# N <- sum(counts) / (1 - 1 / sum(c(1, cumprod(fitRatio))))
# # second est for N
# N <- c(N, sum(counts) + family$ratioFunc(0)*counts["1"] / fitRatio[1])
# names(N) <- c("ht", "reg")
# f0 <- N - sum(counts)
#
# # TODO:: idk if this applies to HT estimate
# variation <- model.matrix(ff, model.frame(ff, data.frame(x = 0, r = 0)))
# variation <- f0^2 * as.vector(variation %*% vcov(linearModel) %*% t(variation))
# ## TODO -- this is an approximation
# # we're assuming var(counts["1"]) ~~ counts["1"]
# # this can be made better
# variation <- variation + counts["1"] * (a(0) ^ 2) *
# exp(-predict(linearModel, data.frame(x = 0))) ^ 2
#
# sd <- sqrt(variation)
# sc <- qnorm(p = 1 - .05 / 2)
#
#
# confidenceInterval <-
# data.frame(lowerBound = pmax(N - sc * sd, sum(counts)),
# upperBound = N + sc * sd)
# print(N)
# print(confidenceInterval)
#
# } else {
# ### TODO
# # put observed likelihood method here
# }
#
# structure(
# list(
# y = if(isTRUE(returnElements[[1]])) as.numeric(observed) else NULL, # drop names
# X = if(isTRUE(returnElements[[2]])) variables else NULL,
# modelFrame = if (isTRUE(returnElements[[3]])) modelFrame else NULL,
# formula = formula,
# call = match.call(),
# coefficients = coefficients
# ),
# class = c("singleRRatioReg", "singleR", "glm", "lm")
# )
}
}
Loading

0 comments on commit 838f011

Please sign in to comment.