fixing documentation

ncn-foreigners · Mar 4, 2025 · 210bad3 · 210bad3
1 parent a6f2aa1
commit 210bad3
Show file tree

Hide file tree

Showing 6 changed files with 46 additions and 26 deletions.
diff --git a/R/method_nn.R b/R/method_nn.R
@@ -40,7 +40,7 @@
 #' 2. Match units from \mjseqn{S_B} to \mjseqn{S_A'} to obtain predictions \mjseqn{y^*=k^{-1}\sum_{j=1}^{k}y_j}, i.e. the mean of the \mjseqn{k} matched values.
 #' 3. Estimate \mjseqn{\hat{\mu}=\frac{1}{N} \sum_{i \in S_B} d_i y_i^*}.
 #' 4. Repeat steps 1-3 \mjseqn{M} times (we set \mjseqn{M=50} in our simulations; this is hard-coded).
-#' 5. Estimate \mjseqn{\hat{V}_1=\text{var}{\hat{\boldsymbol{\mu}}}} obtained from simulations and save it as `var_nonprob`.
+#' 5. Estimate \mjseqn{\hat{V}_1=\text{var}({\hat{\boldsymbol{\mu}}})} obtained from simulations and save it as `var_nonprob`.
 #'
 #'
 #' (b) probability part (\mjseqn{S_B} with size \mjseqn{n_B}; denoted as `var_prob` in the result)

diff --git a/R/method_pmm.R b/R/method_pmm.R
@@ -3,8 +3,8 @@
 #' \loadmathjax
 #'
 #' @description
-#' Model for the outcome for the mass imputation estimator. The implementation is currently based on [RANN::nn2] function and thus it uses Euclidean distance for matching units from \mjseqn{S_A} (nonprobability) to \mjseqn{S_B} (probability) based on predicted values from model \mjseqnm{\boldsymbol{x}_i)} based
-#' either on `method_glm` or `method_npar`.  Estimation of the mean is done using \mjseqn{S_B} sample.
+#' Model for the outcome for the mass imputation estimator. The implementation is currently based on the [RANN::nn2] function and thus uses Euclidean distance for matching units from \mjseqn{S_A} (nonprobability) to \mjseqn{S_B} (probability) based on predicted values from the model \mjseqn{m(\boldsymbol{x}_i, \cdot)}, which is based
+#' on either `method_glm` or `method_npar`. Estimation of the mean is done using the \mjseqn{S_B} sample.
 #'
 #' This implementation extends the approach of Yang et al. (2021), as described in Chlebicki et al. (2025), namely:
 #'
@@ -17,7 +17,16 @@
 #'  variance minimization procedure (`pmm_k_choice` from the [control_out()] function)}
 #' }
 #'
-#' @details Analytical variance
+#' @details
+#'
+#' Matching
+#'
+#' In the package we support two types of matching:
+#'
+#' 1. \mjseqn{\hat{y} - \hat{y}} matching (default; `control_out(pmm_match_type = 1)`).
+#' 2. \mjseqn{\hat{y} - y} matching (`control_out(pmm_match_type = 2)`).
+#'
+#' Analytical variance
 #'
 #' The variance of the mean is estimated based on the following approach
 
@@ -27,10 +36,11 @@
 #' can be summarized as follows:
 #'
 #' 1. Sample \mjseqn{n_A} units from \mjseqn{S_A} with replacement to create \mjseqn{S_A'} (if pseudo-weights are present, inclusion probabilities should be proportional to their inverses).
-#' 2. --
-#' 3. --
-#' 4. --
-#' 5. Estimate \mjseqn{\hat{V}_1=\text{var}{\hat{\boldsymbol{\mu}}}} obtained from simulations and save it as `var_nonprob`.
+#' 2. Estimate regression model \mjseqn{\mathbb{E}[Y|\boldsymbol{X}]=m(\boldsymbol{X}, \cdot)} based on \mjseqn{S_{A}'} from step 1.
+#' 3. Compute \mjseqn{\hat{\nu}'(i,t)} for \mjseqn{t=1,\dots,k, i\in S_{B}} using estimated \mjseqn{m(\boldsymbol{x}', \cdot)} and \mjseqn{\left\lbrace(y_{j},\boldsymbol{x}_{j})| j\in S_{A}'\right\rbrace}.
+#' 4. Compute \mjseqn{\displaystyle\frac{1}{k}\sum_{t=1}^{k}y_{\hat{\nu}'(i,t)}} using \mjseqn{Y} values from \mjseqn{S_{A}'}.
+#' 5. Repeat steps 1-4 \mjseqn{M} times (we set \mjseqn{M=50} in our code; this is hard-coded).
+#' 6. Estimate \mjseqn{\hat{V}_1=\text{var}({\hat{\boldsymbol{\mu}}})} obtained from simulations and save it as `var_nonprob`.
 #'
 #' (b) probability part (\mjseqn{S_B} with size \mjseqn{n_B}; denoted as `var_prob` in the result)
 #'

diff --git a/inst/WORDLIST b/inst/WORDLIST
@@ -31,17 +31,18 @@ Youngdeok
 Zhonglei
 al
 arXiv
+behaviour
 boldsymbol
 checkmark
 cloglog
-continous
 doi
 et
 favour
 generalised
 github
 glm
 jacobian
+loess
 loglik
 maximisation
 minimisation

diff --git a/inst/tinytest/test_simulations.R b/inst/tinytest/test_simulations.R
@@ -78,16 +78,16 @@ expect_equal(
 
 ### mi npar
 
-mi_npar <- nonprob(outcome = y1 + y2~x1 + x2,
-                  svydesign = kim2019_sample_prob,
-                  method_outcome = "npar",
-                  data = kim2019_sample_nonprob)
-
-expect_equal(
-  mi_npar$confidence_interval$lower_bound < kim2019_y_true &
-    mi_npar$confidence_interval$upper_bound > kim2019_y_true,
-  c(TRUE, TRUE)
-)
+# mi_npar <- nonprob(outcome = y1 + y2 ~ x1 + x2,
+#                   svydesign = kim2019_sample_prob,
+#                   method_outcome = "npar",
+#                   data = kim2019_sample_nonprob)
+#
+# expect_equal(
+#   mi_npar$confidence_interval$lower_bound < kim2019_y_true &
+#     mi_npar$confidence_interval$upper_bound > kim2019_y_true,
+#   c(TRUE, TRUE)
+# )
 
 
 # pop level data ----------------------------------------------------------

diff --git a/man/method_nn.Rd b/man/method_nn.Rd
diff --git a/man/method_pmm.Rd b/man/method_pmm.Rd