Merge pull request #67 from ncn-foreigners/dev

merged minor changes
ncn-foreigners · Jan 22, 2025 · 2575223 · 2575223
2 parents 14e9dc7 + faebafd
commit 2575223
Show file tree

Hide file tree

Showing 38 changed files with 589 additions and 579 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -2,29 +2,29 @@
 
 S3method(AIC,nonprobsvy)
 S3method(BIC,nonprobsvy)
+S3method(check_balance,nonprobsvy)
 S3method(confint,nonprobsvy)
 S3method(cooks.distance,nonprobsvy)
 S3method(deviance,nonprobsvy)
 S3method(hatvalues,nonprobsvy)
 S3method(logLik,nonprobsvy)
 S3method(nobs,nonprobsvy)
-S3method(nonprobsvycheck,nonprobsvy)
-S3method(pop.size,nonprobsvy)
+S3method(pop_size,nonprobsvy)
 S3method(print,nonprobsvy)
 S3method(print,nonprobsvycheck)
 S3method(print,summary_nonprobsvy)
 S3method(residuals,nonprobsvy)
 S3method(summary,nonprobsvy)
 S3method(vcov,nonprobsvy)
+export(check_balance)
 export(cloglog_model_nonprobsvy)
-export(controlInf)
-export(controlOut)
-export(controlSel)
+export(control_inf)
+export(control_out)
+export(control_sel)
 export(genSimData)
 export(logit_model_nonprobsvy)
 export(nonprob)
-export(nonprobsvycheck)
-export(pop.size)
+export(pop_size)
 export(probit_model_nonprobsvy)
 import(Rcpp)
 import(mathjaxr)

diff --git a/NEWS.md b/NEWS.md
@@ -5,10 +5,15 @@
 ### Features
 - two additional datasets have been included: `jvs` (Job Vacancy Survey; a probability sample survey) and `admin` (Central Job Offers Database; a non-probability sample survey). The units and auxiliary variables have been aligned in a way that allows the data to be integrated using the methods implemented in this package.
 - a `nonprobsvycheck` function was added to check the balance in the totals of the variables based on the weighted weights between the non-probability and probability samples.
+- Important - the functions `controlSel`, `controlOut` and `controlInf` have been replaced by their counterparts `control_sel`, `control_out` and `control_inf`.
 
 ### Bugfixes
 - basic methods and functions related to variance estimation, weights and probability linking methods have been rewritten in a more optimal and readable way.
 
+### Documentation
+
+- annotation has been added that arguments such as `strata`, `subset` and `na_action` are not supported for the time being.
+
 # nonprobsvy 0.1.1
 
 ------------------------------------------------------------------------
@@ -19,13 +24,13 @@
 - bug Fix related to storing `vector` in `model_frame` when predicting `y_hat` in mass imputation `glm` model when X is based in one auxiliary variable only - fix provided converting it to `data.frame` object.
 
 ### Features
-- add information to `summary` about quality of estimation basing on difference between estimated and known total values of auxiliary variables
-- add estimation of exact standard error for k-nearest neighbor estimator.
-- add breaking change to `controlOut` function by switching values for `predictive_match` argument. From now on, the `predictive_match = 1` means $\hat{y}-\hat{y}$ in predictive mean matching imputation and `predictive_match = 2` corresponds to $\hat{y}-y$ matching.
-- implement `div` option when variable selection (more in documentation) for doubly robust estimation.
-- add more insights to `nonprob` output such as gradient, hessian and jacobian derived from IPW estimation for `mle` and `gee` methods when `IPW` or `DR` model executed.
-- add estimated inclusion probabilities and its derivatives for probability and non-probability samples to `nonprob` output when `IPW` or `DR` model executed.
-- add `model_frame` matrix data from probability sample used for mass imputation to `nonprob` when `MI` or `DR` model executed.
+- added information to `summary` about quality of estimation based on the difference between estimated and known total values of auxiliary variables
+- added estimation of exact standard error for k-nearest neighbor estimator.
+- added breaking change to `controlOut` function by switching values for `predictive_match` argument. From now on, the `predictive_match = 1` means $\hat{y}-\hat{y}$ in predictive mean matching imputation and `predictive_match = 2` corresponds to $\hat{y}-y$ matching.
+- implemented the `div` option for variable selection (more in documentation) in doubly robust estimation.
+- added more insights to `nonprob` output such as gradient, hessian and jacobian derived from IPW estimation for `mle` and `gee` methods when `IPW` or `DR` model executed.
+- added estimated inclusion probabilities and their derivatives for probability and non-probability samples to `nonprob` output when `IPW` or `DR` model is executed.
+- added `model_frame` matrix data from probability sample used for mass imputation to `nonprob` when `MI` or `DR` model executed.
 
 ### Unit tests
 - added unit tests for variable selection models and mi estimation with vector of population totals available

diff --git a/R/bias_correction_ipw.R b/R/bias_correction_ipw.R
@@ -57,10 +57,10 @@ mm <- function(X,
       "2" = warning("Relatively convergent algorithm when fitting selection model by nleqslv, but user must check if function values are acceptably small."),
       "3" = warning("Algorithm did not find suitable point - has stalled cannot find an acceptable new point when fitting selection model by nleqslv."),
       "4" = warning("Iteration limit exceeded when fitting selection model by nleqslv."),
-      "5" = warning("ill-conditioned Jacobian when fitting selection model by nleqslv."),
+      "5" = warning("Ill-conditioned Jacobian when fitting selection model by nleqslv."),
       "6" = warning("Jacobian is singular when fitting selection model by nleqslv."),
       "7" = warning("Jacobian is unusable when fitting selection model by nleqslv."),
-      "-10" = warning("user specified Jacobian is incorrect when fitting selection model by nleqslv.")
+      "-10" = warning("User specified Jacobian is incorrect when fitting selection model by nleqslv.")
     )
   }
 

diff --git a/R/cloglogModel.R b/R/cloglogModel.R
@@ -13,6 +13,7 @@
 #' @importFrom maxLik maxLik
 #' @importFrom Matrix Matrix
 #' @importFrom survey svyrecvar
+#' @keywords internal
 #' @export
 # must be exported to be visible in c++ script, to consider any other option
 cloglog_model_nonprobsvy <- function(...) {
@@ -153,9 +154,9 @@ cloglog_model_nonprobsvy <- function(...) {
       if (maxLik_an$convergence %in% c(1, 10, 51, 52)) {
         switch(as.character(maxLik_an$convergence),
           "1" = warning("Warning in fitting selection model with optim: the iteration limit maxit had been reached."),
-          "10" = warning("degeneracy of the Nelder Mead simplex in fitting selection model by optim."), # TODO -
-          "51" = warning("warning from the L BFGS B when fitting by optim."), # TODO -
-          "52" = stop("indicates an error from the L-BFGS-B method when fitting by optim.")
+          "10" = warning("Degeneracy of the Nelder Mead simplex in fitting selection model by optim."), # TODO -
+          "51" = warning("Warning from the L BFGS B when fitting by optim."), # TODO -
+          "52" = stop("Indicates an error from the L-BFGS-B method when fitting by optim.")
         )
       }
       theta <- maxLik_an$par

diff --git a/R/control_inference.R b/R/control_inference.R
@@ -1,31 +1,33 @@
 #' @title Control parameters for inference
-#' @description \code{controlInf} constructs a list with all necessary control parameters
+#'
+#' @description \code{control_inf} constructs a list with all necessary control parameters
 #' for statistical inference.
-#' @param vars_selection If `TRUE`, then variables selection model is used.
-#' @param var_method variance method.
-#' @param rep_type replication type for weights in the bootstrap method for variance estimation passed to [survey::as.svrepdesign()].
+#'
+#' @param vars_selection If `TRUE`, then the variables selection model is used.
+#' @param var_method the variance method.
+#' @param rep_type the replication type for weights in the bootstrap method for variance estimation passed to [survey::as.svrepdesign()].
 #'  Default is `subbootstrap`.
-#' @param bias_inf inference method in the bias minimization.
+#' @param bias_inf the inference method in the bias minimization.
 #' \itemize{
-#'   \item if \code{union} then final model is fitting on union of selected variables for selection and outcome models
-#'   \item if \code{div} then final model is fitting separately on division of selected variables into relevant ones for
+#'   \item if \code{union}, then the final model is fitted on the union of selected variables for selection and outcome models
+#'   \item if \code{div}, then the final model is fitted separately on division of selected variables into relevant ones for
 #'   selection and outcome model.
 #'   }
-#' @param bias_correction if `TRUE`, then bias minimization estimation used during fitting the model.
-#' @param num_boot number of iteration for bootstrap algorithms.
-#' @param alpha Significance level, Default is 0.05.
-#' @param cores Number of cores in parallel computing.
-#' @param keep_boot Logical indicating whether statistics from bootstrap should be kept.
+#' @param bias_correction if `TRUE`, then the bias minimization estimation is used during model fitting.
+#' @param num_boot the number of iterations for bootstrap algorithms.
+#' @param alpha significance level, 0.05 by default.
+#' @param cores the number of cores in parallel computing.
+#' @param keep_boot a logical value indicating whether statistics from bootstrap should be kept.
 #' By default set to \code{TRUE}
-#' @param nn_exact_se Logical value indicating whether to compute the exact
+#' @param nn_exact_se a logical value indicating whether to compute the exact
 #' standard error estimate for \code{nn} or \code{pmm} estimator. The variance estimator for
 #' estimation based on \code{nn} or \code{pmm} can be decomposed into three parts, with the
-#' third being computed using covariance between imputed values for units in
-#' probability sample using predictive matches from non-probability sample.
+#' third computed using covariance between imputed values for units in
+#' the probability sample using predictive matches from the non-probability sample.
 #' In most situations this term is negligible and is very computationally
-#' expensive so by default this is set to \code{FALSE}, but it is recommended to
-#' set this value to \code{TRUE} before submitting final results.
-#' @param pi_ij TODO, either matrix or \code{ppsmat} class object.
+#' expensive so by default it is set to \code{FALSE}, but the recommended option is to
+#' set this value to \code{TRUE} before submitting the final results.
+#' @param pi_ij TODO, either a matrix or a \code{ppsmat} class object.
 #'
 #'
 #' @return List with selected parameters.
@@ -36,7 +38,7 @@
 #'
 #' @export
 
-controlInf <- function(vars_selection = FALSE,
+control_inf <- function(vars_selection = FALSE,
                        var_method = c(
                          "analytic",
                          "bootstrap"

diff --git a/R/control_outcome.R b/R/control_outcome.R
@@ -1,6 +1,8 @@
 #' @title Control parameters for outcome model
-#' @description \code{controlOut} constructs a list with all necessary control parameters
+#'
+#' @description \code{control_out} constructs a list with all necessary control parameters
 #' for outcome model.
+#'
 #' @param epsilon Tolerance for fitting algorithms. Default is \code{1e-6}.
 #' @param maxit Maximum number of iterations.
 #' @param trace logical value. If `TRUE` trace steps of the fitting algorithms. Default is `FALSE`.
@@ -44,7 +46,7 @@
 #'
 #' @export
 
-controlOut <- function(epsilon = 1e-4,
+control_out <- function(epsilon = 1e-4,
                        maxit = 100,
                        trace = FALSE,
                        k = 1,

diff --git a/R/control_selection.R b/R/control_selection.R
@@ -1,8 +1,6 @@
 #' @title Control parameters for selection model
-#' @author Łukasz Chrostowski, Maciej Beręsewicz
-#' \loadmathjax
 #'
-#' @description \code{controlSel} constructs a list with all necessary control parameters
+#' @description \code{control_sel} constructs a list with all necessary control parameters
 #' for selection model.
 #'
 #'
@@ -22,7 +20,7 @@
 #'   \frac{\pi(\mathbf{x}, \boldsymbol{\theta})}{\mathbf{x}}}
 #'   \item if \code{2} then \mjseqn{ \mathbf{h}\left(\mathbf{x}, \boldsymbol{\theta}\right) = \mathbf{x}}
 #'   }
-#' @param penalty The penanlization function used during variables selection.
+#' @param penalty The penalization function used during variables selection.
 #' @param a_SCAD The tuning parameter of the SCAD penalty for selection model. Default is 3.7.
 #' @param a_MCP The tuning parameter of the MCP penalty for selection model. Default is 3.
 #' @param lambda A user-specified \mjseqn{\lambda} value during variable selection model fitting.
@@ -48,7 +46,7 @@
 #'
 #' @export
 
-controlSel <- function(method = "glm.fit", # perhaps another control function for model with variables selection
+control_sel <- function(method = "glm.fit", # perhaps another control function for model with variables selection
                        epsilon = 1e-4,
                        maxit = 500,
                        trace = FALSE,

diff --git a/R/data.R b/R/data.R
@@ -1,7 +1,7 @@
 #' Job Vacancy Survey
 #'
 #' @description
-#' This is a subset of the subset of the Job Vacancy Survey from Poland (for one quarter).
+#' This is a subset of the Job Vacancy Survey from Poland (for one quarter).
 #' The data has been subject to slight manipulation, but the relationships in the data have been preserved.
 #' For further details on the JVS, please refer to the following link:
 #' \url{https://stat.gov.pl/obszary-tematyczne/rynek-pracy/popyt-na-prace/zeszyt-metodologiczny-popyt-na-prace,3,1.html}.

diff --git a/R/internals.R b/R/internals.R
@@ -124,7 +124,7 @@ start_fit <- function(X,
                       weights,
                       weights_rand,
                       method_selection,
-                      control_selection = controlSel()) {
+                      control_selection = control_sel()) {
   weights_to_glm <- c(weights_rand, weights)
   start_model <- stats::glm.fit(
     x = X, # glm model for initial values in propensity score estimation
@@ -207,7 +207,7 @@ nonprobMI_fit <- function(outcome,
                           svydesign = NULL,
                           family_outcome = "gaussian",
                           start = NULL,
-                          control_outcome = controlOut(),
+                          control_outcome = control_out(),
                           verbose = FALSE,
                           model = TRUE,
                           x = FALSE,

diff --git a/R/logitModel.R b/R/logitModel.R
@@ -18,6 +18,7 @@
 #' @importFrom stats qlogis
 #'
 #'
+#' @keywords internal
 #' @export
 # must be exported to be visible in c++ script, to consider any other option
 logit_model_nonprobsvy <- function(...) {
@@ -133,8 +134,8 @@ logit_model_nonprobsvy <- function(...) {
         switch(as.character(maxLik_an$convergence),
           "1" = warning("Warning in fitting selection model with optim: the iteration limit maxit had been reached."),
           "10" = warning("degeneracy of the Nelder Mead simplex in fitting selection model by optim."), # TODO -
-          "51" = warning("warning from the L BFGS B when fitting by optim."), # TODO -
-          "52" = stop("indicates an error from the L BFGS B method when fitting by optim.")
+          "51" = warning("Warning from the L BFGS B when fitting by optim."), # TODO -
+          "52" = stop("Indicates an error from the L BFGS B method when fitting by optim.")
         )
       }