closes #64, closes #55, update NEWS.md,

ncn-foreigners · Jan 17, 2025 · 36affe0 · 36affe0
1 parent e02d28d
commit 36affe0
Show file tree

Hide file tree

Showing 31 changed files with 435 additions and 425 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -2,29 +2,29 @@
 
 S3method(AIC,nonprobsvy)
 S3method(BIC,nonprobsvy)
+S3method(check_balance,nonprobsvy)
 S3method(confint,nonprobsvy)
 S3method(cooks.distance,nonprobsvy)
 S3method(deviance,nonprobsvy)
 S3method(hatvalues,nonprobsvy)
 S3method(logLik,nonprobsvy)
 S3method(nobs,nonprobsvy)
-S3method(nonprobsvycheck,nonprobsvy)
-S3method(pop.size,nonprobsvy)
+S3method(pop_size,nonprobsvy)
 S3method(print,nonprobsvy)
 S3method(print,nonprobsvycheck)
 S3method(print,summary_nonprobsvy)
 S3method(residuals,nonprobsvy)
 S3method(summary,nonprobsvy)
 S3method(vcov,nonprobsvy)
+export(check_balance)
 export(cloglog_model_nonprobsvy)
-export(controlInf)
-export(controlOut)
-export(controlSel)
+export(control_inf)
+export(control_out)
+export(control_sel)
 export(genSimData)
 export(logit_model_nonprobsvy)
 export(nonprob)
-export(nonprobsvycheck)
-export(pop.size)
+export(pop_size)
 export(probit_model_nonprobsvy)
 import(Rcpp)
 import(mathjaxr)

diff --git a/NEWS.md b/NEWS.md
@@ -5,10 +5,15 @@
 ### Features
 - two additional datasets have been included: `jvs` (Job Vacancy Survey; a probability sample survey) and `admin` (Central Job Offers Database; a non-probability sample survey). The units and auxiliary variables have been aligned in a way that allows the data to be integrated using the methods implemented in this package.
 - a `nonprobsvycheck` function was added to check the balance of the weighted totals of the variables between the non-probability and probability samples.
+- Important - the functions `controlSel`, `controlOut` and `controlInf` have been replaced by their counterparts `control_sel`, `control_out` and `control_inf`.
 
 ### Bugfixes
 - basic methods and functions related to variance estimation, weights and probability linking methods have been rewritten in a more optimal and readable way.
 
+### Documentation
+
+- a note has been added that the arguments `strata`, `subset` and `na_action` are not yet supported.
+
 # nonprobsvy 0.1.1
 
 ------------------------------------------------------------------------

diff --git a/R/cloglogModel.R b/R/cloglogModel.R
@@ -13,6 +13,7 @@
 #' @importFrom maxLik maxLik
 #' @importFrom Matrix Matrix
 #' @importFrom survey svyrecvar
+#' @keywords internal
 #' @export
 # must be exported to be visible in c++ script, to consider any other option
 cloglog_model_nonprobsvy <- function(...) {

diff --git a/R/control_inference.R b/R/control_inference.R
@@ -1,6 +1,8 @@
 #' @title Control parameters for inference
-#' @description \code{controlInf} constructs a list with all necessary control parameters
+#'
+#' @description \code{control_inf} constructs a list with all necessary control parameters
 #' for statistical inference.
+#'
 #' @param vars_selection If `TRUE`, then variables selection model is used.
 #' @param var_method variance method.
 #' @param rep_type replication type for weights in the bootstrap method for variance estimation passed to [survey::as.svrepdesign()].
@@ -36,7 +38,7 @@
 #'
 #' @export
 
-controlInf <- function(vars_selection = FALSE,
+control_inf <- function(vars_selection = FALSE,
                        var_method = c(
                          "analytic",
                          "bootstrap"

diff --git a/R/control_outcome.R b/R/control_outcome.R
@@ -1,6 +1,8 @@
 #' @title Control parameters for outcome model
-#' @description \code{controlOut} constructs a list with all necessary control parameters
+#'
+#' @description \code{control_out} constructs a list with all necessary control parameters
 #' for outcome model.
+#'
 #' @param epsilon Tolerance for fitting algorithms. Default is \code{1e-6}.
 #' @param maxit Maximum number of iterations.
 #' @param trace logical value. If `TRUE` trace steps of the fitting algorithms. Default is `FALSE`.
@@ -44,7 +46,7 @@
 #'
 #' @export
 
-controlOut <- function(epsilon = 1e-4,
+control_out <- function(epsilon = 1e-4,
                        maxit = 100,
                        trace = FALSE,
                        k = 1,

diff --git a/R/control_selection.R b/R/control_selection.R
@@ -1,8 +1,6 @@
 #' @title Control parameters for selection model
-#' @author Łukasz Chrostowski, Maciej Beręsewicz
-#' \loadmathjax
 #'
-#' @description \code{controlSel} constructs a list with all necessary control parameters
+#' @description \code{control_sel} constructs a list with all necessary control parameters
 #' for selection model.
 #'
 #'
@@ -48,7 +46,7 @@
 #'
 #' @export
 
-controlSel <- function(method = "glm.fit", # perhaps another control function for model with variables selection
+control_sel <- function(method = "glm.fit", # perhaps another control function for model with variables selection
                        epsilon = 1e-4,
                        maxit = 500,
                        trace = FALSE,

diff --git a/R/internals.R b/R/internals.R
@@ -124,7 +124,7 @@ start_fit <- function(X,
                       weights,
                       weights_rand,
                       method_selection,
-                      control_selection = controlSel()) {
+                      control_selection = control_sel()) {
   weights_to_glm <- c(weights_rand, weights)
   start_model <- stats::glm.fit(
     x = X, # glm model for initial values in propensity score estimation
@@ -207,7 +207,7 @@ nonprobMI_fit <- function(outcome,
                           svydesign = NULL,
                           family_outcome = "gaussian",
                           start = NULL,
-                          control_outcome = controlOut(),
+                          control_outcome = control_out(),
                           verbose = FALSE,
                           model = TRUE,
                           x = FALSE,

diff --git a/R/logitModel.R b/R/logitModel.R
@@ -18,6 +18,7 @@
 #' @importFrom stats qlogis
 #'
 #'
+#' @keywords internal
 #' @export
 # must be exported to be visible in c++ script, to consider any other option
 logit_model_nonprobsvy <- function(...) {

diff --git a/R/main_function_documentation.R b/R/main_function_documentation.R
@@ -27,10 +27,10 @@ NULL
 #' @param method_selection a `character` with method for propensity scores estimation.
 #' @param method_outcome a `character` with method for response variable estimation.
 #' @param family_outcome a `character` string describing the error distribution and link function to be used in the model. Default is "gaussian". Currently supports: gaussian with identity link, poisson and binomial.
-#' @param subset an optional `vector` specifying a subset of observations to be used in the fitting process.
-#' @param strata an optional `vector` specifying strata.
+#' @param subset an optional `vector` specifying a subset of observations to be used in the fitting process - not yet supported.
+#' @param strata an optional `vector` specifying strata - not yet supported.
 #' @param weights an optional `vector` of prior weights to be used in the fitting process. Should be NULL or a numeric vector. It is assumed that this vector contains frequency or analytic weights.
-#' @param na_action a function which indicates what should happen when the data contain `NAs`.
+#' @param na_action a function which indicates what should happen when the data contain `NAs` - not yet supported.
 #' @param control_selection a `list` indicating parameters to use in fitting selection model for propensity scores.
 #' @param control_outcome a `list` indicating parameters to use in fitting model for outcome variable.
 #' @param control_inference a `list` indicating parameters to use in inference based on probability and non-probability samples, contains parameters such as estimation method or variance method.
@@ -153,7 +153,7 @@ NULL
 #'   }
 #'   where \mjseqn{\lambda_{\theta}} and \mjseqn{q_{\lambda_{\beta}}} are some smooth functions. We let \mjseqn{q_{\lambda} \left(x\right) = \frac{\partial p_{\lambda}}{\partial x}}, where \mjseqn{p_{\lambda}} is some penalization function.
 #'   Details of penalization functions and techniques for solving this type of equation can be found [here](https://ncn-foreigners.github.io/nonprobsvy-book/variableselection.html).
-#'   To use the variable selection model, set the `vars_selection` parameter in the [controlInf()] function to `TRUE`. In addition, in the other control functions such as [controlSel()] and [controlOut()]
+#'   To use the variable selection model, set the `vars_selection` parameter in the [control_inf()] function to `TRUE`. In addition, in the other control functions such as [control_sel()] and [control_out()]
 #'   you can set parameters for the selection of the relevant variables, such as the number of folds during cross-validation algorithm or the lambda value for penalizations. Details can be found
 #'   in the documentation of the control functions for `nonprob`.
 #'
@@ -194,7 +194,7 @@ NULL
 #'  \item{\code{pop_size} -- estimated population size derived from estimated weights (non-probability sample) or known design weights (probability sample).}
 #'  \item{\code{pop_totals} -- the total values of the auxiliary variables derived from a probability sample or vector of total/mean values.}
 #'  \item{\code{outcome} -- list containing information about the fitting of the mass imputation model, in the case of regression model the object containing the list returned by
-#'  [stats::glm()], in the case of the nearest neighbour imputation the object containing list returned by [RANN::nn2()]. If `bias_correction` in [controlInf()] is set to `TRUE`, the estimation is based on
+#'  [stats::glm()], in the case of the nearest neighbour imputation the object containing list returned by [RANN::nn2()]. If `bias_correction` in [control_inf()] is set to `TRUE`, the estimation is based on
 #'  the joint estimating equations for the `selection` and `outcome` model and therefore, the list is different from the one returned by the [stats::glm()] function and contains elements such as
 #'  \itemize{
 #'  \item{\code{coefficients} -- estimated coefficients of the regression model.}
@@ -244,7 +244,7 @@ NULL
 #'  }
 #'  \item{\code{stat} -- matrix of the estimated population means in each bootstrap iteration.
 #'                       Returned only if a bootstrap method is used to estimate the variance and \code{keep_boot} in
-#'                       [controlInf()] is set on `TRUE`.}
+#'                       [control_inf()] is set to `TRUE`.}
 #' }
 #' @seealso
 #' [stats::optim()] -- For more information on the \code{optim} function used in the
@@ -263,11 +263,11 @@ NULL
 #'
 #' [RANN::nn2()] -- For more information about the nearest neighbour algorithm used during mass imputation process.
 #'
-#' [controlSel()] -- For the control parameters related to selection model.
+#' [control_sel()] -- For the control parameters related to selection model.
 #'
-#' [controlOut()] -- For the control parameters related to outcome model.
+#' [control_out()] -- For the control parameters related to outcome model.
 #'
-#' [controlInf()] -- For the control parameters related to statistical inference.
+#' [control_inf()] -- For the control parameters related to statistical inference.
 
 #' @examples
 #' \donttest{

diff --git a/R/nonprob.R b/R/nonprob.R
@@ -15,9 +15,9 @@ nonprob <- function(data,
                     strata = NULL,
                     weights = NULL,
                     na_action = NULL,
-                    control_selection = controlSel(),
-                    control_outcome = controlOut(),
-                    control_inference = controlInf(),
+                    control_selection = control_sel(),
+                    control_outcome = control_out(),
+                    control_inference = control_inf(),
                     start_selection = NULL,
                     start_outcome = NULL,
                     verbose = FALSE,
@@ -38,7 +38,8 @@ nonprob <- function(data,
   if (missing(method_outcome)) method_outcome <- "glm"
   if (!(method_outcome %in% c("glm", "nn", "pmm"))) stop("Invalid method for outcome variable.")
   if (!is.null(svydesign)) {
-    if (class(svydesign)[2] != "survey.design") stop("svydesign must be a survey.design object.")
+    if ("svyrep.design" %in% class(svydesign)) stop("We do not currently support the `svyrep.design` class. Provide the survey data in the `survey.design2` class.")
+    if ("pps" %in% class(svydesign)) stop("The `as.svrepdesign` function does not allow `pps` designs. For more details, see the `survey` package.")
   }
   if (!is.null(pop_totals)) {
     if (!is.vector(pop_totals)) stop("pop_totals must be a vector.")

diff --git a/R/nonprobDR.R b/R/nonprobDR.R
@@ -27,9 +27,9 @@ nonprobDR <- function(selection,
                       strata,
                       weights,
                       na_action,
-                      control_selection = controlSel(),
-                      control_outcome = controlOut(),
-                      control_inference = controlInf(),
+                      control_selection,
+                      control_outcome,
+                      control_inference,
                       start_outcome,
                       start_selection,
                       verbose,

diff --git a/R/nonprobIPW.R b/R/nonprobIPW.R
@@ -23,8 +23,8 @@ nonprobIPW <- function(selection,
                        strata,
                        weights,
                        na_action,
-                       control_selection = controlSel(),
-                       control_inference = controlInf(),
+                       control_selection,
+                       control_inference,
                        start_selection,
                        verbose,
                        x,

diff --git a/R/nonprobMI.R b/R/nonprobMI.R
@@ -26,7 +26,7 @@ nonprobMI <- function(outcome,
                       weights,
                       na_action,
                       control_outcome,
-                      control_inference = controlInf(var_method = "analytic"),
+                      control_inference,
                       start_outcome,
                       verbose,
                       x,

diff --git a/R/probitModel.R b/R/probitModel.R
@@ -15,6 +15,7 @@
 #' @importFrom stats dnorm
 #' @importFrom Matrix Matrix
 #' @importFrom survey svyrecvar
+#' @keywords internal
 #' @export
 # must be exported to be visible in c++ script, to consider any other option
 probit_model_nonprobsvy <- function(...) {

diff --git a/R/simple_methods.R b/R/simple_methods.R
@@ -7,9 +7,9 @@ nobs.nonprobsvy <- function(object,
                             ...) {
   c("prob" = object$prob_size, "nonprob" = object$nonprob_size)
 }
-#' @method pop.size nonprobsvy
+#' @method pop_size nonprobsvy
 #' @exportS3Method
-pop.size.nonprobsvy <- function(object,
+pop_size.nonprobsvy <- function(object,
                                 ...) {
   object$pop_size
 }
@@ -19,8 +19,8 @@ pop.size.nonprobsvy <- function(object,
 #' @param ... additional parameters
 #' @return Vector returning the value of the estimated population size.
 #' @export
-pop.size <- function(object, ...) {
-  UseMethod("pop.size")
+pop_size <- function(object, ...) {
+  UseMethod("pop_size")
 }
 #' @method residuals nonprobsvy
 #' @importFrom stats residuals
@@ -321,9 +321,9 @@ deviance.nonprobsvy <- function(object,
   if (class(object)[2] == "nonprobsvy_dr") res <- c("selection" = res_sel, "outcome" = res_out)
   res
 }
-#' @method nonprobsvycheck nonprobsvy
+#' @method check_balance nonprobsvy
 #' @exportS3Method
-nonprobsvycheck.nonprobsvy <- function(x, object, dig = 10) {
+check_balance.nonprobsvy <- function(x, object, dig = 10) {
   # Input validation
   if (!inherits(x, "formula")) {
     stop("'x' must be a formula")
@@ -443,8 +443,8 @@ nonprobsvycheck.nonprobsvy <- function(x, object, dig = 10) {
 #' @importFrom survey svytotal
 #' @importFrom stats setNames
 #' @export
-nonprobsvycheck <- function(x, object, dig) {
-  UseMethod("nonprobsvycheck", object)
+check_balance <- function(x, object, dig) {
+  UseMethod("check_balance", object)
 }
 # Internal function - not exported in CRAN version
 # Will be exported in future releases after variance estimation is implemented

diff --git a/R/summary.R b/R/summary.R
@@ -111,7 +111,7 @@ summary.nonprobsvy <- function(object,
         cnf_int = object$confidence_interval
       ),
       sample_size = nobs(object, ...),
-      population_size = pop.size(object, ...),
+      population_size = pop_size(object, ...),
       totals = object$pop_totals,
       test = test,
       control = object$control,