From 5d16f4f719e2398331658466bce2d0ba3fabe653 Mon Sep 17 00:00:00 2001 From: BERENZ Date: Mon, 27 Jan 2025 22:29:03 +0100 Subject: [PATCH] more informative error messages --- NAMESPACE | 9 ----- NEWS.md | 5 +++ R/boot_mi.R | 6 ++-- R/cloglogModel.R | 22 ++++++------ R/control_inference.R | 4 +-- R/control_selection.R | 6 ++-- R/data_manip.R | 2 +- R/generateData.r | 78 ------------------------------------------- R/internals.R | 6 ++-- R/logitModel.R | 24 ++++++------- R/nonprob.R | 34 +++++++++---------- R/nonprobDR.R | 8 ++--- R/nonprobIPW.R | 8 ++--- R/nonprobMI.R | 6 ++-- R/probitModel.R | 22 ++++++------ R/simple_methods.R | 42 +++++++++++------------ man/control_sel.Rd | 4 +-- man/genSimData.Rd | 47 -------------------------- 18 files changed, 102 insertions(+), 231 deletions(-) delete mode 100644 R/generateData.r delete mode 100644 man/genSimData.Rd diff --git a/NAMESPACE b/NAMESPACE index 42f8292..b447a71 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -21,7 +21,6 @@ export(cloglog_model_nonprobsvy) export(control_inf) export(control_out) export(control_sel) -export(genSimData) export(logit_model_nonprobsvy) export(nonprob) export(pop_size) @@ -53,7 +52,6 @@ importFrom(stats,coef) importFrom(stats,confint) importFrom(stats,contrasts) importFrom(stats,cooks.distance) -importFrom(stats,cor) importFrom(stats,cov) importFrom(stats,delete.response) importFrom(stats,deviance) @@ -61,7 +59,6 @@ importFrom(stats,dnorm) importFrom(stats,get_all_vars) importFrom(stats,glm.fit) importFrom(stats,hatvalues) -importFrom(stats,lm.fit) importFrom(stats,loess) importFrom(stats,loess.control) importFrom(stats,logLik) @@ -77,18 +74,12 @@ importFrom(stats,printCoefmat) importFrom(stats,pt) importFrom(stats,qlogis) importFrom(stats,qnorm) -importFrom(stats,rbinom) -importFrom(stats,rchisq) importFrom(stats,reformulate) importFrom(stats,residuals) -importFrom(stats,rexp) -importFrom(stats,rnorm) -importFrom(stats,runif) importFrom(stats,sd) importFrom(stats,setNames) importFrom(stats,summary.glm) importFrom(stats,terms) -importFrom(stats,uniroot) importFrom(stats,update) importFrom(stats,var) importFrom(stats,vcov) diff --git a/NEWS.md b/NEWS.md index 3cc785f..143aeab 100644 --- a/NEWS.md +++ b/NEWS.md @@ -5,6 +5,8 @@ ### Breaking changes - functions `pop.size`, `controlSel`, `controlOut` and `controlInf` were renamed to `pop_size`, `control_sel`, `control_out` and `control_inf` respectively. +- the `genSimData` function was removed completely as it was not used anywhere in the package. +- the `maxLik_method` argument was renamed to `maxlik_method` in the `control_sel` function. ### Features @@ -15,6 +17,9 @@ ### Bugfixes - basic methods and functions related to variance estimation, weights and probability linking methods have been rewritten in a more optimal and readable way. +### Other +- more informative error messages have been added. + ### Documentation - annotation has been added that arguments such as `strata`, `subset` and `na_action` are not supported for the time being. 
diff --git a/R/boot_mi.R b/R/boot_mi.R index 1fbd233..2a35170 100644 --- a/R/boot_mi.R +++ b/R/boot_mi.R @@ -57,7 +57,7 @@ bootMI <- function(X_rand, if (class(svydesign)[1] != "pps") { rep_weights <- survey::as.svrepdesign(svydesign, type = rep_type, replicates = num_boot)$repweights$weights } else { - stop("pps bootstrap variance in development") + stop("The pps bootstrap variance estimator is under development.") } if (method == "glm") { while (k <= num_boot) { @@ -444,7 +444,7 @@ bootMI_multicore <- function(X_rand, family_nonprobsvy <- family_nonprobsvy() } - if (verbose) message("Multicores bootstrap in progress..") + if (verbose) message("Multicore bootstrap in progress...") cl <- parallel::makeCluster(cores) doParallel::registerDoParallel(cl) @@ -460,7 +460,7 @@ bootMI_multicore <- function(X_rand, if (class(svydesign)[1] != "pps") { rep_weights <- survey::as.svrepdesign(svydesign, type = rep_type, replicates = num_boot)$repweights$weights } else { - stop("pps bootstrap variance in development") + stop("The pps bootstrap variance estimator is under development.") } if (method == "glm") { k <- 1:num_boot diff --git a/R/cloglogModel.R b/R/cloglogModel.R index b7b2aa3..06efcf5 100644 --- a/R/cloglogModel.R +++ b/R/cloglogModel.R @@ -125,12 +125,12 @@ cloglog_model_nonprobsvy <- function(...) { if (maxLik_an$code %in% c(3:7, 100)) { switch(as.character(maxLik_an$code), - "3" = warning("Warning in fitting selection model with maxLik: probably not converged."), - "4" = warning("Maxiteration limit reached in fitting selection model by maxLik."), - "5" = stop("Inifinite value of log_like in fitting selection model by maxLik, error code 5"), - "6" = stop("Inifinite value of gradient in fitting selection model by maxLik, error code 6"), - "7" = stop("Inifinite value of hessian in fitting selection model by maxLik, error code 7"), - "100" = stop("Error in fitting selection model with maxLik, error code 100:: Bad start."), + "3" = warning("Warning in fitting selection model with the `maxLik` package: probably not converged."), + "4" = warning("Max iteration limit reached in fitting selection model by the `maxLik` package."), + "5" = stop("Infinite value of log_like in fitting selection model by the `maxLik` package, error code 5."), + "6" = stop("Infinite value of gradient in fitting selection model by the `maxLik` package, error code 6."), + "7" = stop("Infinite value of hessian in fitting selection model by the `maxLik` package, error code 7."), + "100" = stop("Error in fitting selection model with the `maxLik` package, error code 100: Bad start."), ) } @@ -153,10 +153,10 @@ cloglog_model_nonprobsvy <- function(...) 
{ ) if (maxLik_an$convergence %in% c(1, 10, 51, 52)) { switch(as.character(maxLik_an$convergence), - "1" = warning("Warning in fitting selection model with optim: the iteration limit maxit had been reached."), - "10" = warning("Degeneracy of the Nelder Mead simplex in fitting selection model by optim."), # TODO - - "51" = warning("Warning from the L BFGS B when fitting by optim."), # TODO - - "52" = stop("Indicates an error from the L-BFGS-B method when fitting by optim.") + "1" = warning("Warning in fitting selection model with the `optim` function: the iteration limit maxit had been reached."), + "10" = warning("Degeneracy of the Nelder-Mead simplex in fitting selection model by the `optim` function."), # TODO - + "51" = warning("Warning from the L-BFGS-B method when fitting by the `optim` function."), # TODO - + "52" = stop("Indicates an error from the L-BFGS-B method when fitting by the `optim` function.") ) } theta <- maxLik_an$par @@ -164,7 +164,7 @@ cloglog_model_nonprobsvy <- function(...) { grad <- gradient(theta) hess <- hessian(theta) } else { - stop("Provided invalid optimizer.") + stop("Provide a valid optimizer (`optim` or `maxLik`).") } list( diff --git a/R/control_inference.R b/R/control_inference.R index 165dad0..2178fe6 100644 --- a/R/control_inference.R +++ b/R/control_inference.R @@ -68,13 +68,13 @@ control_inf <- function(vars_selection = FALSE, TRUE } else { if (!is.logical(keep_boot)) { - stop("keep_boot argument for controlInf must be logical") + stop("The `keep_boot` argument for the `control_inf` function must be logical.") } else { keep_boot } }, nn_exact_se = if (!is.logical(nn_exact_se) & length(nn_exact_se) == 1) { - stop("Argument nn_exact_se must be a logical scalar") + stop("The `nn_exact_se` argument must be a logical scalar.") } else { nn_exact_se }, diff --git a/R/control_selection.R b/R/control_selection.R index be3ca16..3a64186 100644 --- a/R/control_selection.R +++ b/R/control_selection.R @@ -10,7 +10,7 @@ #' @param trace logical value. If `TRUE` trace steps of the fitting algorithms. Default is `FALSE` #' @param optimizer - optimization function for maximum likelihood estimation. #' @param optim_method maximisation method that will be passed to [stats::optim()] function. Default is `BFGS`. -#' @param maxLik_method maximisation method that will be passed to [maxLik::maxLik()] function. Default is `NR`. +#' @param maxlik_method maximisation method that will be passed to [maxLik::maxLik()] function. Default is `NR`. #' @param dependence logical value - `TRUE` if samples are dependent. #' @param key binary key variable #' @param est_method_sel Method of estimation for propensity score model. 
@@ -51,7 +51,7 @@ control_sel <- function(method = "glm.fit", # perhaps another control function f maxit = 500, trace = FALSE, optimizer = c("maxLik", "optim"), - maxLik_method = "NR", + maxlik_method = "NR", optim_method = "BFGS", dependence = FALSE, key = NULL, @@ -77,7 +77,7 @@ control_sel <- function(method = "glm.fit", # perhaps another control function f maxit = maxit, trace = trace, optimizer = if (missing(optimizer)) "optim" else optimizer, - maxLik_method = maxLik_method, + maxlik_method = maxlik_method, optim_method = optim_method, dependence = dependence, key = key, diff --git a/R/data_manip.R b/R/data_manip.R index 1c14761..2b1122f 100644 --- a/R/data_manip.R +++ b/R/data_manip.R @@ -52,7 +52,7 @@ model_frame <- function(formula, data, weights = NULL, svydesign = NULL, pop_tot # X_rand <- model.matrix(formula, svydesign$variables[, nons_names])# matrix of probability sample with intercept # } } else { - stop("Variable names in data and svydesign do not match") + stop("The names of the variables in the data and in svydesign do not match.") } list( diff --git a/R/generateData.r b/R/generateData.r deleted file mode 100644 index 951be3a..0000000 --- a/R/generateData.r +++ /dev/null @@ -1,78 +0,0 @@ -#' @import mathjaxr -NULL -#' @title Simulation data -#' @author Łukasz Chrostowski, Maciej Beręsewicz -# -#' @description Generate simulated data according to Chen, Li & Wu (2020), section 5. -#' -#' \loadmathjax -# -#' @param N \code{integer}, population size, default 10000 -#' @param n \code{integer}, big data sample, default 1000 -#' @return \code{genSimData} returns a data.frame, with the following columns: -#' \itemize{ -#' \item{x0 -- intercept} -#' \item{x1 -- the first variable based on z1} -#' \item{x2 -- the second variable based on z2 and x1} -#' \item{x3 -- the third variable based on z3 and x1 and x2} -#' \item{x4 -- the third variable based on z4 and x1, x2 and x3} -#' \item{\mjseqn{y30} -- \mjseqn{y} generated from the model \mjseqn{y=2+x1+x2+x3+x4+\sigma \cdot \varepsilon}, so the cor(y,y_hat) = 0.30} -#' \item{\mjseqn{y60} -- \mjseqn{y} generated from the model \mjseqn{y=2+x1+x2+x3+x4+\sigma \cdot \varepsilon}, so the cor(y,y_hat) = 0.60} -#' \item{\mjseqn{y80} -- \mjseqn{y} generated from the model \mjseqn{y=2+x1+x2+x3+x4+\sigma \cdot \varepsilon}, so the cor(y,y_hat) = 0.80} -#' \item{rho -- true propensity scores for big data such that sum(rho)=n} -#' \item{srs -- probabilities of inclusion to random sample such that max(srs)/min(srs)=50} -#' } -#' -#' @references Chen, Y., Li, P., & Wu, C. (2020). Doubly Robust Inference With Nonprobability Survey Samples. Journal of the American Statistical Association, 115(532), 2011–2021. 
doi:10.1080/01621459.2019.1677241 -#' -#' @examples -#' ## generate data with N=20000 and n=2000 -#' genSimData(N = 20000, n = 2000) -#' -#' ## generate data when big data is almost as N -#' genSimData(N = 10000, n = 9000) -#' -#' @importFrom stats cor lm.fit rbinom rchisq rexp rnorm runif uniroot -#' -#' @export -genSimData <- function(N = 10000, n = 1000) { - find_sigma_regsim <- function(sigma, rho) { - y <- 2 + x1 + x2 + x3 + x4 + sigma * epsilon - model_fit <- lm.fit(x = cbind(1, x1, x2, x3, x4), y = y) - res <- cor(y, model_fit$fitted.values) - rho - res - } - - find_theta_logsim <- function(theta, n) { - eta <- theta + 0.1 * x1 + 0.2 * x2 + 0.1 * x3 + 0.2 * x4 - rho <- exp(eta) / (1 + exp(eta)) - res <- sum(rho) - n - res - } - - z1 <- rbinom(N, 1, 0.5) - z2 <- runif(N, 0, 2) - z3 <- rexp(N) - z4 <- rchisq(N, 4) - epsilon <- rnorm(N) - - x0 <- rep(1, N) - x1 <- z1 - x2 <- z2 + 0.3 * x1 - x3 <- z3 + 0.2 * (x1 + x2) - x4 <- z4 + 0.1 * (x1 + x2 + x3) - sigma30 <- uniroot(find_sigma_regsim, lower = 0, upper = 30, rho = 0.3)$root - sigma60 <- uniroot(find_sigma_regsim, lower = 0, upper = 30, rho = 0.6)$root - sigma80 <- uniroot(find_sigma_regsim, lower = 0, upper = 30, rho = 0.8)$root - y30 <- 2 + x1 + x2 + x3 + x4 + sigma30 * epsilon - y60 <- 2 + x1 + x2 + x3 + x4 + sigma60 * epsilon - y80 <- 2 + x1 + x2 + x3 + x4 + sigma80 * epsilon - theta <- uniroot(find_theta_logsim, lower = -100, upper = 100, n = n)$root - rho <- exp(theta + 0.1 * x1 + 0.2 * x2 + 0.1 * x3 + 0.2 * x4) / (1 + exp(theta + 0.1 * x1 + 0.2 * x2 + 0.1 * x3 + 0.2 * x4)) - - p <- uniroot(f = function(x) max(x3 + x) - 50 * min(x3 + x), lower = -200, upper = 100)$root - - sim_df <- data.frame(x0, x1, x2, x3, x4, y30, y60, y80, rho, srs = (x3 + p) / 10) - - return(sim_df) -} diff --git a/R/internals.R b/R/internals.R index 22d1581..8d50f64 100644 --- a/R/internals.R +++ b/R/internals.R @@ -155,7 +155,7 @@ ff <- function(formula) { formula_string <- paste(deparse(formula), collapse = " ") formula_parts <- strsplit(formula_string, "~")[[1]] if (length(formula_parts) != 2) { - stop("The formula must contain exactly one '~' operator.") + stop("The `formula` must contain exactly one '~' operator.") } lhs <- trimws(formula_parts[1]) @@ -165,7 +165,7 @@ ff <- function(formula) { independent_vars <- strsplit(rhs, "\\s*\\+\\s*")[[1]] if (any(duplicated(dependent_vars))) { - warning("Duplicate dependent variable names detected. They have been made unique.") + warning("Duplicate dependent variable names have been detected. They have been made unique.") dependent_vars <- unique(dependent_vars) } outcome_formulas <- vector("list", length(dependent_vars)) @@ -261,7 +261,7 @@ process_family <- function(family_spec) { } else if (inherits(family_spec, "family")) { family <- family_spec } else { - stop("Invalid family specification") + stop("Invalid family specification.") } return(family) } diff --git a/R/logitModel.R b/R/logitModel.R index a1ea3d3..8a1ec70 100644 --- a/R/logitModel.R +++ b/R/logitModel.R @@ -97,7 +97,7 @@ logit_model_nonprobsvy <- function(...) { logLik = log_like, grad = gradient, hess = hessian, - method = control$maxLik_method, + method = control$maxlik_method, start = start, printLevel = control$print_level ) @@ -108,12 +108,12 @@ logit_model_nonprobsvy <- function(...) 
{ log_likelihood <- log_like(theta) if (maxLik_an$code %in% c(3:7, 100)) { switch(as.character(maxLik_an$code), - "3" = warning("Warning in fitting selection model with maxLik: probably not converged."), - "4" = warning("Max iteration limit reached in fitting selection model by maxLik."), - "5" = stop("Inifinite value of log_like in fitting selection model by maxLik, error code 5"), - "6" = stop("Inifinite value of gradient in fitting selection model by maxLik, error code 6"), - "7" = stop("Inifinite value of hessian in fitting selection model by maxLik, error code 7"), - "100" = stop("Error in fitting selection model with maxLik, error code 100:: Bad start."), + "3" = warning("Warning in fitting selection model with the `maxLik` package: probably not converged."), + "4" = warning("Max iteration limit reached in fitting selection model by the `maxLik` package."), + "5" = stop("Infinite value of log_like in fitting selection model by the `maxLik` package, error code 5."), + "6" = stop("Infinite value of gradient in fitting selection model by the `maxLik` package, error code 6."), + "7" = stop("Infinite value of hessian in fitting selection model by the `maxLik` package, error code 7."), + "100" = stop("Error in fitting selection model with the `maxLik` package, error code 100: Bad start."), ) } } else if (control$optimizer == "optim") { # TODO add optimParallel for high-dimensional data @@ -132,10 +132,10 @@ logit_model_nonprobsvy <- function(...) { ) if (maxLik_an$convergence %in% c(1, 10, 51, 52)) { switch(as.character(maxLik_an$convergence), - "1" = warning("Warning in fitting selection model with optim: the iteration limit maxit had been reached."), - "10" = warning("degeneracy of the Nelder Mead simplex in fitting selection model by optim."), # TODO - - "51" = warning("Warning from the L BFGS B when fitting by optim."), # TODO - - "52" = stop("Indicates an error from the L BFGS B method when fitting by optim.") + "1" = warning("Warning in fitting selection model with the `optim` function: the iteration limit maxit had been reached."), + "10" = warning("Degeneracy of the Nelder-Mead simplex in fitting selection model by the `optim` function."), # TODO - + "51" = warning("Warning from the L-BFGS-B method when fitting by the `optim` function."), # TODO - + "52" = stop("Indicates an error from the L-BFGS-B method when fitting by the `optim` function.") ) } @@ -144,7 +144,7 @@ logit_model_nonprobsvy <- function(...) { grad <- gradient(theta) hess <- hessian(theta) } else { - stop("Provided invalid optimizer.") + stop("Provide a valid optimizer (`optim` or `maxLik`).") } list( diff --git a/R/nonprob.R b/R/nonprob.R index 60309d6..0fae7b9 100644 --- a/R/nonprob.R +++ b/R/nonprob.R @@ -36,43 +36,43 @@ nonprob <- function(data, if (missing(method_selection)) method_selection <- "logit" if (missing(family_outcome)) family_outcome <- "gaussian" if (missing(method_outcome)) method_outcome <- "glm" - if (!(method_outcome %in% c("glm", "nn", "pmm"))) stop("Invalid method for outcome variable.") + if (!(method_outcome %in% c("glm", "nn", "pmm"))) stop("Invalid method for the `outcome` variable.") if (!is.null(svydesign)) { if ("svyrep.design" %in% class(svydesign)) stop("We do not currently support the `svyrep.design` class. Provide the survey data in the `survey.design2` class.") if ("pps" %in% class(svydesign)) stop("The `as.svrepdesign` function does not allow `pps` designs. 
For more details, see the `survey` package.") } if (!is.null(pop_totals)) { - if (!is.vector(pop_totals)) stop("pop_totals must be a vector.") + if (!is.vector(pop_totals)) stop("The `pop_totals` argument must be a vector.") } if (!is.null(pop_means)) { - if (!is.vector(pop_means)) stop("pop_means must be a vector.") + if (!is.vector(pop_means)) stop("The `pop_means` argument must be a vector.") } - if (!(method_selection %in% c("logit", "cloglog", "probit"))) stop("Invalid method for selection formula.") - if (!(family_outcome %in% c("gaussian", "binomial", "poisson"))) stop("Invalid family for outcome formula.") + if (!(method_selection %in% c("logit", "cloglog", "probit"))) stop("Invalid method for the `selection` formula.") + if (!(family_outcome %in% c("gaussian", "binomial", "poisson"))) stop("Invalid family for the `outcome` formula.") if (!is.null(control_selection$key)) { if (!(control_selection$key %in% colnames(data)) || !(control_selection$key %in% colnames(svydesign$variables))) { - stop("Key variable for overlapping units must be defined with this same name in prob and nonprob sample.") + stop("Key variable for overlapping units must be defined with the same name in both the probability and nonprobability samples.") } } ## basic checkers if (is.null(selection) & is.null(outcome)) { - stop("Please provide selection or outcome formula.") + stop("Please provide the `selection` or `outcome` argument.") } # Check formula inputs if (!is.null(selection) && !inherits(selection, "formula")) { - stop("'selection' must be a formula") + stop("The `selection` argument must be a formula.") } if (!is.null(outcome) && !inherits(outcome, "formula")) { - stop("'outcome' must be a formula") + stop("The `outcome` argument must be a formula.") } if (!is.null(target) && !inherits(target, "formula")) { - stop("'target' must be a formula") + stop("The `target` argument must be a formula.") } if (inherits(selection, "formula") && (is.null(outcome) || inherits(outcome, "formula") == FALSE)) { - if (inherits(target, "formula") == FALSE) stop("Please provide target variable") + if (inherits(target, "formula") == FALSE) stop("Please provide the `target` argument.") model_used <- "P" } @@ -86,27 +86,27 @@ nonprob <- function(data, # Check numeric inputs if (!is.null(pop_size) && !is.numeric(pop_size)) { - stop("'pop_size' must be numeric") + stop("The `pop_size` argument must be a numeric scalar.") } if (!is.null(weights) && !is.numeric(weights)) { - stop("'weights' must be numeric") + stop("The `weights` argument must be a numeric vector.") } # Check weights length if (!is.null(weights) && length(weights) != nrow(data)) { - stop("Length of weights must match number of rows in data") + stop("Length of the `weights` argument must match the number of rows in `data`.") } if (!is.null(pop_totals) && !is.null(pop_means)) { - stop("Cannot specify both pop_totals and pop_means") + stop("Specify either the `pop_totals` or the `pop_means` argument, not both.") } if (!is.null(pop_size)) { if (pop_size <= 0) { - stop("pop_size must be positive") + stop("The `pop_size` argument must be positive.") } if (pop_size < nrow(data)) { - stop("pop_size cannot be smaller than sample size") + stop("The `pop_size` argument cannot be smaller than the sample size.") } } diff --git a/R/nonprobDR.R b/R/nonprobDR.R index c6f198f..67e6fac 100644 --- a/R/nonprobDR.R +++ b/R/nonprobDR.R @@ -177,7 +177,7 @@ nonprobDR <- function(selection, pop_totals <- c(pop_size, pop_size * pop_means) names(pop_totals) <- c("(Intercept)", names(pop_means)) } else { - stop("pop_size 
must be defined when estimating with pop_means.") + stop("The `pop_size` argument must be specified when the `pop_means` argument is provided.") } } @@ -232,7 +232,7 @@ nonprobDR <- function(selection, } ############ } else { - stop("Please, provide only one of svydesign object or pop_totals/pop_means.") + stop("Specify only one of the `svydesign`, `pop_totals` or `pop_means` arguments.") } for (k in 1:outcomes$l) { outcome <- outcomes$outcome[[k]] @@ -691,7 +691,7 @@ nonprobDR <- function(selection, mu_hat <- 1 / N_nons * sum((1 / ps_nons) * (weights * (OutcomeModel$y_nons - y_nons_pred))) + y_rand_pred } else { - stop("Please, provide only one of svydesign object or pop_totals/pop_means.") + stop("Specify only one of the `svydesign`, `pop_totals` or `pop_means` arguments.") } ys[[k]] <- as.numeric(y_nons) @@ -803,7 +803,7 @@ nonprobDR <- function(selection, stat[, k] <- boot_obj$stat # mu_hat <- boot_obj$mu } else { - stop("Invalid method for variance estimation.") + stop("Invalid `var_method` argument for variance estimation.") } SE <- sqrt(var) alpha <- control_inference$alpha diff --git a/R/nonprobIPW.R b/R/nonprobIPW.R index ff0f14e..c0b20c2 100644 --- a/R/nonprobIPW.R +++ b/R/nonprobIPW.R @@ -44,7 +44,7 @@ nonprobIPW <- function(selection, nfolds <- control_selection$nfolds eps <- control_selection$epsilon rep_type <- control_inference$rep_type - if (!(target[3] == "NULL()")) stop("ill-defined formula for the target") + if (!(target[3] == "NULL()")) stop("Ill-defined formula for the `target` argument.") # formula for outcome variable if target defined dependents <- paste(selection, collapse = " ") outcome <- outcome_init <- stats::as.formula(paste(target[2], dependents)) @@ -194,7 +194,7 @@ nonprobIPW <- function(selection, pop_totals <- c(pop_size, pop_size * pop_means) names(pop_totals) <- c("(Intercept)", names(pop_means)) } else { - stop("pop_size must be defined when estimating with pop_means.") + stop("The `pop_size` argument must be specified when the `pop_means` argument is provided.") } } @@ -360,7 +360,7 @@ nonprobIPW <- function(selection, ) ####################################### } else { - stop("Please, provide svydesign object or pop_totals/pop_means.") + stop("Specify only one of the `svydesign`, `pop_totals` or `pop_means` arguments.") } mu_hats <- numeric(length = outcomes$l) for (k in 1:outcomes$l) { @@ -494,7 +494,7 @@ nonprobIPW <- function(selection, SE_values[[k]] <- data.frame(t(data.frame("SE" = c(nonprob = NA, prob = NA)))) } } else { - stop("Invalid method for variance estimation.") + stop("Invalid `var_method` argument for variance estimation.") } SE <- sqrt(var) alpha <- control_inference$alpha diff --git a/R/nonprobMI.R b/R/nonprobMI.R index 45c83cc..4beaaf2 100644 --- a/R/nonprobMI.R +++ b/R/nonprobMI.R @@ -258,7 +258,7 @@ nonprobMI <- function(outcome, pop_totals <- c(pop_size, pop_size * pop_means) names(pop_totals) <- c("(Intercept)", names(pop_means)) } else { - stop("pop_size must be defined when estimating with pop_means.") + stop("The `pop_size` argument must be specified when the `pop_means` argument is provided.") } } Model <- model_frame(formula = outcome, data = data, pop_totals = pop_totals) @@ -330,7 +330,7 @@ nonprobMI <- function(outcome, OutcomeList[[k]]$model_frame <- Model$model_frame_rand mu_hat <- y_rand_pred } else { - stop("Please, provide svydesign object or pop_totals/pop_means.") + stop("Specify only one of the `svydesign`, `pop_totals` or `pop_means` arguments.") } if (isTRUE(attr(model_obj$model, "method") == 
"pmm") & !(control_inference$nn_exact_se)) { @@ -425,7 +425,7 @@ nonprobMI <- function(outcome, prob = NA )))) } else { - stop("Invalid method for variance estimation.") + stop("Invalid `var_method` for the variance estimation.") } SE <- sqrt(var) alpha <- control_inference$alpha diff --git a/R/probitModel.R b/R/probitModel.R index eae8435..3ebda1a 100644 --- a/R/probitModel.R +++ b/R/probitModel.R @@ -109,19 +109,19 @@ probit_model_nonprobsvy <- function(...) { logLik = log_like, grad = gradient, # fixed hess = hessian, # fixed - method = control$maxLik_method, + method = control$maxlik_method, start = start, printLevel = control$print_level ) # NA in gradient for Newton-Raphson method if (maxLik_an$code %in% c(3:7, 100)) { switch(as.character(maxLik_an$code), - "3" = warning("Warning in fitting selection model with maxLik: probably not converged."), - "4" = warning("Maxiteration limit reached in fitting selection model by maxLik."), - "5" = stop("Inifinite value of log_like in fitting selection model by maxLik, error code 5"), - "6" = stop("Inifinite value of gradient in fitting selection model by maxLik, error code 6"), - "7" = stop("Inifinite value of hessian in fitting selection model by maxLik, error code 7"), - "100" = stop("Error in fitting selection model with maxLik, error code 100:: Bad start."), + "3" = warning("Warning in fitting selection model with the `maxLik` package: probably not converged."), + "4" = warning("Maxiteration limit reached in fitting selection model by the `maxLik` package."), + "5" = stop("Infinite value of log_like in fitting selection model by the `maxLik` package, error code 5."), + "6" = stop("Infinite value of gradient in fitting selection model by the `maxLik` package, error code 6."), + "7" = stop("Infinite value of hessian in fitting selection model by the `maxLik` package, error code 7."), + "100" = stop("Error in fitting selection model with the `maxLik` package, error code 100: Bad start."), ) } @@ -144,10 +144,10 @@ probit_model_nonprobsvy <- function(...) 
{ ) if (maxLik_an$convergence %in% c(1, 10, 51, 52)) { switch(as.character(maxLik_an$convergence), - "1" = warning("Warning in fitting selection model with optim: the iteration limit maxit had been reached."), - "10" = warning("Degeneracy of the Nelder Mead simplex in fitting selection model by optim."), # TODO - - "51" = warning("Warning from the L BFGS B when fitting by optim."), # TODO - - "52" = stop("Indicates an error from the L-BFGS-B method when fitting by optim.") + "1" = warning("Warning in fitting selection model with the `optim` function: the iteration limit maxit had been reached."), + "10" = warning("Degeneracy of the Nelder-Mead simplex in fitting selection model by the `optim` function."), # TODO - + "51" = warning("Warning from the L-BFGS-B method when fitting by the `optim` function."), # TODO - + "52" = stop("Indicates an error from the L-BFGS-B method when fitting by the `optim` function.") ) } diff --git a/R/simple_methods.R b/R/simple_methods.R index 1dc902f..89bb7e8 100644 --- a/R/simple_methods.R +++ b/R/simple_methods.R @@ -326,39 +326,39 @@ deviance.nonprobsvy <- function(object, check_balance.nonprobsvy <- function(x, object, dig = 10) { # Input validation if (!inherits(x, "formula")) { - stop("'x' must be a formula") + stop("The `x` argument must be a formula.") } if (missing(object) || is.null(object)) { - stop("'object' is required") + stop("The `object` argument is required.") } if (!any(c("nonprobsvy_dr", "nonprobsvy_ipw") %in% class(object))) { - stop("No estimated weights available. Only IPW or DR methods are supported.") + stop("No estimated weights available. Only the IPW and DR methods are supported.") } if (!is.numeric(dig) || dig < 0) { - stop("'dig' must be a non-negative number") + stop("The `dig` argument must be a non-negative number.") } if (nrow(object$data) == 0) { - stop("Empty dataset") + stop("An empty dataset was detected (zero rows).") } if (sum(object$weights) == 0) { - stop("Sum of weights is zero") + stop("The sum of weights is zero.") } # Extract variables from formula vars <- all.vars(x) if (length(vars) == 0) { - stop("No variables specified in formula") + stop("No variables specified in the `x` formula.") } # Check if all variables exist in the data missing_vars <- setdiff(vars, names(object$data)) if (length(missing_vars) > 0) { - stop(sprintf("The following variables are not present in the dataset: %s", + stop(sprintf("The following variables are not present in the dataset: %s.", paste(missing_vars, collapse = ", "))) } @@ -366,7 +366,7 @@ check_balance.nonprobsvy <- function(x, object, dig = 10) { calculate_totals <- function(var, data) { # Check for NAs if (sum(is.na(data[[var]])) > 0) { - warning(sprintf("NA values found in variable %s", var)) + warning(sprintf("NA values found in variable %s.", var)) } # For categorical variables, handle each level @@ -375,7 +375,7 @@ check_balance.nonprobsvy <- function(x, object, dig = 10) { totals <- sapply(levels, function(lvl) { data_subset <- data[data[[var]] == lvl, ] if (nrow(data_subset) < 5) { - warning(sprintf("Small group size (< 5) for level %s in variable %s", lvl, var)) + warning(sprintf("Small group size (< 5) for level %s in variable %s.", lvl, var)) } sum(data_subset$weights) }) @@ -395,7 +395,7 @@ check_balance.nonprobsvy <- function(x, object, dig = 10) { nonprob_totals <- tryCatch({ unlist(lapply(vars, function(var) calculate_totals(var, data))) }, error = function(e) { - stop(sprintf("Error calculating nonprobability totals: %s", e$message)) + stop(sprintf("Error calculating 
nonprobability totals: %s.", e$message)) }) # Calculate probability totals @@ -407,13 +407,13 @@ check_balance.nonprobsvy <- function(x, object, dig = 10) { names(svy_totals) <- names(svy_total) svy_totals }, error = function(e) { - stop(sprintf("Error calculating survey totals: %s", e$message)) + stop(sprintf("Error calculating survey totals: %s.", e$message)) }) } else { # Use population totals prob_totals <- object$selection$pop_totals if (is.null(prob_totals)) { - stop("No population totals available") + stop("No population totals (`pop_totals`) are available in the fitted object.") } } @@ -421,7 +421,7 @@ check_balance.nonprobsvy <- function(x, object, dig = 10) { diff <- tryCatch({ round(nonprob_totals - prob_totals[names(nonprob_totals)], digits = dig) }, error = function(e) { - stop(sprintf("Error calculating differences: %s", e$message)) + stop(sprintf("Error calculating differences: %s.", e$message)) }) # Return results with meaningful names @@ -455,10 +455,10 @@ nonprobsvytotal.nonprobsvy <- function(x, object, interaction = FALSE) { var <- lhs.vars(x) # Validate inputs if (!is.null(var)) { - stop("no dependend variable needed for this method, please remove it and try again") + stop("No dependent variable is needed for this method; please remove it and try again.") } if (nrow(object$data) == 0) { - stop("Empty dataset") + stop("An empty dataset was detected (zero rows).") } class_nonprob <- class(object)[2] if (!class_nonprob %in% c("nonprobsvy_ipw", "nonprobsvy_dr", "nonprobsvy_mi")) { @@ -570,7 +570,7 @@ nonprobsvymean.nonprobsvy <- function(x, object, interaction = FALSE) { } if (nrow(object$data) == 0) { - stop("Empty dataset") + stop("An empty dataset was detected (zero rows).") } class_nonprob <- class(object)[2] @@ -659,12 +659,12 @@ nonprobsvyby.nonprobsvy <- function(y, by, object, FUN) { } if (nrow(object$data) == 0) { - stop("Empty dataset") + stop("An empty dataset was detected (zero rows).") } class_nonprob <- class(object)[2] if (!class_nonprob %in% c("nonprobsvy_ipw", "nonprobsvy_dr", "nonprobsvy_mi")) { - stop("Invalid nonprob object class") + stop("Invalid `nonprob` object class.") } variables <- rhs.vars(y) @@ -673,13 +673,13 @@ nonprobsvyby.nonprobsvy <- function(y, by, object, FUN) { # Validate inputs missing_vars <- setdiff(groups, names(object$data)) if (length(missing_vars) > 0) { - stop(sprintf("The following variables are not present in the dataset: %s", + stop(sprintf("The following variables are not present in the dataset: %s.", paste(missing_vars, collapse = ", "))) } if (!is.null(variables) && !all(variables %in% names(object$data))) { missing_dep_vars <- setdiff(variables, names(object$y)) - stop(sprintf("The following dependent variables are not present: %s", + stop(sprintf("The following dependent variables are not present: %s.", paste(missing_dep_vars, collapse = ", "))) } diff --git a/man/control_sel.Rd b/man/control_sel.Rd index 8de7095..bba60ec 100644 --- a/man/control_sel.Rd +++ b/man/control_sel.Rd @@ -10,7 +10,7 @@ control_sel( maxit = 500, trace = FALSE, optimizer = c("maxLik", "optim"), - maxLik_method = "NR", + maxlik_method = "NR", optim_method = "BFGS", dependence = FALSE, key = NULL, @@ -43,7 +43,7 @@ control_sel( \item optimization function for maximum likelihood estimation. }} -\item{maxLik_method}{maximisation method that will be passed to \code{\link[maxLik:maxLik]{maxLik::maxLik()}} function. Default is \code{NR}.} +\item{maxlik_method}{maximisation method that will be passed to \code{\link[maxLik:maxLik]{maxLik::maxLik()}} function. 
Default is \code{NR}.} \item{optim_method}{maximisation method that will be passed to \code{\link[stats:optim]{stats::optim()}} function. Default is \code{BFGS}.} diff --git a/man/genSimData.Rd b/man/genSimData.Rd deleted file mode 100644 index 337a7e1..0000000 --- a/man/genSimData.Rd +++ /dev/null @@ -1,47 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/generateData.r -\name{genSimData} -\alias{genSimData} -\title{Simulation data} -\usage{ -genSimData(N = 10000, n = 1000) -} -\arguments{ -\item{N}{\code{integer}, population size, default 10000} - -\item{n}{\code{integer}, big data sample, default 1000} -} -\value{ -\code{genSimData} returns a data.frame, with the following columns: -\itemize{ -\item{x0 -- intercept} -\item{x1 -- the first variable based on z1} -\item{x2 -- the second variable based on z2 and x1} -\item{x3 -- the third variable based on z3 and x1 and x2} -\item{x4 -- the third variable based on z4 and x1, x2 and x3} -\item{\mjseqn{y30} -- \mjseqn{y} generated from the model \mjseqn{y=2+x1+x2+x3+x4+\sigma \cdot \varepsilon}, so the cor(y,y_hat) = 0.30} -\item{\mjseqn{y60} -- \mjseqn{y} generated from the model \mjseqn{y=2+x1+x2+x3+x4+\sigma \cdot \varepsilon}, so the cor(y,y_hat) = 0.60} -\item{\mjseqn{y80} -- \mjseqn{y} generated from the model \mjseqn{y=2+x1+x2+x3+x4+\sigma \cdot \varepsilon}, so the cor(y,y_hat) = 0.80} -\item{rho -- true propensity scores for big data such that sum(rho)=n} -\item{srs -- probabilities of inclusion to random sample such that max(srs)/min(srs)=50} -} -} -\description{ -Generate simulated data according to Chen, Li & Wu (2020), section 5. - -\loadmathjax -} -\examples{ -## generate data with N=20000 and n=2000 -genSimData(N = 20000, n = 2000) - -## generate data when big data is almost as N -genSimData(N = 10000, n = 9000) - -} -\references{ -Chen, Y., Li, P., & Wu, C. (2020). Doubly Robust Inference With Nonprobability Survey Samples. Journal of the American Statistical Association, 115(532), 2011–2021. doi:10.1080/01621459.2019.1677241 -} -\author{ -Łukasz Chrostowski, Maciej Beręsewicz -}
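Note: the snippet below is a minimal, hypothetical usage sketch of the renamed interface described in the NEWS.md hunk above. The names `nonprob`, `control_sel`, `control_inf` and the `maxlik_method` argument come from this patch; the data set `nonprob_sample`, the design object `prob_design`, the formulas, and the `control_selection`/`control_inference` argument names of `nonprob()` are illustrative assumptions rather than content shown in the patch.

library(nonprobsvy)
library(survey)

# control objects built with the renamed constructors; `maxlik_method`
# replaces the old `maxLik_method` spelling
sel_ctrl <- control_sel(maxlik_method = "NR", optim_method = "BFGS")
inf_ctrl <- control_inf(vars_selection = FALSE)

# hypothetical call combining a nonprobability sample (`nonprob_sample`)
# with a probability sample supplied as a `survey.design2` object
est <- nonprob(
  selection = ~ x1 + x2,
  outcome = y ~ x1 + x2,
  data = nonprob_sample,
  svydesign = prob_design,
  method_selection = "logit",
  method_outcome = "glm",
  family_outcome = "gaussian",
  control_selection = sel_ctrl,
  control_inference = inf_ctrl
)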