diff --git a/DESCRIPTION b/DESCRIPTION index 23088fe..a032da2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: nonprobsvy Type: Package Title: Inference Based on Non-Probability Samples -Version: 0.1.2 +Version: 0.2.0 Authors@R: c(person(given = "Ɓukasz", family = "Chrostowski", diff --git a/NEWS.md b/NEWS.md index 001f06d..96be6ed 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,6 @@ # nonprobsvy News and Updates -# nonprobsvy 0.2 +## nonprobsvy 0.2 ------------------------------------------------------------------------ @@ -12,8 +12,8 @@ - function `genSimData` removed completely as it is not used anywhere in the package. - argument `maxLik_method` renamed to `maxlik_method` in the - `control_sel` function. - + `control_sel` function. + ### Features - two additional datasets have been included: `jvs` (Job Vacancy @@ -35,13 +35,15 @@ ### Other - more informative error messages added. +- documentation improved. +- switching completely to snake_case. ### Documentation - annotation has been added that arguments such as `strata`, `subset` and `na_action` are not supported for the time being. -# nonprobsvy 0.1.1 +## nonprobsvy 0.1.1 ------------------------------------------------------------------------ @@ -84,7 +86,7 @@ - added unit tests for variable selection models and mi estimation with vector of population totals available -# nonprobsvy 0.1.0 +## nonprobsvy 0.1.0 ------------------------------------------------------------------------ diff --git a/R/boot_dr.R b/R/boot_dr.R index 759ce05..62ffbda 100644 --- a/R/boot_dr.R +++ b/R/boot_dr.R @@ -1,4 +1,4 @@ -bootDR <- function(outcome, +boot_dr <- function(outcome, data, svydesign, SelectionModel, @@ -52,7 +52,7 @@ bootDR <- function(outcome, p <- ncol(X) y_rand <- vector(mode = "numeric", length = n_rand) y <- c(y_rand, OutcomeModel$y_nons) # outcome variable for joint model - var_obj <- bootDR_sel( + var_obj <- boot_dr_sel( X = X, R = R, y = y, @@ -261,7 +261,7 @@ bootDR <- function(outcome, #' @importFrom parallel makeCluster #' @importFrom parallel stopCluster #' @importFrom doParallel registerDoParallel -bootDR_multicore <- function(outcome, +boot_dr_multicore <- function(outcome, data, svydesign, SelectionModel, @@ -316,7 +316,7 @@ bootDR_multicore <- function(outcome, p <- ncol(X) y_rand <- vector(mode = "numeric", length = n_rand) y <- c(y_rand, OutcomeModel$y_nons) # outcome variable for joint model - var_obj <- bootDR_sel_multicore( + var_obj <- boot_dr_sel_multicore( X = X, R = R, y = y, @@ -346,9 +346,9 @@ bootDR_multicore <- function(outcome, doParallel::registerDoParallel(cl) on.exit(parallel::stopCluster(cl)) parallel::clusterExport(cl = cl, varlist = c( - "internal_selection", "internal_outcome", "logit_model_nonprobsvy", "start_fit", "get_method", "controlSel", "theta_h_estimation", + "internal_selection", "internal_outcome", "logit_model_nonprobsvy", "start_fit", "get_method", "control_sel", "theta_h_estimation", "mle", "mu_hatDR", "probit_model_nonprobsvy", "cloglog_model_nonprobsvy", "glm_nonprobsvy", "nn_nonprobsvy", "pmm_nonprobsvy", - "gaussian_nonprobsvy", "poisson_nonprobsvy", "binomial_nonprobsvy", "nonprobMI_fit", "controlOut" + "gaussian_nonprobsvy", "poisson_nonprobsvy", "binomial_nonprobsvy", "nonprob_mi_fit", "control_out" )) if (is.null(pop_totals)) { N <- sum(weights_rand) diff --git a/R/boot_dr_sel.R b/R/boot_dr_sel.R index ebf6851..e6831e8 100644 --- a/R/boot_dr_sel.R +++ b/R/boot_dr_sel.R @@ -1,4 +1,4 @@ -bootDR_sel <- function(X, +boot_dr_sel <- function(X, R, y, svydesign, @@ -114,7 +114,7 @@ bootDR_sel <- function(X, #' @importFrom parallel makeCluster #' @importFrom parallel stopCluster #' @importFrom doParallel registerDoParallel -bootDR_sel_multicore <- function(X, +boot_dr_sel_multicore <- function(X, svydesign, R, y, @@ -147,7 +147,7 @@ bootDR_sel_multicore <- function(X, doParallel::registerDoParallel(cl) on.exit(parallel::stopCluster(cl)) parallel::clusterExport(cl = cl, varlist = c( - "internal_selection", "logit_model_nonprobsvy", "start_fit", "get_method", "controlSel", "mle", + "internal_selection", "logit_model_nonprobsvy", "start_fit", "get_method", "control_sel", "mle", "probit_model_nonprobsvy", "cloglog_model_nonprobsvy", "mm", "u_theta_beta_dr", "mu_hatDR" )) diff --git a/R/boot_ipw.R b/R/boot_ipw.R index 883c3d5..e6057fa 100644 --- a/R/boot_ipw.R +++ b/R/boot_ipw.R @@ -1,4 +1,4 @@ -bootIPW <- function(X_rand, +boot_ipw <- function(X_rand, X_nons, svydesign, weights, @@ -183,7 +183,7 @@ bootIPW <- function(X_rand, #' @importFrom parallel makeCluster #' @importFrom parallel stopCluster #' @importFrom doParallel registerDoParallel -bootIPW_multicore <- function(X_rand, +boot_ipw_multicore <- function(X_rand, X_nons, svydesign, weights, @@ -225,7 +225,7 @@ bootIPW_multicore <- function(X_rand, doParallel::registerDoParallel(cl) on.exit(parallel::stopCluster(cl)) parallel::clusterExport(cl = cl, varlist = c( - "internal_selection", "logit_model_nonprobsvy", "start_fit", "get_method", "controlSel", + "internal_selection", "logit_model_nonprobsvy", "start_fit", "get_method", "control_sel", "mle", "mu_hatIPW", "probit_model_nonprobsvy", "cloglog_model_nonprobsvy", "theta_h_estimation" )) diff --git a/R/boot_mi.R b/R/boot_mi.R index 2a35170..a923a7b 100644 --- a/R/boot_mi.R +++ b/R/boot_mi.R @@ -4,7 +4,7 @@ #' @importFrom utils setTxtProgressBar #' @importFrom utils txtProgressBar -bootMI <- function(X_rand, +boot_mi <- function(X_rand, X_nons, weights, y, @@ -125,7 +125,7 @@ bootMI <- function(X_rand, X_rand_strap <- X_rand[which(rep_weights[, k] != 0), ] weights_rand_strap <- weights_rand_strap_svy[strap_rand_svy] - model_rand <- nonprobMI_nn( + model_rand <- nonprob_mi_nn( data = X_nons_strap, query = X_rand_strap, k = control_outcome$k, @@ -187,7 +187,7 @@ bootMI <- function(X_rand, model_rand <- switch(control_outcome$predictive_match, { # 1 - nonprobMI_nn( + nonprob_mi_nn( data = y_strap, query = y_rand_strap, k = control_outcome$k, @@ -196,7 +196,7 @@ bootMI <- function(X_rand, ) }, { # 2 - nonprobMI_nn( + nonprob_mi_nn( data = y_nons_strap, query = y_rand_strap, k = control_outcome$k, @@ -291,7 +291,7 @@ bootMI <- function(X_rand, X_nons_strap <- X_nons[strap, , drop = FALSE] y_strap <- y[strap] - model_rand <- nonprobMI_nn( + model_rand <- nonprob_mi_nn( data = X_nons_strap, query = t(pop_totals / N), k = control_outcome$k, @@ -339,7 +339,7 @@ bootMI <- function(X_rand, model_rand <- switch(control_outcome$predictive_match, { # 1 - nonprobMI_nn( + nonprob_mi_nn( data = y_strap, query = y_strap_rand, k = control_outcome$k, @@ -348,7 +348,7 @@ bootMI <- function(X_rand, ) }, { # 2 - nonprobMI_nn( + nonprob_mi_nn( data = y_strap_nons, query = y_strap_rand, k = control_outcome$k, @@ -410,7 +410,7 @@ bootMI <- function(X_rand, #' @importFrom parallel makeCluster #' @importFrom parallel stopCluster #' @importFrom doParallel registerDoParallel -bootMI_multicore <- function(X_rand, +boot_mi_multicore <- function(X_rand, X_nons, weights, y, @@ -450,8 +450,8 @@ bootMI_multicore <- function(X_rand, doParallel::registerDoParallel(cl) on.exit(parallel::stopCluster(cl)) parallel::clusterExport(cl = cl, varlist = c( - "internal_selection", "logit_model_nonprobsvy", "start_fit", "get_method", "controlSel", - "mle", "mu_hatIPW", "probit_model_nonprobsvy", "cloglog_model_nonprobsvy", "nonprobMI_nn" + "internal_selection", "logit_model_nonprobsvy", "start_fit", "get_method", "control_sel", + "mle", "mu_hatIPW", "probit_model_nonprobsvy", "cloglog_model_nonprobsvy", "nonprob_mi_nn" )) if (is.null(pop_totals)) { @@ -508,7 +508,7 @@ bootMI_multicore <- function(X_rand, X_rand_strap <- X_rand[strap_rand_svy, , drop = FALSE] weights_strap_rand <- weights_rand_strap_svy[strap_rand_svy] - model_rand <- nonprobMI_nn( + model_rand <- nonprob_mi_nn( data = X_nons_strap, query = X_rand_strap, k = control_outcome$k, @@ -556,7 +556,7 @@ bootMI_multicore <- function(X_rand, model_rand <- switch(control_outcome$predictive_match, { # 1 - nonprobMI_nn( + nonprob_mi_nn( data = y_strap, query = y_rand_strap, k = control_outcome$k, @@ -565,7 +565,7 @@ bootMI_multicore <- function(X_rand, ) }, { # 2 - nonprobMI_nn( + nonprob_mi_nn( data = y_nons_strap, query = y_rand_strap, k = control_outcome$k, @@ -616,7 +616,7 @@ bootMI_multicore <- function(X_rand, X_nons_strap <- X_nons[strap, , drop = FALSE] y_strap <- y[strap] - model_rand <- nonprobMI_nn( + model_rand <- nonprob_mi_nn( data = X_nons_strap, query = t(pop_totals / N), k = control_outcome$k, @@ -650,7 +650,7 @@ bootMI_multicore <- function(X_rand, y_strap_nons <- family_nonprobsvy$linkinv(eta_nons) - model_rand <- nonprobMI_nn( + model_rand <- nonprob_mi_nn( data = y_strap_nons, query = y_strap_rand, k = control_outcome$k, diff --git a/R/control_inference.R b/R/control_inference.R index 2178fe6..e3d9465 100644 --- a/R/control_inference.R +++ b/R/control_inference.R @@ -1,6 +1,6 @@ #' @title Control parameters for inference #' -#' @description \code{control_inf} constructs a list with all necessary control parameters +#' @description \code{control_inf} constructs a `list` with all necessary control parameters #' for statistical inference. #' #' @param vars_selection If `TRUE`, then the variables selection model is used. diff --git a/R/data_manip.R b/R/data_manip.R index 2b1122f..485e84b 100644 --- a/R/data_manip.R +++ b/R/data_manip.R @@ -1,5 +1,11 @@ # create an object with model frames and matrices to preprocess -model_frame <- function(formula, data, weights = NULL, svydesign = NULL, pop_totals = NULL, pop_size = NULL, flag = TRUE) { +model_frame <- function(formula, + data, + weights = NULL, + svydesign = NULL, + pop_totals = NULL, + pop_size = NULL, + flag = TRUE) { if (!is.null(svydesign)) { ##### Model frame for nonprobability sample ##### model_Frame <- model.frame(formula, data) diff --git a/R/internals.R b/R/internals.R index 8d50f64..bd59108 100644 --- a/R/internals.R +++ b/R/internals.R @@ -59,7 +59,7 @@ internal_outcome <- function(outcome, family_outcome, start_outcome) { # estimation - model_nons <- nonprobMI_fit( + model_nons <- nonprob_mi_fit( outcome = outcome, data = data, weights = weights, @@ -201,7 +201,7 @@ mu_hatIPW <- function(y, mu_hat } -nonprobMI_fit <- function(outcome, +nonprob_mi_fit <- function(outcome, data, weights, svydesign = NULL, diff --git a/R/bias_correction_ipw.R b/R/ipw_bias_correction.R similarity index 99% rename from R/bias_correction_ipw.R rename to R/ipw_bias_correction.R index 2df0ae3..227e943 100644 --- a/R/bias_correction_ipw.R +++ b/R/ipw_bias_correction.R @@ -15,6 +15,7 @@ mm <- function(X, nleqslv_global, nleqslv_xscalm, boot = FALSE) { + method_selection_function <- paste(method_selection, "_model_nonprobsvy", sep = "") method <- get_method(method_selection_function) inv_link <- method$make_link_inv diff --git a/R/nn.R b/R/nn.R index ff73a1a..0ab92a9 100644 --- a/R/nn.R +++ b/R/nn.R @@ -13,7 +13,7 @@ nn_nonprobsvy <- function(outcome, model_frame = NULL, start_outcome = NULL) { # TODO consider add data standardization before modelling - model_nons <- nonprobMI_nn( + model_nons <- nonprob_mi_nn( data = X_nons, query = X_nons, k = control$k, @@ -21,7 +21,7 @@ nn_nonprobsvy <- function(outcome, searchtype = control$searchtype ) if (is.null(pop_totals)) { - model_rand <- nonprobMI_nn( + model_rand <- nonprob_mi_nn( data = X_nons, query = X_rand, k = control$k, @@ -42,7 +42,7 @@ nn_nonprobsvy <- function(outcome, # FUN=\(x) mean(sample_nonprob$short_[x]) ) } else { - model_rand <- nonprobMI_nn( + model_rand <- nonprob_mi_nn( data = X_nons, query = t(as.matrix(pop_totals / pop_totals[1])), k = control$k, @@ -73,7 +73,7 @@ nn_nonprobsvy <- function(outcome, } -nonprobMI_nn <- function(data, +nonprob_mi_nn <- function(data, query, k, treetype, @@ -126,7 +126,7 @@ nn_exact <- function(pi_ij, y_nons_b <- y[boot_samp] x_nons_b <- X_nons[boot_samp, , drop = FALSE] - YY <- nonprobMI_nn( + YY <- nonprob_mi_nn( data = x_nons_b, query = X_rand, k = k, diff --git a/R/nonprob.R b/R/nonprob.R index d4417fa..357ee5d 100644 --- a/R/nonprob.R +++ b/R/nonprob.R @@ -114,18 +114,18 @@ nonprob <- function(data, if (!inherits(target, "formula")) { stop("Please provide the `target` argument as a formula.") } - model_used <- "IPW" + model_used <- "ipw" } else { - model_used <- "DR" + model_used <- "dr" } } else if (inherits(outcome, "formula")) { - model_used <- "MI" + model_used <- "mi" } ## model estimates model_estimates <- switch(model_used, - IPW = nonprobIPW( + ipw = nonprob_ipw( selection = selection, target = target, data = data, @@ -147,7 +147,7 @@ nonprob <- function(data, se = se, ... ), - MI = nonprobMI( + mi = nonprob_mi( outcome = outcome, data = data, svydesign = svydesign, @@ -169,7 +169,7 @@ nonprob <- function(data, se = se, ... ), - DR = nonprobDR( + dr = nonprob_dr( selection = selection, outcome = outcome, data = data, diff --git a/R/nonprob_dr.R b/R/nonprob_dr.R index 67e6fac..b726fa5 100644 --- a/R/nonprob_dr.R +++ b/R/nonprob_dr.R @@ -13,7 +13,7 @@ #' @import Rcpp #' @importFrom Rcpp evalCpp -nonprobDR <- function(selection, +nonprob_dr <- function(selection, outcome, data, svydesign, @@ -731,7 +731,7 @@ nonprobDR <- function(selection, SE_values[[k]] <- data.frame(t(data.frame("SE" = c(prob = se_prob, nonprob = se_nonprob)))) } else if (var_method == "bootstrap") { if (control_inference$cores > 1) { - boot_obj <- bootDR_multicore( + boot_obj <- boot_dr_multicore( outcome = outcome, data = data, svydesign = svydesign, @@ -765,7 +765,7 @@ nonprobDR <- function(selection, verbose = verbose ) } else { - boot_obj <- bootDR( + boot_obj <- boot_dr( outcome = outcome, data = data, svydesign = svydesign, diff --git a/R/nonprob_ipw.R b/R/nonprob_ipw.R index c0b20c2..a5406fd 100644 --- a/R/nonprob_ipw.R +++ b/R/nonprob_ipw.R @@ -11,7 +11,7 @@ #' @import Rcpp #' @importFrom Rcpp evalCpp -nonprobIPW <- function(selection, +nonprob_ipw <- function(selection, target, data, svydesign, @@ -435,7 +435,7 @@ nonprobIPW <- function(selection, } } else if (var_method == "bootstrap") { # TODO add ys, mu_hats instead of y_nons, if (control_inference$cores > 1) { - boot_obj <- bootIPW_multicore( + boot_obj <- boot_ipw_multicore( X_rand = X_rand, X_nons = X_nons, svydesign = svydesign, @@ -462,7 +462,7 @@ nonprobIPW <- function(selection, verbose = verbose ) } else { - boot_obj <- bootIPW( + boot_obj <- boot_ipw( X_rand = X_rand, X_nons = X_nons, svydesign = svydesign, diff --git a/R/nonprob_mi.R b/R/nonprob_mi.R index 4beaaf2..2e42b20 100644 --- a/R/nonprob_mi.R +++ b/R/nonprob_mi.R @@ -13,7 +13,7 @@ #' @import Rcpp #' @importFrom Rcpp evalCpp -nonprobMI <- function(outcome, +nonprob_mi <- function(outcome, data, svydesign, pop_totals, @@ -375,7 +375,7 @@ nonprobMI <- function(outcome, } else if (control_inference$var_method == "bootstrap") { # TODO for pop_totals # bootstrap variance if (control_inference$cores > 1) { - boot_obj <- bootMI_multicore( + boot_obj <- boot_mi_multicore( X_rand = X_rand, X_nons = X_nons, weights = weights, @@ -396,7 +396,7 @@ nonprobMI <- function(outcome, verbose = verbose ) } else { - boot_obj <- bootMI( + boot_obj <- boot_mi( X_rand = X_rand, X_nons = X_nons, weights = weights, diff --git a/R/pmm.R b/R/pmm.R index 8cc51e5..9c4d779 100644 --- a/R/pmm.R +++ b/R/pmm.R @@ -55,7 +55,7 @@ pmm_nonprobsvy <- function(outcome, switch(control$predictive_match, { # 1 if (is.null(pop_totals)) { - model_rand <- nonprobMI_nn( + model_rand <- nonprob_mi_nn( data = glm_object$y_nons_pred, query = glm_object$y_rand_pred, k = control$k, @@ -87,7 +87,7 @@ pmm_nonprobsvy <- function(outcome, ) } else { # I'm not touching this - model_rand <- nonprobMI_nn( + model_rand <- nonprob_mi_nn( data = glm_object$y_nons_pred, query = glm_object$y_rand_pred, k = control$k, @@ -99,7 +99,7 @@ pmm_nonprobsvy <- function(outcome, }, { # 2 if (is.null(pop_totals)) { - model_rand <- nonprobMI_nn( + model_rand <- nonprob_mi_nn( data = y_nons, query = glm_object$y_rand_pred, k = control$k, @@ -126,7 +126,7 @@ pmm_nonprobsvy <- function(outcome, ) } else { # I'm not touching this - model_rand <- nonprobMI_nn( + model_rand <- nonprob_mi_nn( data = y_nons, query = glm_object$y_rand_pred, k = control$k, @@ -217,7 +217,7 @@ pmm_exact <- function(pi_ij, } YY <- switch(predictive_match, { - nonprobMI_nn( + nonprob_mi_nn( data = predict( reg_object_boot, newdata = model_obj$model$glm_object$data[boot_samp, , drop = FALSE], @@ -230,7 +230,7 @@ pmm_exact <- function(pi_ij, ) }, { - nonprobMI_nn( + nonprob_mi_nn( data = y_nons_b, query = XX, k = k, diff --git a/R/variance_mi.R b/R/variance_mi.R index 42d5504..289451d 100644 --- a/R/variance_mi.R +++ b/R/variance_mi.R @@ -73,7 +73,7 @@ internal_varMI <- function(svydesign, # not computed, but it should be computed in serious publications var_nonprob <- 0 - # An option in controlInf controls this + # An option in control_inf controls this # Maybe add a warning/message if this computation is omited if (nn_exact_se) { var_nonprob <- pmm_exact( diff --git a/_pkgdown.yml b/_pkgdown.yml index 61bfeda..ea7d4e2 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -1,4 +1,3 @@ url: https://ncn-foreigners.github.io/nonprobsvy/ template: bootstrap: 5 - diff --git a/man/control_inf.Rd b/man/control_inf.Rd index d40f468..f615a04 100644 --- a/man/control_inf.Rd +++ b/man/control_inf.Rd @@ -60,7 +60,7 @@ set this value to \code{TRUE} before submitting the final results.} List with selected parameters. } \description{ -\code{control_inf} constructs a list with all necessary control parameters +\code{control_inf} constructs a \code{list} with all necessary control parameters for statistical inference. } \seealso{