From 6c64da2efbd7332cb4ae73d27b3a7447e6605b00 Mon Sep 17 00:00:00 2001 From: LukaszChrostowski Date: Sun, 27 Oct 2024 20:54:04 +0100 Subject: [PATCH] improvements before CRAN submit --- .github/workflows/test-coverage.yaml | 1 + R/bias_correction_ipw.R | 6 ++- R/control_selection.R | 9 ++-- R/data_manip.R | 10 ++-- R/glm.R | 2 +- R/nn.R | 9 ++-- R/nonprobDR.R | 6 +-- R/nonprobIPW.R | 4 +- R/nonprobMI.R | 2 +- R/pmm.R | 68 ++++++++++++++-------------- R/theta_funcs.R | 1 - R/varianceMI.R | 3 +- 12 files changed, 61 insertions(+), 60 deletions(-) diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml index 8e2e127..998794d 100644 --- a/.github/workflows/test-coverage.yaml +++ b/.github/workflows/test-coverage.yaml @@ -13,6 +13,7 @@ jobs: runs-on: ubuntu-latest env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + TEST_NONPROBSVY_MULTICORE_DEVELOPER: "true" steps: - uses: actions/checkout@v3 diff --git a/R/bias_correction_ipw.R b/R/bias_correction_ipw.R index 919530f..ca10aad 100644 --- a/R/bias_correction_ipw.R +++ b/R/bias_correction_ipw.R @@ -84,8 +84,10 @@ mm <- function(X, global = nleqslv_global, xscalm = nleqslv_xscalm, jacobian = TRUE, - control = list(scalex = rep(1, length(par0)), - maxit = maxit), # TODO algorithm did not converge in maxit iterations for cloglog + control = list( + scalex = rep(1, length(par0)), + maxit = maxit + ), # TODO algorithm did not converge in maxit iterations for cloglog R = R, X = X, y = y, diff --git a/R/control_selection.R b/R/control_selection.R index a5cfd19..940a316 100644 --- a/R/control_selection.R +++ b/R/control_selection.R @@ -69,10 +69,11 @@ controlSel <- function(method = "glm.fit", # perhaps another control function fo print_level = 0, start_type = c("glm", "naive", "zero"), nleqslv_method = c("Broyden", "Newton"), - nleqslv_global = c("dbldog", "pwldog", - "cline", "qline", "gline", "hook", "none"), - nleqslv_xscalm = c("fixed","auto") - ) { + nleqslv_global = c( + "dbldog", "pwldog", + "cline", "qline", "gline", "hook", "none" + ), + nleqslv_xscalm = c("fixed", "auto")) { list( epsilon = epsilon, maxit = maxit, diff --git a/R/data_manip.R b/R/data_manip.R index a52e250..0ced926 100644 --- a/R/data_manip.R +++ b/R/data_manip.R @@ -7,9 +7,9 @@ model_frame <- function(formula, data, weights = NULL, svydesign = NULL, pop_tot outcome_name <- names(model_Frame)[1] mt <- terms(formula) # attr(model_Frame, "terms") - nons_names <- all.vars(as.formula(paste("~", paste(attr(mt, "term.labels"), collapse=" + ")))) - #nons_names <- attr(mt, "term.labels") # colnames(get_all_vars(formula, data)) names of variables of nonprobability sample terms(formula, data = data) - ##### Model frame for probability sample ##### + nons_names <- all.vars(as.formula(paste("~", paste(attr(mt, "term.labels"), collapse = " + ")))) + # nons_names <- attr(mt, "term.labels") # colnames(get_all_vars(formula, data)) names of variables of nonprobability sample terms(formula, data = data) + ##### Model frame for probability sample ##### if (outcome_name %in% colnames(svydesign$variables)) { # design_to_frame <- svydesign$variables # design_to_frame[, outcome_name][is.na(design_to_frame[, outcome_name])] <- 0 # replace NA in dependent outcome with 0 @@ -28,8 +28,8 @@ model_frame <- function(formula, data, weights = NULL, svydesign = NULL, pop_tot design_to_frame <- svydesign$variables ## terms_object <- terms(formula) - #names_rand <- all.vars(terms_object) - names_rand <- all.vars(as.formula(paste("~", paste(attr(terms_object, "term.labels"), collapse=" + ")))) + # names_rand <- all.vars(terms_object) + names_rand <- all.vars(as.formula(paste("~", paste(attr(terms_object, "term.labels"), collapse = " + ")))) ## # names_rand <- all.vars(formula[-2]) # old ## diff --git a/R/glm.R b/R/glm.R index f588957..6d0cf2e 100644 --- a/R/glm.R +++ b/R/glm.R @@ -74,7 +74,7 @@ glm_nonprobsvy <- function(outcome, formula_str <- paste(outcome_name, "~", paste(predictors, collapse = " + ")) formula <- as.formula(formula_str) model$formula <- formula - model$data <- as.data.frame(cbind(y_nons, X_nons[,-1, drop = FALSE])) + model$data <- as.data.frame(cbind(y_nons, X_nons[, -1, drop = FALSE])) colnames(model$data) <- c(outcome_name, predictors) model_out <- list( diff --git a/R/nn.R b/R/nn.R index 2e827ba..ff73a1a 100644 --- a/R/nn.R +++ b/R/nn.R @@ -100,7 +100,7 @@ nn_exact <- function(pi_ij, X_nons, X_rand, k, - #control, + # control, N) { # if (isTRUE("ppsmat" %in% class(pi_ij))) { # pi_ij <- pi_ij$pij @@ -121,7 +121,6 @@ nn_exact <- function(pi_ij, dd <- vector(mode = "numeric", length = loop_size) for (jj in 1:loop_size) { - boot_samp <- sample(1:n_nons, size = n_nons, replace = TRUE) # boot_samp <- sample(1:n_rand, size = n_rand, replace = TRUE) y_nons_b <- y[boot_samp] @@ -133,9 +132,9 @@ nn_exact <- function(pi_ij, k = k, searchtype = "standard", treetype = "kd" - #TODO:: add control options - #treetype = control$treetype, - #searchtype = control$searchtype + # TODO:: add control options + # treetype = control$treetype, + # searchtype = control$searchtype ) dd[jj] <- weighted.mean( diff --git a/R/nonprobDR.R b/R/nonprobDR.R index 7b1c709..6b8a3e1 100644 --- a/R/nonprobDR.R +++ b/R/nonprobDR.R @@ -563,7 +563,7 @@ nonprobDR <- function(selection, colnames(X) <- c("(Intercept)", colnames(Xsel)) OutcomeModel$X_nons <- SelectionModel$X_nons <- X[loc_nons, ] SelectionModel$pop_totals <- c(SelectionModel$pop_totals[1], SelectionModel$pop_totals[idx + 1]) - pop_totals <- c(pop_totals[1], pop_totals[idx+1]) + pop_totals <- c(pop_totals[1], pop_totals[idx + 1]) } else if (control_inference$bias_inf == "div") { X_outcome <- as.matrix(X[, beta_selected[-1] + 1, drop = FALSE]) Xsel <- X_selection <- as.matrix(X[, theta_selected[-1] + 1, drop = FALSE]) @@ -848,7 +848,7 @@ nonprobDR <- function(selection, if (is.null(pop_size)) pop_size <- N_nons names(pop_size) <- "pop_size" names(ys) <- all.vars(outcome_init[[2]]) - est_totals <- colSums(SelectionModel$X_nons*as.vector(weights_nons)) + est_totals <- colSums(SelectionModel$X_nons * as.vector(weights_nons)) names(prob_pop_totals) <- colnames(SelectionModel$X_nons) boot_sample <- if (control_inference$var_method == "bootstrap" & control_inference$keep_boot) { @@ -913,7 +913,7 @@ nonprobDR <- function(selection, outcome = OutcomeList, selection = SelectionList, boot_sample = boot_sample, - svydesign = if(is.null(pop_totals)) svydesign else NULL # TODO to customize if pop_totals only + svydesign = if (is.null(pop_totals)) svydesign else NULL # TODO to customize if pop_totals only ), class = c("nonprobsvy", "nonprobsvy_dr") ) diff --git a/R/nonprobIPW.R b/R/nonprobIPW.R index 6a2b95d..10913e3 100644 --- a/R/nonprobIPW.R +++ b/R/nonprobIPW.R @@ -540,7 +540,7 @@ nonprobIPW <- function(selection, if (is.null(pop_size)) pop_size <- N # estimated pop_size names(pop_size) <- "pop_size" names(ys) <- all.vars(outcome_init[[2]]) - est_totals <- colSums(X_nons*as.vector(weights_nons)) + est_totals <- colSums(X_nons * as.vector(weights_nons)) names(prob_pop_totals) <- colnames(X_nons) boot_sample <- if (control_inference$var_method == "bootstrap" & control_inference$keep_boot) { @@ -605,7 +605,7 @@ nonprobIPW <- function(selection, outcome = NULL, selection = SelectionList, boot_sample = boot_sample, - svydesign = if(is.null(pop_totals)) svydesign else NULL # TODO to customize if pop_totals only + svydesign = if (is.null(pop_totals)) svydesign else NULL # TODO to customize if pop_totals only ), class = c("nonprobsvy", "nonprobsvy_ipw") ) diff --git a/R/nonprobMI.R b/R/nonprobMI.R index bb45c4e..acb18aa 100644 --- a/R/nonprobMI.R +++ b/R/nonprobMI.R @@ -494,7 +494,7 @@ nonprobMI <- function(outcome, outcome = OutcomeList, selection = NULL, boot_sample = boot_sample, - svydesign = if(is.null(pop_totals)) svydesign else NULL # TODO to customize if pop_totals only + svydesign = if (is.null(pop_totals)) svydesign else NULL # TODO to customize if pop_totals only ), class = c("nonprobsvy", "nonprobsvy_mi") ) diff --git a/R/pmm.R b/R/pmm.R index f486ed2..e26782d 100644 --- a/R/pmm.R +++ b/R/pmm.R @@ -78,26 +78,26 @@ pmm_nonprobsvy <- function(outcome, ) y_rand_pred <- apply(model_rand$nn.idx, 1, - FUN = \(x) mean(y_nons[x]) - # FUN=\(x) mean(sample_nonprob$short_[x]) + FUN = \(x) mean(y_nons[x]) + # FUN=\(x) mean(sample_nonprob$short_[x]) ) switch(control$pmm_weights, - "none" = { - y_rand_pred <- apply(model_rand$nn.idx, 1, - FUN = \(x) mean(y_nons[x]) - # FUN=\(x) mean(sample_nonprob$short_[x]) - ) - }, - "prop_dist" = { - # TODO:: these weights will need to be saved for variance estimation - y_rand_pred <- sapply(1:NROW(model_rand$nn.idx), - FUN = \(x) weighted.mean(y_nons[model_rand$nn.idx[x, ]], - w = 1 / model_rand$nn.dist[x, ] - ) - # FUN=\(x) mean(sample_nonprob$short_[x]) - ) - } + "none" = { + y_rand_pred <- apply(model_rand$nn.idx, 1, + FUN = \(x) mean(y_nons[x]) + # FUN=\(x) mean(sample_nonprob$short_[x]) + ) + }, + "prop_dist" = { + # TODO:: these weights will need to be saved for variance estimation + y_rand_pred <- sapply(1:NROW(model_rand$nn.idx), + FUN = \(x) weighted.mean(y_nons[model_rand$nn.idx[x, ]], + w = 1 / model_rand$nn.dist[x, ] + ) + # FUN=\(x) mean(sample_nonprob$short_[x]) + ) + } ) } else { # I'm not touching this @@ -122,21 +122,21 @@ pmm_nonprobsvy <- function(outcome, ) switch(control$pmm_weights, - "none" = { - y_rand_pred <- apply(model_rand$nn.idx, 1, - FUN = \(x) mean(y_nons[x]) - # FUN=\(x) mean(sample_nonprob$short_[x]) - ) - }, - "prop_dist" = { - # TODO:: these weights will need to be saved for variance estimation - y_rand_pred <- sapply(1:NROW(model_rand$nn.idx), - FUN = \(x) weighted.mean(y_nons[model_rand$nn.idx[x, ]], - w = 1 / model_rand$nn.dist[x, ] - ) - # FUN=\(x) mean(sample_nonprob$short_[x]) - ) - } + "none" = { + y_rand_pred <- apply(model_rand$nn.idx, 1, + FUN = \(x) mean(y_nons[x]) + # FUN=\(x) mean(sample_nonprob$short_[x]) + ) + }, + "prop_dist" = { + # TODO:: these weights will need to be saved for variance estimation + y_rand_pred <- sapply(1:NROW(model_rand$nn.idx), + FUN = \(x) weighted.mean(y_nons[model_rand$nn.idx[x, ]], + w = 1 / model_rand$nn.dist[x, ] + ) + # FUN=\(x) mean(sample_nonprob$short_[x]) + ) + } ) } else { # I'm not touching this @@ -173,8 +173,8 @@ pmm_exact <- function(pi_ij, n_nons, y, pmm_reg_engine, - #stats, #why is this here? - #glm, #why is this here? + # stats, #why is this here? + # glm, #why is this here? model_obj, svydesign, predictive_match, diff --git a/R/theta_funcs.R b/R/theta_funcs.R index 424b0e3..70ba214 100644 --- a/R/theta_funcs.R +++ b/R/theta_funcs.R @@ -93,7 +93,6 @@ theta_h_estimation <- function(R, start = NULL, pop_totals = NULL, pop_means = NULL) { - p <- ncol(X) # if (is.null(pop_totals) & is.null(pop_means)) { # if (is.null(start)) { diff --git a/R/varianceMI.R b/R/varianceMI.R index dd01cd5..4675c38 100644 --- a/R/varianceMI.R +++ b/R/varianceMI.R @@ -48,11 +48,10 @@ internal_varMI <- function(svydesign, X_rand = X_rand, k = k, # TODO:: add control here - #control = control + # control = control N = N ) } - } else if (method == "glm") { # TODO add variance for count binary outcome variable control_outcome$method beta <- parameters[, 1]