diff --git a/R/nonprobDR.R b/R/nonprobDR.R index 2686013..7b1c709 100644 --- a/R/nonprobDR.R +++ b/R/nonprobDR.R @@ -453,7 +453,7 @@ nonprobDR <- function(selection, method_selection = method_selection ) theta_hat <- selection_model$theta_hat - # grad <- est_method_obj$grad + grad <- selection_model$grad hess <- selection_model$hess ps_nons <- selection_model$ps_nons est_ps_rand <- selection_model$est_ps_rand @@ -495,6 +495,7 @@ nonprobDR <- function(selection, # beta_statistics <- model_obj$parameters sigma <- NULL OutcomeList[[k]] <- model_obj$model + OutcomeList[[k]]$model_frame <- OutcomeModel$model_frame y_nons <- OutcomeModel$y_nons mu_hat <- mu_hatDR( @@ -689,6 +690,7 @@ nonprobDR <- function(selection, y_nons_pred <- model_obj$y_nons_pred parameters <- model_obj$parameters OutcomeList[[k]] <- model_obj$model + OutcomeList[[k]]$model_frame <- OutcomeModel$model_frame_rand mu_hat <- 1 / N_nons * sum((1 / ps_nons) * (weights * (OutcomeModel$y_nons - y_nons_pred))) + y_rand_pred } else { @@ -872,6 +874,12 @@ nonprobDR <- function(selection, formula = selection, df_residual = selection_model$df_residual, log_likelihood = selection_model$log_likelihood, + hessian = hess, + gradient = grad, + method = est_method, + prob_der = ps_nons_der, + prob_rand = ps_rand, + prob_rand_est = est_ps_rand, cve = if (control_inference$vars_selection == TRUE) { cve_selection } else { @@ -904,7 +912,8 @@ nonprobDR <- function(selection, pop_totals = prob_pop_totals, outcome = OutcomeList, selection = SelectionList, - boot_sample = boot_sample + boot_sample = boot_sample, + svydesign = if(is.null(pop_totals)) svydesign else NULL # TODO to customize if pop_totals only ), class = c("nonprobsvy", "nonprobsvy_dr") ) diff --git a/R/nonprobIPW.R b/R/nonprobIPW.R index a40aed0..6a2b95d 100644 --- a/R/nonprobIPW.R +++ b/R/nonprobIPW.R @@ -169,7 +169,7 @@ nonprobIPW <- function(selection, method_selection = method_selection ) theta_hat <- selection_model$theta_hat - # grad <- est_method_obj$grad + grad <- selection_model$grad hess <- selection_model$hess var_cov1 <- selection_model$var_cov1 var_cov2 <- selection_model$var_cov2 @@ -563,9 +563,19 @@ nonprobIPW <- function(selection, weights = as.vector(weights_nons), prior.weights = weights, est_totals = est_totals, + pop_totals = pop_totals, formula = selection, df_residual = selection_model$df_residual, log_likelihood = selection_model$log_likelihood, + method_selection = method_selection, + hessian = hess, + gradient = grad, + method = est_method, + prob_der = ps_nons_der, + prob_rand = ps_rand, + prob_rand_est = est_ps_rand, + prob_rand_est_der = est_ps_rand_der, + h_function = h, cve = if (control_inference$vars_selection == TRUE) { cve_selection } else { @@ -594,7 +604,8 @@ nonprobIPW <- function(selection, pop_totals = prob_pop_totals, outcome = NULL, selection = SelectionList, - boot_sample = boot_sample + boot_sample = boot_sample, + svydesign = if(is.null(pop_totals)) svydesign else NULL # TODO to customize if pop_totals only ), class = c("nonprobsvy", "nonprobsvy_ipw") ) diff --git a/R/nonprobMI.R b/R/nonprobMI.R index d1c2258..bb45c4e 100644 --- a/R/nonprobMI.R +++ b/R/nonprobMI.R @@ -244,6 +244,7 @@ nonprobMI <- function(outcome, y_nons_pred <- model_obj$y_nons_pred # parameters <- model_obj$parameters OutcomeList[[k]] <- model_obj$model + OutcomeList[[k]]$model_frame <- OutcomeModel$model_frame_rand # updating probability sample by adding y_hat variable svydesign <- stats::update(svydesign, @@ -326,6 +327,7 @@ nonprobMI <- function(outcome, y_nons_pred <- model_obj$y_nons_pred parameters <- model_obj$parameters OutcomeList[[k]] <- model_obj$model + OutcomeList[[k]]$model_frame <- Model$model_frame_rand mu_hat <- y_rand_pred } else { stop("Please, provide svydesign object or pop_totals/pop_means.") @@ -491,7 +493,8 @@ nonprobMI <- function(outcome, pop_totals = prob_pop_totals, outcome = OutcomeList, selection = NULL, - boot_sample = boot_sample + boot_sample = boot_sample, + svydesign = if(is.null(pop_totals)) svydesign else NULL # TODO to customize if pop_totals only ), class = c("nonprobsvy", "nonprobsvy_mi") ) diff --git a/R/simple_methods.R b/R/simple_methods.R index 1a937fb..6ae4aab 100644 --- a/R/simple_methods.R +++ b/R/simple_methods.R @@ -334,7 +334,7 @@ deviance.nonprobsvy <- function(object, total.nonprobsvy <- function(x, nonprob) { variables <- lhs.vars(x) groups <- rhs.vars(x) - + # TODO variance implementation if (is.null(variables)) { data <- nonprob$data[which(nonprob$R == 1), groups, drop = FALSE] total <- tapply(nonprob$weights[which(nonprob$R == 1)], data, sum) @@ -357,21 +357,141 @@ total.nonprobsvy <- function(x, nonprob) { #' @importFrom formula.tools rhs.vars #' @importFrom stats aggregate mean.nonprobsvy <- function(x, nonprob) { + # TODO variance for MI + # TODO all implementation for DR variables <- lhs.vars(x) groups <- rhs.vars(x) - + class_nonprob <- class(nonprob)[2] + if (!is.null(variables) & class_nonprob != 'nonprobsvy_ipw') + stop("DR or MI estimators only for y variable in subgroups. Recommended formula definition + is something like ~ x + y where x and y are grouping variables") if (is.null(variables)) { - data <- nonprob$data[which(nonprob$R == 1), groups, drop = FALSE] - mean_value <- tapply(nonprob$weights[which(nonprob$R == 1)], data, mean) # to consider + # data <- nonprob$data[, groups, drop = FALSE] + data <- model.matrix(as.formula(paste(x, "- 1")), data = nonprob$data) + if (class_nonprob == "nonprobsvy_ipw") { + mean_value <- sapply(as.data.frame(data), function(col) weighted.mean(col, nonprob$weights)) + ys <- data + } else { + # MI TODO for more than one y + if (class_nonprob == "nonprobsvy_mi") { + data_nonprob <- split(cbind(nonprob$data, nonprob$outcome[[1]]$fitted.values), x) + data_prob <- split(cbind(nonprob$svydesign$variables, nonprob$svydesign$prob), x) + + # X_nons <- model.matrix(delete.response(terms(nonprob$outcome[[1]]$formula)), data_nonprob) + # X_rand <- model.matrix(delete.response(terms(nonprob$outcome[[1]]$formula)), data_prob) + + mean_value <- aggregate(formula(paste("y_hat_MI", x)), + data = nonprob$svydesign$variables, + FUN = function(y, w) weighted.mean(y, w = w[1:length(y)]), + w = 1 / nonprob$svydesign$prob) + + } else if (class_nonprob == "nonprobsvy_dr") { + mean_value <- list() + for (i in 1:p) { + mean_value[[i]] <- mu_hatDR( + y = , + y_nons = , + y_rand = , + weights = , # TODO + weights_nons = , + weights_rand = , + N_nons = , + N_rand = ) + } + } + } } else { - data <- nonprob$data[which(nonprob$R == 1), c(variables, groups)] - weights <- nonprob$weights[which(nonprob$R == 1)] + data <- nonprob$data[, c(variables, groups)] + weights <- nonprob$weights + if (class_nonprob == "nonprobsvy_ipw") { + mean_value <- aggregate(data[, variables], by = data[, groups, drop = FALSE], + FUN = function(y, w) weighted.mean(y, w = w[1:length(y)]), + w = weights) + } + } + p <- length(mean_value) + variances <- numeric(length = p) - mean_value <- aggregate(data[, variables], by = data[, groups, drop = FALSE], - FUN = function(y, w) weighted.mean(y, w = w[1:length(y)]), - w = weights) + for (i in 1:p) { + if (class_nonprob == "nonprobsvy_ipw") { + yy <- ys[,i] + mu <- mean_value[i] + var <- internal_varIPW(svydesign = nonprob$svydesign, + X_nons = nonprob$X[nonprob$R == 1, ], + X_rand = nonprob$X[nonprob$R == 0, ], + y_nons = yy, + weights = nonprob$selection$prior.weights, + ps_nons = nonprob$prob[nonprob$R == 1], + mu_hat = mu, + hess = nonprob$selection$hessian, + ps_nons_der = as.vector(nonprob$selection$prob_der), + N = nonprob$pop_size, + est_ps_rand = as.vector(nonprob$selection$prob_rand_est), # TODO + ps_rand = nonprob$svydesign$prob_rand, + est_ps_rand_der = as.vector(nonprob$selection$prob_rand_est_der), # TODO + n_rand = nonprob$prob_size, + pop_size = nonprob$pop_size, + pop_totals = nonprob$selection$pop_totals, + method_selection = nonprob$selection$method_selection, + est_method = nonprob$selection$method, + theta = nonprob$selection$coefficients, + h = nonprob$selection$h_function, + verbose = FALSE, + var_cov1 = nonprob$selection$link$variance_covariance1, + var_cov2 = nonprob$selection$link$variance_covariance2 + ) + } else if (nonprob_class == "nonprobsvy_mi") { + + var <- internal_varMI(svydesign = nonprob$svydesign, + X_nons = nonprob$X[nonprob$R == 1, ], # subset by x formula + X_rand = nonprob$X[nonprob$R == 0, ], # subset by x formula + y = ys_nons[[i]]$single_shift, # TODO y_nons + y_pred = ys_pred_nons[[i]]$single_shift, # TODO + weights_rand = 1 / nonprob$svydesig$prob, # subset by x formula + method = method_outcome, + n_rand = nonprob$prob_size, + n_nons = nonprob$nonprob_size, + N = nonprob$pop_size, + family = nonprob$outcome$family, + model_obj = model_objects, # TODO + pop_totals = nonprob$pop_totals, + # we should probably just pass full control list + k = nonprob$control$control_outcome$k, + predictive_match = nonprob$control$control_outcome$predictive_match, + nn_exact_se = nonprob$control$control_inference$nn_exact_se, + pmm_reg_engine = nonprob$control$control_outcome$pmm_reg_engine, + pi_ij = nonprob$control$control_inference$pi_ij + ) + } else if (nonprob_class == "nonprobsvy_dr") { + var <- internal_varDR(OutcomeModel = OutcomeModel, + SelectionModel = SelectionModel, + y_nons_pred = ys_nons_pred, + weights = 1, + weights_rand = 1 / nonprob$svydesign$prob, + method_selection = method_selection, + control_selection = nonprob$control$control_selection, + ps_nons = nonprob$prob, + theta = nonprob$selection$coefficients, + hess = nonprob$selection$hessian, + ps_nons_der = nonprob$selection$ps_nons_der, + est_ps_rand = nonprob$selection$prob_rand_est, + y_rand_pred = ys_pred, + N_nons = nonprob$pop_size, + est_ps_rand_der = nonprob$selection$prob_rand_est_der, + svydesign = nonprob$svydesign, + est_method = nonprob$selection$method, + h = nonprob$selection$h_function, + pop_totals = nonprob$pop_totals, + sigma = nonprob$outcome$sigma, # TODO + bias_correction = nonprob$control$control_inference$bias_corr, + verbose = FALSE + ) + } + variances[i] <- var$var } - mean_value + res <- data.frame(mean = mean_value, + se = sqrt(variances)) # perhaps convert to data.frame + res } # @title Median values of covariates in subgroups # diff --git a/R/varianceIPW.R b/R/varianceIPW.R index aae869d..07ac9d8 100644 --- a/R/varianceIPW.R +++ b/R/varianceIPW.R @@ -70,7 +70,6 @@ internal_varIPW <- function(svydesign, psd = est_ps_rand_der ) - # variance-covariance matrix for set of parameters (mu_hat and theta_hat) V_mx_nonprob <- sparse_mx %*% V1 %*% t(as.matrix(sparse_mx)) # nonprobability component V_mx_prob <- sparse_mx %*% V2 %*% t(as.matrix(sparse_mx)) # probability component diff --git a/README.Rmd b/README.Rmd index 07baa5b..c2f4d59 100644 --- a/README.Rmd +++ b/README.Rmd @@ -23,7 +23,7 @@ knitr::opts_chunk$set( [![R-CMD-check](https://github.com/ncn-foreigners/nonprobsvy/workflows/R-CMD-check/badge.svg)](https://github.com/ncn-foreigners/nonprobsvy/actions) -[![codecov](https://codecov.io/gh/ncn-foreigners/nonprobsvy/branch/dev/graph/badge.svg?token=TK9GDFOJF5)](https://codecov.io/gh/ncn-foreigners/nonprobsvy) +[![Codecov test coverage](https://codecov.io/gh/ncn-foreigners/nonprobsvy/branch/main/graph/badge.svg)](https://app.codecov.io/gh/ncn-foreigners/nonprobsvy?branch=main) [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.10280114.svg)](https://doi.org/10.5281/zenodo.10280114) [![CRAN status](https://www.r-pkg.org/badges/version/nonprobsvy)](https://CRAN.R-project.org/package=nonprobsvy) diff --git a/README.md b/README.md index 35d795b..79bd182 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,8 @@ [![R-CMD-check](https://github.com/ncn-foreigners/nonprobsvy/workflows/R-CMD-check/badge.svg)](https://github.com/ncn-foreigners/nonprobsvy/actions) -[![codecov](https://codecov.io/gh/ncn-foreigners/nonprobsvy/branch/dev/graph/badge.svg?token=TK9GDFOJF5)](https://codecov.io/gh/ncn-foreigners/nonprobsvy) +[![Codecov test +coverage](https://codecov.io/gh/ncn-foreigners/nonprobsvy/branch/main/graph/badge.svg)](https://app.codecov.io/gh/ncn-foreigners/nonprobsvy?branch=main) [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.10280114.svg)](https://doi.org/10.5281/zenodo.10280114) [![CRAN status](https://www.r-pkg.org/badges/version/nonprobsvy)](https://CRAN.R-project.org/package=nonprobsvy)