Skip to content

Commit

Permalink
closes #64, closes #55, update NEWS.md,
Browse files Browse the repository at this point in the history
  • Loading branch information
LukaszChrostowski committed Jan 17, 2025
1 parent e02d28d commit 36affe0
Show file tree
Hide file tree
Showing 31 changed files with 435 additions and 425 deletions.
14 changes: 7 additions & 7 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -2,29 +2,29 @@

S3method(AIC,nonprobsvy)
S3method(BIC,nonprobsvy)
S3method(check_balance,nonprobsvy)
S3method(confint,nonprobsvy)
S3method(cooks.distance,nonprobsvy)
S3method(deviance,nonprobsvy)
S3method(hatvalues,nonprobsvy)
S3method(logLik,nonprobsvy)
S3method(nobs,nonprobsvy)
S3method(nonprobsvycheck,nonprobsvy)
S3method(pop.size,nonprobsvy)
S3method(pop_size,nonprobsvy)
S3method(print,nonprobsvy)
S3method(print,nonprobsvycheck)
S3method(print,summary_nonprobsvy)
S3method(residuals,nonprobsvy)
S3method(summary,nonprobsvy)
S3method(vcov,nonprobsvy)
export(check_balance)
export(cloglog_model_nonprobsvy)
export(controlInf)
export(controlOut)
export(controlSel)
export(control_inf)
export(control_out)
export(control_sel)
export(genSimData)
export(logit_model_nonprobsvy)
export(nonprob)
export(nonprobsvycheck)
export(pop.size)
export(pop_size)
export(probit_model_nonprobsvy)
import(Rcpp)
import(mathjaxr)
Expand Down
5 changes: 5 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,15 @@
### Features
- two additional datasets have been included: `jvs` (Job Vacancy Survey; a probability sample survey) and `admin` (Central Job Offers Database; a non-probability sample survey). The units and auxiliary variables have been aligned in a way that allows the data to be integrated using the methods implemented in this package.
- a `check_balance` function (previously named `nonprobsvycheck`) was added to check the balance of the weighted totals of the variables between the non-probability and probability samples.
- Important - the functions `controlSel`, `controlOut` and `controlInf` have been replaced by their counterparts `control_sel`, `control_out` and `control_inf`.

### Bugfixes
- basic methods and functions related to variance estimation, weights and probability linking methods have been rewritten in a more optimal and readable way.

### Documentation

- a note has been added that arguments such as `strata`, `subset` and `na_action` are not yet supported.

# nonprobsvy 0.1.1

------------------------------------------------------------------------
Expand Down
1 change: 1 addition & 0 deletions R/cloglogModel.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#' @importFrom maxLik maxLik
#' @importFrom Matrix Matrix
#' @importFrom survey svyrecvar
#' @keywords internal
#' @export
# Must be exported to be visible in the C++ script; TODO: consider other options.
cloglog_model_nonprobsvy <- function(...) {
Expand Down
6 changes: 4 additions & 2 deletions R/control_inference.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
#' @title Control parameters for inference
#' @description \code{controlInf} constructs a list with all necessary control parameters
#'
#' @description \code{control_inf} constructs a list with all necessary control parameters
#' for statistical inference.
#'
#' @param vars_selection If `TRUE`, then the variable selection model is used.
#' @param var_method variance method.
#' @param rep_type replication type for weights in the bootstrap method for variance estimation passed to [survey::as.svrepdesign()].
Expand Down Expand Up @@ -36,7 +38,7 @@
#'
#' @export

controlInf <- function(vars_selection = FALSE,
control_inf <- function(vars_selection = FALSE,
var_method = c(
"analytic",
"bootstrap"
Expand Down
6 changes: 4 additions & 2 deletions R/control_outcome.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
#' @title Control parameters for outcome model
#' @description \code{controlOut} constructs a list with all necessary control parameters
#'
#' @description \code{control_out} constructs a list with all necessary control parameters
#' for outcome model.
#'
#' @param epsilon Tolerance for fitting algorithms. Default is \code{1e-4}.
#' @param maxit Maximum number of iterations.
#' @param trace logical value. If `TRUE` trace steps of the fitting algorithms. Default is `FALSE`.
Expand Down Expand Up @@ -44,7 +46,7 @@
#'
#' @export

controlOut <- function(epsilon = 1e-4,
control_out <- function(epsilon = 1e-4,
maxit = 100,
trace = FALSE,
k = 1,
Expand Down
6 changes: 2 additions & 4 deletions R/control_selection.R
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
#' @title Control parameters for selection model
#' @author Łukasz Chrostowski, Maciej Beręsewicz
#' \loadmathjax
#'
#' @description \code{controlSel} constructs a list with all necessary control parameters
#' @description \code{control_sel} constructs a list with all necessary control parameters
#' for selection model.
#'
#'
Expand Down Expand Up @@ -48,7 +46,7 @@
#'
#' @export

controlSel <- function(method = "glm.fit", # perhaps another control function for model with variables selection
control_sel <- function(method = "glm.fit", # perhaps another control function for model with variables selection
epsilon = 1e-4,
maxit = 500,
trace = FALSE,
Expand Down
4 changes: 2 additions & 2 deletions R/internals.R
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ start_fit <- function(X,
weights,
weights_rand,
method_selection,
control_selection = controlSel()) {
control_selection = control_sel()) {
weights_to_glm <- c(weights_rand, weights)
start_model <- stats::glm.fit(
x = X, # glm model for initial values in propensity score estimation
Expand Down Expand Up @@ -207,7 +207,7 @@ nonprobMI_fit <- function(outcome,
svydesign = NULL,
family_outcome = "gaussian",
start = NULL,
control_outcome = controlOut(),
control_outcome = control_out(),
verbose = FALSE,
model = TRUE,
x = FALSE,
Expand Down
1 change: 1 addition & 0 deletions R/logitModel.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#' @importFrom stats qlogis
#'
#'
#' @keywords internal
#' @export
# Must be exported to be visible in the C++ script; TODO: consider other options.
logit_model_nonprobsvy <- function(...) {
Expand Down
18 changes: 9 additions & 9 deletions R/main_function_documentation.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,10 @@ NULL
#' @param method_selection a `character` with method for propensity scores estimation.
#' @param method_outcome a `character` with method for response variable estimation.
#' @param family_outcome a `character` string describing the error distribution and link function to be used in the model. Default is "gaussian". Currently supports: gaussian with identity link, poisson and binomial.
#' @param subset an optional `vector` specifying a subset of observations to be used in the fitting process.
#' @param strata an optional `vector` specifying strata.
#' @param subset an optional `vector` specifying a subset of observations to be used in the fitting process - not yet supported.
#' @param strata an optional `vector` specifying strata - not yet supported.
#' @param weights an optional `vector` of prior weights to be used in the fitting process. Should be NULL or a numeric vector. It is assumed that this vector contains frequency or analytic weights.
#' @param na_action a function which indicates what should happen when the data contain `NAs`.
#' @param na_action a function which indicates what should happen when the data contain `NAs` - not yet supported.
#' @param control_selection a `list` indicating parameters to use in fitting selection model for propensity scores.
#' @param control_outcome a `list` indicating parameters to use in fitting model for outcome variable.
#' @param control_inference a `list` indicating parameters to use in inference based on probability and non-probability samples, contains parameters such as estimation method or variance method.
Expand Down Expand Up @@ -153,7 +153,7 @@ NULL
#' }
#' where \mjseqn{\lambda_{\theta}} and \mjseqn{q_{\lambda_{\beta}}} are some smooth functions. We let \mjseqn{q_{\lambda} \left(x\right) = \frac{\partial p_{\lambda}}{\partial x}}, where \mjseqn{p_{\lambda}} is some penalization function.
#' Details of penalization functions and techniques for solving this type of equation can be found [here](https://ncn-foreigners.github.io/nonprobsvy-book/variableselection.html).
#' To use the variable selection model, set the `vars_selection` parameter in the [controlInf()] function to `TRUE`. In addition, in the other control functions such as [controlSel()] and [controlOut()]
#' To use the variable selection model, set the `vars_selection` parameter in the [control_inf()] function to `TRUE`. In addition, in the other control functions such as [control_sel()] and [control_out()]
#' you can set parameters for the selection of the relevant variables, such as the number of folds during cross-validation algorithm or the lambda value for penalizations. Details can be found
#' in the documentation of the control functions for `nonprob`.
#'
Expand Down Expand Up @@ -194,7 +194,7 @@ NULL
#' \item{\code{pop_size} -- estimated population size derived from estimated weights (non-probability sample) or known design weights (probability sample).}
#' \item{\code{pop_totals} -- the total values of the auxiliary variables derived from a probability sample or vector of total/mean values.}
#' \item{\code{outcome} -- list containing information about the fitting of the mass imputation model, in the case of regression model the object containing the list returned by
#' [stats::glm()], in the case of the nearest neighbour imputation the object containing list returned by [RANN::nn2()]. If `bias_correction` in [controlInf()] is set to `TRUE`, the estimation is based on
#' [stats::glm()], in the case of the nearest neighbour imputation the object containing list returned by [RANN::nn2()]. If `bias_correction` in [control_inf()] is set to `TRUE`, the estimation is based on
#' the joint estimating equations for the `selection` and `outcome` model and therefore, the list is different from the one returned by the [stats::glm()] function and contains elements such as
#' \itemize{
#' \item{\code{coefficients} -- estimated coefficients of the regression model.}
Expand Down Expand Up @@ -244,7 +244,7 @@ NULL
#' }
#' \item{\code{stat} -- matrix of the estimated population means in each bootstrap iteration.
#' Returned only if a bootstrap method is used to estimate the variance and \code{keep_boot} in
#' [controlInf()] is set on `TRUE`.}
#' [control_inf()] is set to `TRUE`.}
#' }
#' @seealso
#' [stats::optim()] -- For more information on the \code{optim} function used in the
Expand All @@ -263,11 +263,11 @@ NULL
#'
#' [RANN::nn2()] -- For more information about the nearest neighbour algorithm used during mass imputation process.
#'
#' [controlSel()] -- For the control parameters related to selection model.
#' [control_sel()] -- For the control parameters related to selection model.
#'
#' [controlOut()] -- For the control parameters related to outcome model.
#' [control_out()] -- For the control parameters related to outcome model.
#'
#' [controlInf()] -- For the control parameters related to statistical inference.
#' [control_inf()] -- For the control parameters related to statistical inference.

#' @examples
#' \donttest{
Expand Down
9 changes: 5 additions & 4 deletions R/nonprob.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@ nonprob <- function(data,
strata = NULL,
weights = NULL,
na_action = NULL,
control_selection = controlSel(),
control_outcome = controlOut(),
control_inference = controlInf(),
control_selection = control_sel(),
control_outcome = control_out(),
control_inference = control_inf(),
start_selection = NULL,
start_outcome = NULL,
verbose = FALSE,
Expand All @@ -38,7 +38,8 @@ nonprob <- function(data,
if (missing(method_outcome)) method_outcome <- "glm"
if (!(method_outcome %in% c("glm", "nn", "pmm"))) stop("Invalid method for outcome variable.")
if (!is.null(svydesign)) {
if (class(svydesign)[2] != "survey.design") stop("svydesign must be a survey.design object.")
if ("svyrep.design" %in% class(svydesign)) stop("We do not currently support the `svyrep.design` class. Provide the survey data in the `survey.design2` class.")
if ("pps" %in% class(svydesign)) stop("The `as.svrepdesign` function does not allow `pps` designs. For more details, see the `survey` package.")
}
if (!is.null(pop_totals)) {
if (!is.vector(pop_totals)) stop("pop_totals must be a vector.")
Expand Down
6 changes: 3 additions & 3 deletions R/nonprobDR.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,9 @@ nonprobDR <- function(selection,
strata,
weights,
na_action,
control_selection = controlSel(),
control_outcome = controlOut(),
control_inference = controlInf(),
control_selection,
control_outcome,
control_inference,
start_outcome,
start_selection,
verbose,
Expand Down
4 changes: 2 additions & 2 deletions R/nonprobIPW.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ nonprobIPW <- function(selection,
strata,
weights,
na_action,
control_selection = controlSel(),
control_inference = controlInf(),
control_selection,
control_inference,
start_selection,
verbose,
x,
Expand Down
2 changes: 1 addition & 1 deletion R/nonprobMI.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ nonprobMI <- function(outcome,
weights,
na_action,
control_outcome,
control_inference = controlInf(var_method = "analytic"),
control_inference,
start_outcome,
verbose,
x,
Expand Down
1 change: 1 addition & 0 deletions R/probitModel.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#' @importFrom stats dnorm
#' @importFrom Matrix Matrix
#' @importFrom survey svyrecvar
#' @keywords internal
#' @export
# Must be exported to be visible in the C++ script; TODO: consider other options.
probit_model_nonprobsvy <- function(...) {
Expand Down
16 changes: 8 additions & 8 deletions R/simple_methods.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@ nobs.nonprobsvy <- function(object,
...) {
c("prob" = object$prob_size, "nonprob" = object$nonprob_size)
}
#' @method pop.size nonprobsvy
#' @method pop_size nonprobsvy
#' @exportS3Method
pop.size.nonprobsvy <- function(object,
pop_size.nonprobsvy <- function(object,
...) {
object$pop_size
}
Expand All @@ -19,8 +19,8 @@ pop.size.nonprobsvy <- function(object,
#' @param ... additional parameters
#' @return Vector returning the value of the estimated population size.
#' @export
pop.size <- function(object, ...) {
UseMethod("pop.size")
pop_size <- function(object, ...) {
UseMethod("pop_size")
}
#' @method residuals nonprobsvy
#' @importFrom stats residuals
Expand Down Expand Up @@ -321,9 +321,9 @@ deviance.nonprobsvy <- function(object,
if (class(object)[2] == "nonprobsvy_dr") res <- c("selection" = res_sel, "outcome" = res_out)
res
}
#' @method nonprobsvycheck nonprobsvy
#' @method check_balance nonprobsvy
#' @exportS3Method
nonprobsvycheck.nonprobsvy <- function(x, object, dig = 10) {
check_balance.nonprobsvy <- function(x, object, dig = 10) {
# Input validation
if (!inherits(x, "formula")) {
stop("'x' must be a formula")
Expand Down Expand Up @@ -443,8 +443,8 @@ nonprobsvycheck.nonprobsvy <- function(x, object, dig = 10) {
#' @importFrom survey svytotal
#' @importFrom stats setNames
#' @export
nonprobsvycheck <- function(x, object, dig) {
UseMethod("nonprobsvycheck", object)
check_balance <- function(x, object, dig) {
UseMethod("check_balance", object)
}
# Internal function - not exported in CRAN version
# Will be exported in future releases after variance estimation is implemented
Expand Down
2 changes: 1 addition & 1 deletion R/summary.R
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ summary.nonprobsvy <- function(object,
cnf_int = object$confidence_interval
),
sample_size = nobs(object, ...),
population_size = pop.size(object, ...),
population_size = pop_size(object, ...),
totals = object$pop_totals,
test = test,
control = object$control,
Expand Down
Loading

0 comments on commit 36affe0

Please sign in to comment.