From 40d5c06b0cdbc77b46cc9e5892aab3438118d7f5 Mon Sep 17 00:00:00 2001
From: LukaszChrostowski
Date: Sun, 5 Nov 2023 16:35:50 +0100
Subject: [PATCH] add another start method, readme update

---
 R/EstimationMethods.R | 43 ++++++++++++++++++++++---
 R/control.R           | 11 +++++--
 R/internals.R         | 54 +++++++++++++++----------------
 R/methods.R           | 36 ++++++++++-----------
 R/nonprobDR.R         | 25 ++++++++++++++-
 R/nonprobIPW.R        | 24 +++++++++++++-
 README.Rmd            |  2 +-
 README.md             | 74 +++++++++++++++++++++----------------
 man/controlSel.Rd     | 11 ++++++-
 9 files changed, 187 insertions(+), 93 deletions(-)

diff --git a/R/EstimationMethods.R b/R/EstimationMethods.R
index cc00a1d..76d525d 100644
--- a/R/EstimationMethods.R
+++ b/R/EstimationMethods.R
@@ -97,11 +97,21 @@ mle <- function(...) {
 
   # initial values for propensity score estimation
   if (is.null(start)) {
-    start <- start_fit(X = X,
-                       R = R,
-                       weights = weights,
-                       weights_rand = weights_rand,
-                       method_selection = method_selection)
+    if (control_selection$start_type == "glm") {
+      start <- start_fit(X = X,
+                         R = R,
+                         weights = weights,
+                         weights_rand = weights_rand,
+                         method_selection = method_selection)
+    } else if (control_selection$start_type == "naive") {
+      intercept_start <- max_lik(X_nons = X_nons[, 1, drop = FALSE],
+                                 X_rand = X_rand[, 1, drop = FALSE],
+                                 weights = weights,
+                                 weights_rand = weights_rand,
+                                 start = 0,
+                                 control = control_selection)$theta_hat
+      start <- c(intercept_start, rep(0, ncol(X_nons) - 1))
+    }
   }
 
   df_reduced <- nrow(X) - length(start)
@@ -216,6 +226,7 @@ gee <- function(...) {
                     h = h,
                     est_method,
                     maxit,
+                    control_selection,
                     start,
                     varcov = FALSE,
                     ...){
@@ -223,6 +234,28 @@ gee <- function(...) {
   method_selection_function <- paste(method_selection, "_model_nonprobsvy", sep = "")
   method <- get_method(method = method_selection_function)
   inv_link <- method$make_link_inv
+
+  if (is.null(start)) {
+    if (control_selection$start_type == "glm") {
+      start <- start_fit(X = X, # <--- does not work with pop_totals
+                         R = R,
+                         weights = weights,
+                         weights_rand = weights_rand,
+                         method_selection = method_selection)
+    } else if (control_selection$start_type == "naive") {
+      start_h <- theta_h_estimation(R = R,
+                                    X = X[, 1, drop = FALSE],
+                                    weights_rand = weights_rand,
+                                    weights = weights,
+                                    h = h,
+                                    method_selection = method_selection,
+                                    maxit = maxit,
+                                    start = 0)$theta_h
+      start <- c(start_h, rep(0, ncol(X) - 1))
+    }
+  }
+
+
   h_object <- theta_h_estimation(R = R,
                                  X = X,
                                  weights_rand = weights_rand,
diff --git a/R/control.R b/R/control.R
index dad4d2c..d4de0d9 100644
--- a/R/control.R
+++ b/R/control.R
@@ -29,6 +29,11 @@
 #' @param nlambda The number of `lambda` values. Default is 50.
 #' @param nfolds The number of folds for cross validation. Default is 10.
 #' @param print_level this argument determines the level of printing which is done during the optimization (for propensity score model) process.
+#' @param start_type Type of starting values used when fitting the selection model, one of the following:
+#' \itemize{
+#' \item if \code{glm}, the starting values are taken from a \code{glm} fit on the pooled nonprobability and probability samples.
+#' \item if \code{naive}, the starting vector contains the intercept estimated from the one-dimensional (intercept-only) model and 0 for all remaining coefficients.
+#' }
 #'
 #' @return List with selected parameters.
#' @@ -52,7 +57,8 @@ controlSel <- function(method = "glm.fit", # perhaps another control function fo lambda_min = .001, nlambda = 50, nfolds = 10, - print_level = 0 + print_level = 0, + start_type = c("glm", "naive") ) { list(epsilon = epsilon, @@ -72,7 +78,8 @@ controlSel <- function(method = "glm.fit", # perhaps another control function fo nlambda = nlambda, nfolds = nfolds, lambda = lambda, - print_level = print_level + print_level = print_level, + start_type = if(missing(start_type)) "naive" else start_type ) } diff --git a/R/internals.R b/R/internals.R index c91b8ce..96120ba 100644 --- a/R/internals.R +++ b/R/internals.R @@ -74,30 +74,30 @@ theta_h_estimation <- function(R, pop_means = NULL){ # TODO with BERENZ recommendation p <- ncol(X) - if (is.null(pop_totals) & is.null(pop_means)) { - if (is.null(start)) { - start0 <- start_fit(X = X, # <--- does not work with pop_totals - R = R, - weights = weights, - weights_rand = weights_rand, - method_selection = method_selection) - } else { - start0 <- start - } - } else { # TODO customize start point for fitting with population totals - # start0 <- rep(.8, ncol(X)) - # X_pop <- rbind(X, pop_totals) - # weights_randd <- 1 - if (is.null(start)) { - start0 <- start_fit(X = X, # <--- does not work with pop_totals - R = R, - weights = weights, - weights_rand = weights_rand, - method_selection = method_selection) - } else { - start0 <- start - } - } + # if (is.null(pop_totals) & is.null(pop_means)) { + # if (is.null(start)) { + # start0 <- start_fit(X = X, # <--- does not work with pop_totals + # R = R, + # weights = weights, + # weights_rand = weights_rand, + # method_selection = method_selection) + # } else { + # start0 <- start + # } + # } else { # TODO customize start point for fitting with population totals + # # start0 <- rep(.8, ncol(X)) + # # X_pop <- rbind(X, pop_totals) + # # weights_randd <- 1 + # if (is.null(start)) { + # start0 <- start_fit(X = X, # <--- does not work with pop_totals + # R = R, + # weights = weights, + # weights_rand = weights_rand, + # method_selection = method_selection) + # } else { + # start0 <- start + # } + # } u_theta <- u_theta(R = R, X = X, weights = c(weights_rand, weights), @@ -112,7 +112,7 @@ theta_h_estimation <- function(R, method_selection = method_selection, pop_totals = pop_totals) - root <- nleqslv::nleqslv(x = start0, + root <- nleqslv::nleqslv(x = start, fn = u_theta, method = "Newton", # TODO consider the methods global = "cline", #qline", @@ -123,7 +123,7 @@ theta_h_estimation <- function(R, ) - start <- root$x + theta_root <- root$x if (root$termcd %in% c(2:7, -10)) { switch(as.character(root$termcd), "2" = warning("Relatively convergent algorithm when fitting selection model by nleqslv, but user must check if function values are acceptably small."), @@ -134,7 +134,7 @@ theta_h_estimation <- function(R, "7" = warning("Jacobian is unusable when fitting selection model by nleqslv."), "-10" = warning("user specified Jacobian is incorrect when fitting selection model by nleqslv.")) } - theta_h <- as.vector(start) + theta_h <- as.vector(theta_root) grad <- u_theta(theta_h) hess <- u_theta_der(theta_h) # TODO compare with root$jac #hess <- root$jac diff --git a/R/methods.R b/R/methods.R index 9541c6b..e789a6b 100644 --- a/R/methods.R +++ b/R/methods.R @@ -272,14 +272,14 @@ residuals.nonprobsvy <- function(object, if (any(c("nonprobsvy_dr", "nonprobsvy_mi") %in% class(object))) { if (object$control$control_inference$vars_selection == FALSE) { - res_out <- residuals(object$outcome) # TODO for 
variable selection + res_out <- residuals(object$outcome[[1]]) # TODO for variable selection } else { # TODO for variable selection r <- object$outcome$family$residuals res_out <- switch(type, "response" = r, - "working" = r/object$outcome$family$mu, + "working" = r/object$outcome[[1]]$family$mu, # TODO "deviance" = - "pearson" = r/sqrt(object$outcome$family$variance) + "pearson" = r/sqrt(object$outcome[[1]]$family$variance) ) } } @@ -311,7 +311,7 @@ cooks.distance.nonprobsvy <- function(model, # TODO for variable selection resids <- residuals(model, type = "pearsonSTD")^2 hats <- hatvalues(model) res_sel <- (resids * (hats / (length(coef(model))))) # TODO - res_out <- cooks.distance(model$outcome$glm) + res_out <- cooks.distance(model$outcome[[1]]) } #' @method hatvalues nonprobsvy @@ -321,7 +321,7 @@ cooks.distance.nonprobsvy <- function(model, # TODO for variable selection hatvalues.nonprobsvy <- function(model, ...) { # TODO reduce execution time and glm.fit object and customise to variable selection if (any(c("nonprobsvy_dr", "nonprobsvy_ipw") %in% class(model))) { - propensity_scores <- model$prop_scores + propensity_scores <- model$prob W <- Matrix::Diagonal(x = propensity_scores * (1 - propensity_scores)) XWX_inv <- solve(t(model$X) %*% W %*% model$X) hat_values_sel <- vector(mode = "numeric", length = length(propensity_scores)) @@ -348,10 +348,10 @@ logLik.nonprobsvy <- function(object, ...) { if (any(c("nonprobsvy_dr", "nonprobsvy_ipw") %in% class(object))) { val_sel <- object$selection$log_likelihood attr(val_sel, "nobs") <- dim(residuals(object, type = "pearson"))[1] - attr(val_sel, "df") <- nrow(object$parameters) #length(object$coefficients) + attr(val_sel, "df") <- length(object$selection$coefficients) class(val_sel) <- "logLik" } - val_out <- ifelse(any(c("nonprobsvy_dr", "nonprobsvy_mi") %in% class(object)), logLik(object$outcome), 0) # TODO for gee + val_out <- ifelse(any(c("nonprobsvy_dr", "nonprobsvy_mi") %in% class(object)), logLik(object$outcome[[1]]), 0) # TODO for gee if (class(object)[2] == "nonprobsvy_mi") val <- c("outcome" = val_out) if (class(object)[2] == "nonprobsvy_ipw") val <- c("selection" = val_sel) if (class(object)[2] == "nonprobsvy_dr") val <- c("selection" = val_sel, "outcome" = val_out) @@ -363,8 +363,8 @@ logLik.nonprobsvy <- function(object, ...) { AIC.nonprobsvy <- function(object, ...) { if (!is.character(object$selection$log_likelihood)) { - if (any(c("nonprobsvy_dr", "nonprobsvy_ipw") %in% class(object))) res_sel <- 2 * (length(object$parameters) - object$selection$log_likelihood) - if (any(c("nonprobsvy_dr", "nonprobsvy_mi") %in% class(object))) res_out <- object$outcome$aic + if (any(c("nonprobsvy_dr", "nonprobsvy_ipw") %in% class(object))) res_sel <- 2 * (length(object$selection$coefficients) - object$selection$log_likelihood) + if (any(c("nonprobsvy_dr", "nonprobsvy_mi") %in% class(object))) res_out <- object$outcome[[1]]$aic if (class(object)[2] == "nonprobsvy_mi") res <- c("outcome" = res_out) if (class(object)[2] == "nonprobsvy_ipw") res <- c("selection" = res_sel) if (class(object)[2] == "nonprobsvy_dr") res <- c("selection" = res_sel, "outcome" = res_out) @@ -380,15 +380,15 @@ AIC.nonprobsvy <- function(object, BIC.nonprobsvy <- function(object, ...) 
{
   if (!is.character(object$selection$log_likelihood)) {
-    if (any(c("nonprobsvy_dr", "nonprobsvy_ipw") %in% class(object))) res_sel <- length(object$parameters) * log(object$nonprob_size + object$prob_size) - 2 * object$selection$log_likelihood
+    if (any(c("nonprobsvy_dr", "nonprobsvy_ipw") %in% class(object))) res_sel <- length(object$selection$coefficients) * log(object$nonprob_size + object$prob_size) - 2 * object$selection$log_likelihood
     if (any(c("nonprobsvy_dr", "nonprobsvy_mi") %in% class(object))) {
-      if (!is.null(object$outcome$coefficients)) {
+      if (!is.null(object$outcome[[1]]$coefficients)) {
         if (object$control$control_inference$vars_selection == TRUE) {
           options(AIC="BIC")
           res_out <- HelpersMG::ExtractAIC.glm(object$outcome)[2]
         } else {
-          res_out <- BIC(object$outcome)
+          res_out <- BIC(object$outcome[[1]])
         }
       } else {
         res_out <- "not available for this method"
       }
@@ -425,17 +425,17 @@ confint.nonprobsvy <- function(object,
   if (any(c("nonprobsvy_dr", "nonprobsvy_ipw") %in% class(object))) {
     std <- sqrt(diag(vcov(object)[[1]]))
     sc <- qnorm(p = 1 - (1 - level) / 2)
-    res_sel <- data.frame(object$parameters[,1] - sc * std, object$parameters[,1] + sc * std)
+    res_sel <- data.frame(object$selection$coefficients - sc * std, object$selection$coefficients + sc * std)
     colnames(res_sel) <- c(paste0(100 * (1 - level) / 2, "%"),
                            paste0(100 * (1 - (1 - level) / 2), "%"))
   }
   if (any(c("nonprobsvy_dr", "nonprobsvy_mi") %in% class(object))) {
     if (object$control$control_inference$vars_selection == FALSE) {
-      res_out <- confint(object$outcome)
+      res_out <- confint(object$outcome[[1]])
     } else {
       std <- sqrt(diag(vcov(object)[[1]]))
       sc <- qnorm(p = 1 - (1 - level) / 2)
-      res_out <- data.frame(object$beta[,1] - sc * std, object$beta[,1] + sc * std)
+      res_out <- data.frame(object$outcome[[1]]$coefficients - sc * std, object$outcome[[1]]$coefficients + sc * std)
       colnames(res_out) <- c(paste0(100 * (1 - level) / 2, "%"),
                              paste0(100 * (1 - (1 - level) / 2), "%"))
     }
@@ -463,14 +463,14 @@ confint.nonprobsvy <- function(object,
 vcov.nonprobsvy <- function(object,
                             ...) { # TODO consider different vcov methods for selection and outcome models
   if (object$control$control_inference$vars_selection == TRUE) {
-    if (any(c("nonprobsvy_dr", "nonprobsvy_mi") %in% class(object))) res_out <- object$outcome$variance_covariance
+    if (any(c("nonprobsvy_dr", "nonprobsvy_mi") %in% class(object))) res_out <- object$outcome[[1]]$variance_covariance
     if (any(c("nonprobsvy_dr", "nonprobsvy_ipw") %in% class(object))) res_sel <- object$selection$variance_covariance
     if (class(object)[2] == "nonprobsvy_mi") res <- list(outcome = res_out)
     if (class(object)[2] == "nonprobsvy_ipw") res <- list(selection = res_sel)
     if (class(object)[2] == "nonprobsvy_dr") res <- list(selection = res_sel, outcome = res_out)
   } else {
-    if (any(c("nonprobsvy_dr", "nonprobsvy_mi") %in% class(object))) res_out <- vcov(object$outcome)
+    if (any(c("nonprobsvy_dr", "nonprobsvy_mi") %in% class(object))) res_out <- vcov(object$outcome[[1]])
     if (any(c("nonprobsvy_dr", "nonprobsvy_ipw") %in% class(object))) res_sel <- object$selection$variance_covariance
     if (class(object)[2] == "nonprobsvy_mi") res <- list(outcome = res_out)
     if (class(object)[2] == "nonprobsvy_ipw") res <- list(selection = res_sel)
@@ -483,6 +483,6 @@ vcov.nonprobsvy <- function(object,
 #' @exportS3Method
 deviance.nonprobsvy <- function(object,
                                 ...)
{ - res_out <- object$outcome$deviance + res_out <- object$outcome[[1]]$deviance # TODO for selection model - use selection object } diff --git a/R/nonprobDR.R b/R/nonprobDR.R index 7a2c238..b1ab626 100644 --- a/R/nonprobDR.R +++ b/R/nonprobDR.R @@ -290,6 +290,7 @@ nonprobDR <- function(selection, ps_nons_der <- selection_model$ps_nons_der est_ps_rand_der <- selection_model$est_ps_rand_der theta_standard_errors <- sqrt(diag(selection_model$variance_covariance)) + names(theta_hat) <- names(selection_model$coefficients) <- colnames(X) y_rand_pred <- outcome_model$y_rand_pred y_nons_pred <- outcome_model$y_nons_pred @@ -389,7 +390,7 @@ nonprobDR <- function(selection, #log_likelihood <- est_method_obj$log_likelihood #df_residual <- est_method_obj$df_residual - names(theta_hat) <- colnames(X) + names(selection_model$theta_hat) <- names(theta_hat) <- colnames(X) weights_nons <- 1/ps_nons N_nons <- sum(weights * weights_nons) N_rand <- sum(weights_rand) @@ -475,6 +476,28 @@ nonprobDR <- function(selection, X_nons <- OutcomeModel$X_nons <- SelectionModel$X_nons <- X[loc_nons,] SelectionModel$pop_totals <- c(SelectionModel$pop_totals[1], SelectionModel$pop_totals[idx+1]) } + + if (is.null(start)) { + if (control_selection$start_type == "glm") { + start <- start_fit(X = SelectionModel$X_nons, # <--- does not work with pop_totals + R = R, + weights = weights, + weights_rand = weights_rand, + method_selection = method_selection) + } else if (control_selection$start_type == "naive") { + start_h <- theta_h_estimation(R = R, + X = SelectionModel$X_nons[,1,drop=FALSE], + weights_rand = weights_rand, + weights = weights, + h = h, + method_selection = method_selection, + start = 0, + maxit = maxit, + pop_totals = SelectionModel$pop_totals[1])$theta_h + start <- c(start_h, rep(0, ncol(X) - 1)) + } + } + h_object <- theta_h_estimation(R = R, X = SelectionModel$X_nons, weights_rand = NULL, diff --git a/R/nonprobIPW.R b/R/nonprobIPW.R index 98eea2f..a586f30 100644 --- a/R/nonprobIPW.R +++ b/R/nonprobIPW.R @@ -163,7 +163,7 @@ nonprobIPW <- function(selection, est_ps_rand_der <- selection_model$est_ps_rand_der theta_standard_errors <- sqrt(diag(selection_model$variance_covariance)) - names(selection_model$theta_hat) <- colnames(X) + names(theta_hat) <- names(selection_model$theta_hat) <- colnames(X) weights_nons <- 1/ps_nons N <- sum(weights * weights_nons) @@ -252,6 +252,28 @@ nonprobIPW <- function(selection, ################ ESTIMATION pop_totals <- model$pop_totals[idx] } + + if (is.null(start)) { + if (control_selection$start_type == "glm") { + start <- start_fit(X = X, # <--- does not work with pop_totals + R = R, + weights = weights, + weights_rand = weights_rand, + method_selection = method_selection) + } else if (control_selection$start_type == "naive") { + start_h <- theta_h_estimation(R = R, + X = X[,1,drop=FALSE], + weights_rand = weights_rand, + weights = weights, + h = h, + method_selection = method_selection, + start = 0, + maxit = maxit, + pop_totals = pop_totals[1])$theta_h + start <- c(start_h, rep(0, ncol(X) - 1)) + } + } + h_object <- theta_h_estimation(R = R, X = X, weights_rand = weights_rand, diff --git a/README.Rmd b/README.Rmd index c37005f..a88223e 100644 --- a/README.Rmd +++ b/README.Rmd @@ -36,7 +36,7 @@ You can install the development version of `nonprobsvy` package from [Github](ht remotes::install_github("ncn-foreigners/nonprobsvy") ``` -## Example +## Examples Simulate example data from the following paper: Kim, Jae Kwang, and Zhonglei Wang. 
"Sampling techniques for big data analysis." International Statistical Review 87 (2019): S177-S191 [section 5.2] diff --git a/README.md b/README.md index 247a0fe..b853480 100644 --- a/README.md +++ b/README.md @@ -36,7 +36,7 @@ You can install the development version of `nonprobsvy` package from remotes::install_github("ncn-foreigners/nonprobsvy") ``` -## Example +## Examples Simulate example data from the following paper: Kim, Jae Kwang, and Zhonglei Wang. “Sampling techniques for big data analysis.” @@ -88,33 +88,27 @@ Results summary(result_dr) #> #> Call: -#> nonprob(selection = ~x2, outcome = y1 ~ x1 + x2, data = subset(population, -#> flag_bd1 == 1), svydesign = sample_prob) +#> nonprob(data = subset(population, flag_bd1 == 1), selection = ~x2, +#> outcome = y1 ~ x1 + x2, svydesign = sample_prob) #> #> ------------------------- #> Estimated population mean: 2.95 with overall std.err of: 0.04195 #> And std.err for nonprobability and probability samples being respectively: #> 0.000783 and 0.04195 #> -#> Based on: Doubly-Robust method -#> #> 95% Confidence inverval for popualtion mean: -#> lower_bound upper_bound -#> normal 2.867789 3.03224 +#> lower_bound upper_bound +#> y1 2.867789 3.03224 +#> #> -#> For a population of estimate size: 1025111 +#> Based on: Doubly-Robust method +#> For a population of estimate size: #> Obtained on a nonprobability sample of size: 693011 #> With an auxiliary probability sample of size: 1000 #> ------------------------- #> #> Regression coefficients: #> ----------------------- -#> For glm regression on selection variable: -#> Estimate Std. Error z value P(>|z|) -#> (Intercept) -0.499105 0.003702 -134.8 <2e-16 *** -#> x2 1.885532 0.005303 355.6 <2e-16 *** -#> -#> ----------------------- #> For glm regression on outcome variable: #> Estimate Std. Error z value P(>|z|) #> (Intercept) 0.996282 0.002139 465.8 <2e-16 *** @@ -122,6 +116,12 @@ summary(result_dr) #> x2 0.999125 0.001098 910.2 <2e-16 *** #> --- #> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 +#> +#> ----------------------- +#> For glm regression on selection variable: +#> Estimate Std. Error z value P(>|z|) +#> (Intercept) -0.498997 0.003702 -134.8 <2e-16 *** +#> x2 1.885629 0.005303 355.6 <2e-16 *** #> ------------------------- #> #> Weights: @@ -131,10 +131,10 @@ summary(result_dr) #> #> Residuals: #> Min. 1st Qu. Median Mean 3rd Qu. Max. 
-#> -0.99999 0.06604 0.23781 0.26048 0.44361 0.62225 +#> -0.99999 0.06603 0.23778 0.26046 0.44358 0.62222 #> -#> AIC: 1010626 -#> BIC: 1010672 +#> AIC: 1010622 +#> BIC: 1010645 #> Log-Likelihood: -505309 on 694009 Degrees of freedom ``` @@ -154,20 +154,20 @@ Results summary(result_mi) #> #> Call: -#> nonprob(outcome = y1 ~ x1 + x2, data = subset(population, flag_bd1 == -#> 1), svydesign = sample_prob) +#> nonprob(data = subset(population, flag_bd1 == 1), outcome = y1 ~ +#> x1 + x2, svydesign = sample_prob) #> #> ------------------------- #> Estimated population mean: 2.95 with overall std.err of: 0.04203 #> And std.err for nonprobability and probability samples being respectively: #> 0.001227 and 0.04201 #> -#> Based on: Mass Imputation method -#> #> 95% Confidence inverval for popualtion mean: -#> lower_bound upper_bound -#> normal 2.867433 3.032186 +#> lower_bound upper_bound +#> y1 2.867433 3.032186 +#> #> +#> Based on: Mass Imputation method #> For a population of estimate size: 1e+06 #> Obtained on a nonprobability sample of size: 693011 #> With an auxiliary probability sample of size: 1000 @@ -201,21 +201,21 @@ Results summary(result_ipw) #> #> Call: -#> nonprob(selection = ~x2, target = ~y1, data = subset(population, -#> flag_bd1 == 1), svydesign = sample_prob) +#> nonprob(data = subset(population, flag_bd1 == 1), selection = ~x2, +#> target = ~y1, svydesign = sample_prob) #> #> ------------------------- -#> Estimated population mean: 2.925 with overall std.err of: 0.04999 +#> Estimated population mean: 2.925 with overall std.err of: 0.05 #> And std.err for nonprobability and probability samples being respectively: -#> 0.001117 and 0.04997 -#> -#> Based on: Inverse probability weighted method +#> 0.001586 and 0.04997 #> #> 95% Confidence inverval for popualtion mean: -#> lower_bound upper_bound -#> normal 2.826789 3.022736 +#> lower_bound upper_bound +#> y1 2.82679 3.022776 #> -#> For a population of estimate size: 1025111 +#> +#> Based on: Inverse probability weighted method +#> For a population of estimate size: 1025063 #> Obtained on a nonprobability sample of size: 693011 #> With an auxiliary probability sample of size: 1000 #> ------------------------- @@ -224,8 +224,8 @@ summary(result_ipw) #> ----------------------- #> For glm regression on selection variable: #> Estimate Std. Error z value P(>|z|) -#> (Intercept) -0.499105 0.003702 -134.8 <2e-16 *** -#> x2 1.885532 0.005303 355.6 <2e-16 *** +#> (Intercept) -0.498997 0.003702 -134.8 <2e-16 *** +#> x2 1.885629 0.005303 355.6 <2e-16 *** #> --- #> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 #> ------------------------- @@ -237,10 +237,10 @@ summary(result_ipw) #> #> Residuals: #> Min. 1st Qu. Median Mean 3rd Qu. Max. -#> -0.99999 0.06604 0.23781 0.26048 0.44361 0.62225 +#> -0.99999 0.06603 0.23778 0.26046 0.44358 0.62222 #> -#> AIC: 1010626 -#> BIC: 1010672 +#> AIC: 1010622 +#> BIC: 1010645 #> Log-Likelihood: -505309 on 694009 Degrees of freedom ``` diff --git a/man/controlSel.Rd b/man/controlSel.Rd index d415025..b07fe92 100644 --- a/man/controlSel.Rd +++ b/man/controlSel.Rd @@ -23,7 +23,8 @@ controlSel( lambda_min = 0.001, nlambda = 50, nfolds = 10, - print_level = 0 + print_level = 0, + start_type = c("glm", "naive") ) } \arguments{ @@ -71,6 +72,14 @@ controlSel( \item{nfolds}{The number of folds for cross validation. 
Default is 10.}

\item{print_level}{this argument determines the level of printing which is done during the optimization (for propensity score model) process.}
+
+\item{start_type}{Type of starting values used when fitting the selection
+model, one of the following:
+\itemize{
+\item if \code{glm}, the starting values are taken from a \code{glm} fit on the pooled nonprobability and probability samples.
+\item if \code{naive}, the starting vector contains the intercept estimated
+from the one-dimensional (intercept-only) model and 0 for all remaining coefficients.
+}}
 }
 \value{
 List with selected parameters.
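
A minimal usage sketch of the new `start_type` option (assuming the simulated `population` data and `sample_prob` design from the README examples above, and assuming `control_selection` is the argument of `nonprob()` that accepts the output of `controlSel()`):

```r
library(nonprobsvy)

# IPW estimation with "naive" starting values for the selection model:
# only the intercept is estimated up front, the remaining coefficients
# start from 0.
result_ipw_naive <- nonprob(
  selection = ~ x2,
  target = ~ y1,
  data = subset(population, flag_bd1 == 1),
  svydesign = sample_prob,
  control_selection = controlSel(start_type = "naive")
)

# "glm" instead derives all starting values from a glm fit on the
# pooled nonprobability and probability samples.
result_ipw_glm <- nonprob(
  selection = ~ x2,
  target = ~ y1,
  data = subset(population, flag_bd1 == 1),
  svydesign = sample_prob,
  control_selection = controlSel(start_type = "glm")
)
```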
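
The "naive" construction itself is easy to state in base R. The sketch below uses a hypothetical `make_start()` helper built on plain `glm.fit()` to illustrate the idea; the package internals instead go through `max_lik()` or `theta_h_estimation()` and account for the survey weights:

```r
# Starting values for a logistic selection model, given a design matrix X
# (intercept in the first column) and a sample-membership indicator R.
make_start <- function(X, R, start_type = c("naive", "glm")) {
  start_type <- match.arg(start_type)
  if (start_type == "glm") {
    # full fit on all covariates
    unname(coef(glm.fit(x = X, y = R, family = binomial())))
  } else {
    # intercept-only fit, zeros for the remaining coefficients
    fit0 <- glm.fit(x = X[, 1, drop = FALSE], y = R, family = binomial())
    c(unname(coef(fit0)), rep(0, ncol(X) - 1))
  }
}

set.seed(123)
X <- cbind(intercept = 1, x2 = rnorm(100))
R <- rbinom(100, 1, plogis(-0.5 + X[, "x2"]))
make_start(X, R, "naive") # estimated intercept followed by a zero
```

The appeal of the "naive" start is robustness: the one-dimensional fit is cheap and stable, which gives the `nleqslv()` root-finding step a sensible point to expand from in cases where the `glm`-based start is unavailable, e.g. when only population totals are supplied (the situation flagged by the "does not work with pop_totals" comments in the patch).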