From 40d5c06b0cdbc77b46cc9e5892aab3438118d7f5 Mon Sep 17 00:00:00 2001
From: LukaszChrostowski
Date: Sun, 5 Nov 2023 16:35:50 +0100
Subject: [PATCH] add another start method, readme update

---
 R/EstimationMethods.R | 43 ++++++++++++++++++++++---
 R/control.R           | 11 +++++--
 R/internals.R         | 54 +++++++++++++++----------------
 R/methods.R           | 36 ++++++++++-----------
 R/nonprobDR.R         | 25 ++++++++++++++-
 R/nonprobIPW.R        | 24 +++++++++++++-
 README.Rmd            |  2 +-
 README.md             | 74 +++++++++++++++++++++----------------
 man/controlSel.Rd     | 11 ++++++-
 9 files changed, 187 insertions(+), 93 deletions(-)

diff --git a/R/EstimationMethods.R b/R/EstimationMethods.R
index cc00a1d..76d525d 100644
--- a/R/EstimationMethods.R
+++ b/R/EstimationMethods.R
@@ -97,11 +97,21 @@ mle <- function(...) {
 
   # initial values for propensity score estimation
   if (is.null(start)) {
-    start <- start_fit(X = X,
-                       R = R,
-                       weights = weights,
-                       weights_rand = weights_rand,
-                       method_selection = method_selection)
+    if (control_selection$start_type == "glm") {
+      start <- start_fit(X = X,
+                         R = R,
+                         weights = weights,
+                         weights_rand = weights_rand,
+                         method_selection = method_selection)
+    } else if (control_selection$start_type == "naive") {
+      intercept_start <- max_lik(X_nons = X_nons[, 1, drop = FALSE],
+                                 X_rand = X_rand[, 1, drop = FALSE],
+                                 weights = weights,
+                                 weights_rand = weights_rand,
+                                 start = 0,
+                                 control = control_selection)$theta_hat
+      start <- c(intercept_start, rep(0, ncol(X_nons) - 1))
+    }
   }
 
   df_reduced <- nrow(X) - length(start)
@@ -216,6 +226,7 @@ gee <- function(...) {
                     h = h,
                     est_method,
                     maxit,
+                    control_selection,
                     start,
                     varcov = FALSE,
                     ...){
@@ -223,6 +234,28 @@ gee <- function(...) {
   method_selection_function <- paste(method_selection, "_model_nonprobsvy", sep = "")
   method <- get_method(method = method_selection_function)
   inv_link <- method$make_link_inv
+
+  if (is.null(start)) {
+    if (control_selection$start_type == "glm") {
+      start <- start_fit(X = X, # <--- does not work with pop_totals
+                         R = R,
+                         weights = weights,
+                         weights_rand = weights_rand,
+                         method_selection = method_selection)
+    } else if (control_selection$start_type == "naive") {
+      start_h <- theta_h_estimation(R = R,
+                                    X = X[, 1, drop = FALSE],
+                                    weights_rand = weights_rand,
+                                    weights = weights,
+                                    h = h,
+                                    method_selection = method_selection,
+                                    maxit = maxit,
+                                    start = 0)$theta_h
+      start <- c(start_h, rep(0, ncol(X) - 1))
+    }
+  }
+
+
   h_object <- theta_h_estimation(R = R,
                                  X = X,
                                  weights_rand = weights_rand,
diff --git a/R/control.R b/R/control.R
index dad4d2c..d4de0d9 100644
--- a/R/control.R
+++ b/R/control.R
@@ -29,6 +29,11 @@
 #' @param nlambda The number of `lambda` values. Default is 50.
 #' @param nfolds The number of folds for cross validation. Default is 10.
 #' @param print_level this argument determines the level of printing which is done during the optimization (for propensity score model) process.
+#' @param start_type Type of starting values used when fitting the selection model, one of the following:
+#' \itemize{
+#' \item if \code{glm}, the starting values are taken from a \code{glm} fit on the pooled nonprobability and probability samples.
+#' \item if \code{naive}, the starting vector contains the intercept estimated from the one-dimensional (intercept-only) model and 0 for all remaining coefficients.
+#' }
 #'
 #' @return List with selected parameters.
#' @@ -52,7 +57,8 @@ controlSel <- function(method = "glm.fit", # perhaps another control function fo lambda_min = .001, nlambda = 50, nfolds = 10, - print_level = 0 + print_level = 0, + start_type = c("glm", "naive") ) { list(epsilon = epsilon, @@ -72,7 +78,8 @@ controlSel <- function(method = "glm.fit", # perhaps another control function fo nlambda = nlambda, nfolds = nfolds, lambda = lambda, - print_level = print_level + print_level = print_level, + start_type = if(missing(start_type)) "naive" else start_type ) } diff --git a/R/internals.R b/R/internals.R index c91b8ce..96120ba 100644 --- a/R/internals.R +++ b/R/internals.R @@ -74,30 +74,30 @@ theta_h_estimation <- function(R, pop_means = NULL){ # TODO with BERENZ recommendation p <- ncol(X) - if (is.null(pop_totals) & is.null(pop_means)) { - if (is.null(start)) { - start0 <- start_fit(X = X, # <--- does not work with pop_totals - R = R, - weights = weights, - weights_rand = weights_rand, - method_selection = method_selection) - } else { - start0 <- start - } - } else { # TODO customize start point for fitting with population totals - # start0 <- rep(.8, ncol(X)) - # X_pop <- rbind(X, pop_totals) - # weights_randd <- 1 - if (is.null(start)) { - start0 <- start_fit(X = X, # <--- does not work with pop_totals - R = R, - weights = weights, - weights_rand = weights_rand, - method_selection = method_selection) - } else { - start0 <- start - } - } + # if (is.null(pop_totals) & is.null(pop_means)) { + # if (is.null(start)) { + # start0 <- start_fit(X = X, # <--- does not work with pop_totals + # R = R, + # weights = weights, + # weights_rand = weights_rand, + # method_selection = method_selection) + # } else { + # start0 <- start + # } + # } else { # TODO customize start point for fitting with population totals + # # start0 <- rep(.8, ncol(X)) + # # X_pop <- rbind(X, pop_totals) + # # weights_randd <- 1 + # if (is.null(start)) { + # start0 <- start_fit(X = X, # <--- does not work with pop_totals + # R = R, + # weights = weights, + # weights_rand = weights_rand, + # method_selection = method_selection) + # } else { + # start0 <- start + # } + # } u_theta <- u_theta(R = R, X = X, weights = c(weights_rand, weights), @@ -112,7 +112,7 @@ theta_h_estimation <- function(R, method_selection = method_selection, pop_totals = pop_totals) - root <- nleqslv::nleqslv(x = start0, + root <- nleqslv::nleqslv(x = start, fn = u_theta, method = "Newton", # TODO consider the methods global = "cline", #qline", @@ -123,7 +123,7 @@ theta_h_estimation <- function(R, ) - start <- root$x + theta_root <- root$x if (root$termcd %in% c(2:7, -10)) { switch(as.character(root$termcd), "2" = warning("Relatively convergent algorithm when fitting selection model by nleqslv, but user must check if function values are acceptably small."), @@ -134,7 +134,7 @@ theta_h_estimation <- function(R, "7" = warning("Jacobian is unusable when fitting selection model by nleqslv."), "-10" = warning("user specified Jacobian is incorrect when fitting selection model by nleqslv.")) } - theta_h <- as.vector(start) + theta_h <- as.vector(theta_root) grad <- u_theta(theta_h) hess <- u_theta_der(theta_h) # TODO compare with root$jac #hess <- root$jac diff --git a/R/methods.R b/R/methods.R index 9541c6b..e789a6b 100644 --- a/R/methods.R +++ b/R/methods.R @@ -272,14 +272,14 @@ residuals.nonprobsvy <- function(object, if (any(c("nonprobsvy_dr", "nonprobsvy_mi") %in% class(object))) { if (object$control$control_inference$vars_selection == FALSE) { - res_out <- residuals(object$outcome) # TODO for 
variable selection + res_out <- residuals(object$outcome[[1]]) # TODO for variable selection } else { # TODO for variable selection r <- object$outcome$family$residuals res_out <- switch(type, "response" = r, - "working" = r/object$outcome$family$mu, + "working" = r/object$outcome[[1]]$family$mu, # TODO "deviance" = - "pearson" = r/sqrt(object$outcome$family$variance) + "pearson" = r/sqrt(object$outcome[[1]]$family$variance) ) } } @@ -311,7 +311,7 @@ cooks.distance.nonprobsvy <- function(model, # TODO for variable selection resids <- residuals(model, type = "pearsonSTD")^2 hats <- hatvalues(model) res_sel <- (resids * (hats / (length(coef(model))))) # TODO - res_out <- cooks.distance(model$outcome$glm) + res_out <- cooks.distance(model$outcome[[1]]) } #' @method hatvalues nonprobsvy @@ -321,7 +321,7 @@ cooks.distance.nonprobsvy <- function(model, # TODO for variable selection hatvalues.nonprobsvy <- function(model, ...) { # TODO reduce execution time and glm.fit object and customise to variable selection if (any(c("nonprobsvy_dr", "nonprobsvy_ipw") %in% class(model))) { - propensity_scores <- model$prop_scores + propensity_scores <- model$prob W <- Matrix::Diagonal(x = propensity_scores * (1 - propensity_scores)) XWX_inv <- solve(t(model$X) %*% W %*% model$X) hat_values_sel <- vector(mode = "numeric", length = length(propensity_scores)) @@ -348,10 +348,10 @@ logLik.nonprobsvy <- function(object, ...) { if (any(c("nonprobsvy_dr", "nonprobsvy_ipw") %in% class(object))) { val_sel <- object$selection$log_likelihood attr(val_sel, "nobs") <- dim(residuals(object, type = "pearson"))[1] - attr(val_sel, "df") <- nrow(object$parameters) #length(object$coefficients) + attr(val_sel, "df") <- length(object$selection$coefficients) class(val_sel) <- "logLik" } - val_out <- ifelse(any(c("nonprobsvy_dr", "nonprobsvy_mi") %in% class(object)), logLik(object$outcome), 0) # TODO for gee + val_out <- ifelse(any(c("nonprobsvy_dr", "nonprobsvy_mi") %in% class(object)), logLik(object$outcome[[1]]), 0) # TODO for gee if (class(object)[2] == "nonprobsvy_mi") val <- c("outcome" = val_out) if (class(object)[2] == "nonprobsvy_ipw") val <- c("selection" = val_sel) if (class(object)[2] == "nonprobsvy_dr") val <- c("selection" = val_sel, "outcome" = val_out) @@ -363,8 +363,8 @@ logLik.nonprobsvy <- function(object, ...) { AIC.nonprobsvy <- function(object, ...) { if (!is.character(object$selection$log_likelihood)) { - if (any(c("nonprobsvy_dr", "nonprobsvy_ipw") %in% class(object))) res_sel <- 2 * (length(object$parameters) - object$selection$log_likelihood) - if (any(c("nonprobsvy_dr", "nonprobsvy_mi") %in% class(object))) res_out <- object$outcome$aic + if (any(c("nonprobsvy_dr", "nonprobsvy_ipw") %in% class(object))) res_sel <- 2 * (length(object$selection$coefficients) - object$selection$log_likelihood) + if (any(c("nonprobsvy_dr", "nonprobsvy_mi") %in% class(object))) res_out <- object$outcome[[1]]$aic if (class(object)[2] == "nonprobsvy_mi") res <- c("outcome" = res_out) if (class(object)[2] == "nonprobsvy_ipw") res <- c("selection" = res_sel) if (class(object)[2] == "nonprobsvy_dr") res <- c("selection" = res_sel, "outcome" = res_out) @@ -380,15 +380,15 @@ AIC.nonprobsvy <- function(object, BIC.nonprobsvy <- function(object, ...) 
{
   if (!is.character(object$selection$log_likelihood)) {
-    if (any(c("nonprobsvy_dr", "nonprobsvy_ipw") %in% class(object))) res_sel <- length(object$parameters) * log(object$nonprob_size + object$prob_size) - 2 * object$selection$log_likelihood
+    if (any(c("nonprobsvy_dr", "nonprobsvy_ipw") %in% class(object))) res_sel <- length(object$selection$coefficients) * log(object$nonprob_size + object$prob_size) - 2 * object$selection$log_likelihood
     if (any(c("nonprobsvy_dr", "nonprobsvy_mi") %in% class(object))) {
-      if (!is.null(object$outcome$coefficients)) {
+      if (!is.null(object$outcome[[1]]$coefficients)) {
         if (object$control$control_inference$vars_selection == TRUE) {
           options(AIC="BIC")
           res_out <- HelpersMG::ExtractAIC.glm(object$outcome)[2]
         } else {
-          res_out <- BIC(object$outcome)
+          res_out <- BIC(object$outcome[[1]])
         }
       } else {
         res_out <- "not available for this method"
       }
@@ -425,17 +425,17 @@ confint.nonprobsvy <- function(object,
   if (any(c("nonprobsvy_dr", "nonprobsvy_ipw") %in% class(object))) {
     std <- sqrt(diag(vcov(object)[[1]]))
     sc <- qnorm(p = 1 - (1 - level) / 2)
-    res_sel <- data.frame(object$parameters[,1] - sc * std, object$parameters[,1] + sc * std)
+    res_sel <- data.frame(object$selection$coefficients - sc * std, object$selection$coefficients + sc * std)
     colnames(res_sel) <- c(paste0(100 * (1 - level) / 2, "%"),
                            paste0(100 * (1 - (1 - level) / 2), "%"))
   }
   if (any(c("nonprobsvy_dr", "nonprobsvy_mi") %in% class(object))) {
     if (object$control$control_inference$vars_selection == FALSE) {
-      res_out <- confint(object$outcome)
+      res_out <- confint(object$outcome[[1]])
     } else {
       std <- sqrt(diag(vcov(object)[[1]]))
       sc <- qnorm(p = 1 - (1 - level) / 2)
-      res_out <- data.frame(object$beta[,1] - sc * std, object$beta[,1] + sc * std)
+      res_out <- data.frame(object$outcome[[1]]$coefficients - sc * std, object$outcome[[1]]$coefficients + sc * std)
       colnames(res_out) <- c(paste0(100 * (1 - level) / 2, "%"),
                              paste0(100 * (1 - (1 - level) / 2), "%"))
     }
@@ -463,14 +463,14 @@ confint.nonprobsvy <- function(object,
 vcov.nonprobsvy <- function(object,
                             ...) { # TODO consider different vcov methods for selection and outcome models
   if (object$control$control_inference$vars_selection == TRUE) {
-    if (any(c("nonprobsvy_dr", "nonprobsvy_mi") %in% class(object))) res_out <- object$outcome$variance_covariance
+    if (any(c("nonprobsvy_dr", "nonprobsvy_mi") %in% class(object))) res_out <- object$outcome[[1]]$variance_covariance
     if (any(c("nonprobsvy_dr", "nonprobsvy_ipw") %in% class(object))) res_sel <- object$selection$variance_covariance
     if (class(object)[2] == "nonprobsvy_mi") res <- list(outcome = res_out)
     if (class(object)[2] == "nonprobsvy_ipw") res <- list(selection = res_sel)
     if (class(object)[2] == "nonprobsvy_dr") res <- list(selection = res_sel, outcome = res_out)
   } else {
-    if (any(c("nonprobsvy_dr", "nonprobsvy_mi") %in% class(object))) res_out <- vcov(object$outcome)
+    if (any(c("nonprobsvy_dr", "nonprobsvy_mi") %in% class(object))) res_out <- vcov(object$outcome[[1]])
     if (any(c("nonprobsvy_dr", "nonprobsvy_ipw") %in% class(object))) res_sel <- object$selection$variance_covariance
     if (class(object)[2] == "nonprobsvy_mi") res <- list(outcome = res_out)
     if (class(object)[2] == "nonprobsvy_ipw") res <- list(selection = res_sel)
@@ -483,6 +483,6 @@ vcov.nonprobsvy <- function(object,
 #' @exportS3Method
 deviance.nonprobsvy <- function(object,
                                 ...)
{ - res_out <- object$outcome$deviance + res_out <- object$outcome[[1]]$deviance # TODO for selection model - use selection object } diff --git a/R/nonprobDR.R b/R/nonprobDR.R index 7a2c238..b1ab626 100644 --- a/R/nonprobDR.R +++ b/R/nonprobDR.R @@ -290,6 +290,7 @@ nonprobDR <- function(selection, ps_nons_der <- selection_model$ps_nons_der est_ps_rand_der <- selection_model$est_ps_rand_der theta_standard_errors <- sqrt(diag(selection_model$variance_covariance)) + names(theta_hat) <- names(selection_model$coefficients) <- colnames(X) y_rand_pred <- outcome_model$y_rand_pred y_nons_pred <- outcome_model$y_nons_pred @@ -389,7 +390,7 @@ nonprobDR <- function(selection, #log_likelihood <- est_method_obj$log_likelihood #df_residual <- est_method_obj$df_residual - names(theta_hat) <- colnames(X) + names(selection_model$theta_hat) <- names(theta_hat) <- colnames(X) weights_nons <- 1/ps_nons N_nons <- sum(weights * weights_nons) N_rand <- sum(weights_rand) @@ -475,6 +476,28 @@ nonprobDR <- function(selection, X_nons <- OutcomeModel$X_nons <- SelectionModel$X_nons <- X[loc_nons,] SelectionModel$pop_totals <- c(SelectionModel$pop_totals[1], SelectionModel$pop_totals[idx+1]) } + + if (is.null(start)) { + if (control_selection$start_type == "glm") { + start <- start_fit(X = SelectionModel$X_nons, # <--- does not work with pop_totals + R = R, + weights = weights, + weights_rand = weights_rand, + method_selection = method_selection) + } else if (control_selection$start_type == "naive") { + start_h <- theta_h_estimation(R = R, + X = SelectionModel$X_nons[,1,drop=FALSE], + weights_rand = weights_rand, + weights = weights, + h = h, + method_selection = method_selection, + start = 0, + maxit = maxit, + pop_totals = SelectionModel$pop_totals[1])$theta_h + start <- c(start_h, rep(0, ncol(X) - 1)) + } + } + h_object <- theta_h_estimation(R = R, X = SelectionModel$X_nons, weights_rand = NULL, diff --git a/R/nonprobIPW.R b/R/nonprobIPW.R index 98eea2f..a586f30 100644 --- a/R/nonprobIPW.R +++ b/R/nonprobIPW.R @@ -163,7 +163,7 @@ nonprobIPW <- function(selection, est_ps_rand_der <- selection_model$est_ps_rand_der theta_standard_errors <- sqrt(diag(selection_model$variance_covariance)) - names(selection_model$theta_hat) <- colnames(X) + names(theta_hat) <- names(selection_model$theta_hat) <- colnames(X) weights_nons <- 1/ps_nons N <- sum(weights * weights_nons) @@ -252,6 +252,28 @@ nonprobIPW <- function(selection, ################ ESTIMATION pop_totals <- model$pop_totals[idx] } + + if (is.null(start)) { + if (control_selection$start_type == "glm") { + start <- start_fit(X = X, # <--- does not work with pop_totals + R = R, + weights = weights, + weights_rand = weights_rand, + method_selection = method_selection) + } else if (control_selection$start_type == "naive") { + start_h <- theta_h_estimation(R = R, + X = X[,1,drop=FALSE], + weights_rand = weights_rand, + weights = weights, + h = h, + method_selection = method_selection, + start = 0, + maxit = maxit, + pop_totals = pop_totals[1])$theta_h + start <- c(start_h, rep(0, ncol(X) - 1)) + } + } + h_object <- theta_h_estimation(R = R, X = X, weights_rand = weights_rand, diff --git a/README.Rmd b/README.Rmd index c37005f..a88223e 100644 --- a/README.Rmd +++ b/README.Rmd @@ -36,7 +36,7 @@ You can install the development version of `nonprobsvy` package from [Github](ht remotes::install_github("ncn-foreigners/nonprobsvy") ``` -## Example +## Examples Simulate example data from the following paper: Kim, Jae Kwang, and Zhonglei Wang. 
"Sampling techniques for big data analysis." International Statistical Review 87 (2019): S177-S191 [section 5.2] diff --git a/README.md b/README.md index 247a0fe..b853480 100644 --- a/README.md +++ b/README.md @@ -36,7 +36,7 @@ You can install the development version of `nonprobsvy` package from remotes::install_github("ncn-foreigners/nonprobsvy") ``` -## Example +## Examples Simulate example data from the following paper: Kim, Jae Kwang, and Zhonglei Wang. “Sampling techniques for big data analysis.” @@ -88,33 +88,27 @@ Results summary(result_dr) #> #> Call: -#> nonprob(selection = ~x2, outcome = y1 ~ x1 + x2, data = subset(population, -#> flag_bd1 == 1), svydesign = sample_prob) +#> nonprob(data = subset(population, flag_bd1 == 1), selection = ~x2, +#> outcome = y1 ~ x1 + x2, svydesign = sample_prob) #> #> ------------------------- #> Estimated population mean: 2.95 with overall std.err of: 0.04195 #> And std.err for nonprobability and probability samples being respectively: #> 0.000783 and 0.04195 #> -#> Based on: Doubly-Robust method -#> #> 95% Confidence inverval for popualtion mean: -#> lower_bound upper_bound -#> normal 2.867789 3.03224 +#> lower_bound upper_bound +#> y1 2.867789 3.03224 +#> #> -#> For a population of estimate size: 1025111 +#> Based on: Doubly-Robust method +#> For a population of estimate size: #> Obtained on a nonprobability sample of size: 693011 #> With an auxiliary probability sample of size: 1000 #> ------------------------- #> #> Regression coefficients: #> ----------------------- -#> For glm regression on selection variable: -#> Estimate Std. Error z value P(>|z|) -#> (Intercept) -0.499105 0.003702 -134.8 <2e-16 *** -#> x2 1.885532 0.005303 355.6 <2e-16 *** -#> -#> ----------------------- #> For glm regression on outcome variable: #> Estimate Std. Error z value P(>|z|) #> (Intercept) 0.996282 0.002139 465.8 <2e-16 *** @@ -122,6 +116,12 @@ summary(result_dr) #> x2 0.999125 0.001098 910.2 <2e-16 *** #> --- #> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 +#> +#> ----------------------- +#> For glm regression on selection variable: +#> Estimate Std. Error z value P(>|z|) +#> (Intercept) -0.498997 0.003702 -134.8 <2e-16 *** +#> x2 1.885629 0.005303 355.6 <2e-16 *** #> ------------------------- #> #> Weights: @@ -131,10 +131,10 @@ summary(result_dr) #> #> Residuals: #> Min. 1st Qu. Median Mean 3rd Qu. Max. 
-#> -0.99999 0.06604 0.23781 0.26048 0.44361 0.62225 +#> -0.99999 0.06603 0.23778 0.26046 0.44358 0.62222 #> -#> AIC: 1010626 -#> BIC: 1010672 +#> AIC: 1010622 +#> BIC: 1010645 #> Log-Likelihood: -505309 on 694009 Degrees of freedom ``` @@ -154,20 +154,20 @@ Results summary(result_mi) #> #> Call: -#> nonprob(outcome = y1 ~ x1 + x2, data = subset(population, flag_bd1 == -#> 1), svydesign = sample_prob) +#> nonprob(data = subset(population, flag_bd1 == 1), outcome = y1 ~ +#> x1 + x2, svydesign = sample_prob) #> #> ------------------------- #> Estimated population mean: 2.95 with overall std.err of: 0.04203 #> And std.err for nonprobability and probability samples being respectively: #> 0.001227 and 0.04201 #> -#> Based on: Mass Imputation method -#> #> 95% Confidence inverval for popualtion mean: -#> lower_bound upper_bound -#> normal 2.867433 3.032186 +#> lower_bound upper_bound +#> y1 2.867433 3.032186 +#> #> +#> Based on: Mass Imputation method #> For a population of estimate size: 1e+06 #> Obtained on a nonprobability sample of size: 693011 #> With an auxiliary probability sample of size: 1000 @@ -201,21 +201,21 @@ Results summary(result_ipw) #> #> Call: -#> nonprob(selection = ~x2, target = ~y1, data = subset(population, -#> flag_bd1 == 1), svydesign = sample_prob) +#> nonprob(data = subset(population, flag_bd1 == 1), selection = ~x2, +#> target = ~y1, svydesign = sample_prob) #> #> ------------------------- -#> Estimated population mean: 2.925 with overall std.err of: 0.04999 +#> Estimated population mean: 2.925 with overall std.err of: 0.05 #> And std.err for nonprobability and probability samples being respectively: -#> 0.001117 and 0.04997 -#> -#> Based on: Inverse probability weighted method +#> 0.001586 and 0.04997 #> #> 95% Confidence inverval for popualtion mean: -#> lower_bound upper_bound -#> normal 2.826789 3.022736 +#> lower_bound upper_bound +#> y1 2.82679 3.022776 #> -#> For a population of estimate size: 1025111 +#> +#> Based on: Inverse probability weighted method +#> For a population of estimate size: 1025063 #> Obtained on a nonprobability sample of size: 693011 #> With an auxiliary probability sample of size: 1000 #> ------------------------- @@ -224,8 +224,8 @@ summary(result_ipw) #> ----------------------- #> For glm regression on selection variable: #> Estimate Std. Error z value P(>|z|) -#> (Intercept) -0.499105 0.003702 -134.8 <2e-16 *** -#> x2 1.885532 0.005303 355.6 <2e-16 *** +#> (Intercept) -0.498997 0.003702 -134.8 <2e-16 *** +#> x2 1.885629 0.005303 355.6 <2e-16 *** #> --- #> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 #> ------------------------- @@ -237,10 +237,10 @@ summary(result_ipw) #> #> Residuals: #> Min. 1st Qu. Median Mean 3rd Qu. Max. -#> -0.99999 0.06604 0.23781 0.26048 0.44361 0.62225 +#> -0.99999 0.06603 0.23778 0.26046 0.44358 0.62222 #> -#> AIC: 1010626 -#> BIC: 1010672 +#> AIC: 1010622 +#> BIC: 1010645 #> Log-Likelihood: -505309 on 694009 Degrees of freedom ``` diff --git a/man/controlSel.Rd b/man/controlSel.Rd index d415025..b07fe92 100644 --- a/man/controlSel.Rd +++ b/man/controlSel.Rd @@ -23,7 +23,8 @@ controlSel( lambda_min = 0.001, nlambda = 50, nfolds = 10, - print_level = 0 + print_level = 0, + start_type = c("glm", "naive") ) } \arguments{ @@ -71,6 +72,14 @@ controlSel( \item{nfolds}{The number of folds for cross validation. 
Default is 10.}

\item{print_level}{this argument determines the level of printing which is done during the optimization (for propensity score model) process.}
+
+\item{start_type}{Type of starting values used when fitting the selection
+model, one of the following:
+\itemize{
+\item if \code{glm}, the starting values are taken from a \code{glm} fit on the pooled nonprobability and probability samples.
+\item if \code{naive}, the starting vector contains the intercept estimated
+from the one-dimensional (intercept-only) model and 0 for all remaining coefficients.
+}}
 }
 \value{
 List with selected parameters.
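
A minimal usage sketch of the new `start_type` option (assuming the simulated `population` data and `sample_prob` design from the README examples above, and assuming `control_selection` is the argument of `nonprob()` that accepts the output of `controlSel()`):

```r
library(nonprobsvy)

# IPW estimation with "naive" starting values for the selection model:
# only the intercept is estimated up front, the remaining coefficients
# start from 0.
result_ipw_naive <- nonprob(
  selection = ~ x2,
  target = ~ y1,
  data = subset(population, flag_bd1 == 1),
  svydesign = sample_prob,
  control_selection = controlSel(start_type = "naive")
)

# "glm" instead derives all starting values from a glm fit on the
# pooled nonprobability and probability samples.
result_ipw_glm <- nonprob(
  selection = ~ x2,
  target = ~ y1,
  data = subset(population, flag_bd1 == 1),
  svydesign = sample_prob,
  control_selection = controlSel(start_type = "glm")
)
```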
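
The "naive" construction itself is easy to state in base R. The sketch below uses a hypothetical `make_start()` helper built on plain `glm.fit()` to illustrate the idea; the package internals instead go through `max_lik()` or `theta_h_estimation()` and account for the survey weights:

```r
# Starting values for a logistic selection model, given a design matrix X
# (intercept in the first column) and a sample-membership indicator R.
make_start <- function(X, R, start_type = c("naive", "glm")) {
  start_type <- match.arg(start_type)
  if (start_type == "glm") {
    # full fit on all covariates
    unname(coef(glm.fit(x = X, y = R, family = binomial())))
  } else {
    # intercept-only fit, zeros for the remaining coefficients
    fit0 <- glm.fit(x = X[, 1, drop = FALSE], y = R, family = binomial())
    c(unname(coef(fit0)), rep(0, ncol(X) - 1))
  }
}

set.seed(123)
X <- cbind(intercept = 1, x2 = rnorm(100))
R <- rbinom(100, 1, plogis(-0.5 + X[, "x2"]))
make_start(X, R, "naive") # estimated intercept followed by a zero
```

The appeal of the "naive" start is robustness: the one-dimensional fit is cheap and stable, which gives the `nleqslv()` root-finding step a sensible point to expand from in cases where the `glm`-based start is unavailable, e.g. when only population totals are supplied (the situation flagged by the "does not work with pop_totals" comments in the patch).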