From 2e4d47be6c6e05511239dc04c35c49dbd897aaf3 Mon Sep 17 00:00:00 2001 From: LukaszChrostowski Date: Thu, 23 Nov 2023 11:07:16 +0100 Subject: [PATCH 1/4] readme update --- README.Rmd | 23 ++++---- README.md | 165 +++++++++++++++-------------------------------------- 2 files changed, 58 insertions(+), 130 deletions(-) diff --git a/README.Rmd b/README.Rmd index eb9bf19..37b7ddf 100644 --- a/README.Rmd +++ b/README.Rmd @@ -2,6 +2,9 @@ output: github_document bibliography: references.bib link-citations: true +editor_options: + markdown: + wrap: 72 --- @@ -82,14 +85,14 @@ where set of auxiliary variables (denoted as $\boldsymbol{X}$) is available for both sources while $Y$ and $\boldsymbol{d}$ (or $\boldsymbol{w}$) is present only in probability sample. -| Sample | | Auxiliary variables $\boldsymbol{X}$ | Target variable $Y$ | Design ($\boldsymbol{d}$) or calibrated ($\boldsymbol{w}$) weights | -|-------------------|--------:|:-------------:|:-------------:|:-------------:| -| $S_A$ (non-probability) | 1 | $\checkmark$ | $\checkmark$ | ? | -| | ... | $\checkmark$ | $\checkmark$ | ? | -| | $n_A$ | $\checkmark$ | $\checkmark$ | ? | -| $S_B$ (probability) | $n_A+1$ | $\checkmark$ | ? | $\checkmark$ | -| | ... | $\checkmark$ | ? | $\checkmark$ | -| | $n_A+n_B$ | $\checkmark$ | ? | $\checkmark$ | +| Sample | | Auxiliary variables $\boldsymbol{X}$ | Target variable $Y$ | Design ($\boldsymbol{d}$) or calibrated ($\boldsymbol{w}$) weights | +|---------------|--------------:|:-------------:|:-------------:|:-------------:| +| $S_A$ (non-probability) | 1 | $\checkmark$ | $\checkmark$ | ? | +| | ... | $\checkmark$ | $\checkmark$ | ? | +| | $n_A$ | $\checkmark$ | $\checkmark$ | ? | +| $S_B$ (probability) | $n_A+1$ | $\checkmark$ | ? | $\checkmark$ | +| | ... | $\checkmark$ | ? | $\checkmark$ | +| | $n_A+n_B$ | $\checkmark$ | ? | $\checkmark$ | ## Basic functionalities @@ -118,7 +121,7 @@ possible scenarios: ### When unit-level data is available for non-probability survey only | Estimator | Example code | -|-------------------|-----------------------------------------------------| +|--------------------|----------------------------------------------------| | Mass imputation based on regression imputation | `` nonprob(outcome = y ~ x1 + x2 + ... + xk, data = nonprob, pop_totals = c(`(Intercept)`=N, x1=tau_x1, x2=tau_x2, ..., xk=tau_xk), method_outcome = "glm", family_outcome = "gaussian") `` | | Inverse probability weighting | `` nonprob(selection = ~ x1 + x2 + ... + xk, target = ~ y, data = nonprob, pop_totals = c(`(Intercept)`=N, x1=tau_x1, x2=tau_x2, ..., xk=tau_xk), method_selection = "logit") `` | | Inverse probability weighting with calibration constraint | `` nonprob(selection = ~ x1 + x2 + ... + xk, target = ~ y, data = nonprob, pop_totals = c(`(Intercept)`=N, x1=tau_x1, x2=tau_x2, ..., xk=tau_xk), method_selection = "logit", control_selection = controlSel(est_method_sel = "gee", h = 1)) `` | @@ -127,7 +130,7 @@ possible scenarios: ### When unit-level data are available for both surveys | Estimator | Example code | -|----------------------|--------------------------------------------------| +|----------------------|-------------------------------------------------| | Mass imputation based on regression imputation | `nonprob(outcome = y ~ x1 + x2 + ... + xk, data = nonprob, svydesign = prob, method_outcome = "glm", family_outcome = "gaussian")` | | Mass imputation based on nearest neighbour imputation | `nonprob(outcome = y ~ x1 + x2 + ... 
+ xk, data = nonprob, svydesign = prob, method_outcome = "nn", family_outcome = "gaussian", control_outcome=controlOutcome(k = 2))` | | Mass imputation based on predictive mean matching | `nonprob(outcome = y ~ x1 + x2 + ... + xk, data = nonprob, svydesign = prob, method_outcome = "pmm", family_outcome = "gaussian")` | diff --git a/README.md b/README.md index 2bfa872..8493d63 100644 --- a/README.md +++ b/README.md @@ -1,55 +1,36 @@ - # `nonprobsvy`: an R package for modern statistical inference methods based on non-probability samples -[![R-CMD-check](https://github.com/ncn-foreigners/nonprobsvy/workflows/R-CMD-check/badge.svg)](https://github.com/ncn-foreigners/nonprobsvy/actions) -[![Codecov test -coverage](https://codecov.io/gh/ncn-foreigners/nonprobsvy/branch/main/graph/badge.svg)](https://app.codecov.io/gh/ncn-foreigners/nonprobsvy?branch=main) +[![R-CMD-check](https://github.com/ncn-foreigners/nonprobsvy/workflows/R-CMD-check/badge.svg)](https://github.com/ncn-foreigners/nonprobsvy/actions) [![Codecov test coverage](https://codecov.io/gh/ncn-foreigners/nonprobsvy/branch/main/graph/badge.svg)](https://app.codecov.io/gh/ncn-foreigners/nonprobsvy?branch=main) ## Basic information -The goal of this package is to provide R users access to modern methods -for non-probability samples when auxiliary information from the -population or probability sample is available: +The goal of this package is to provide R users access to modern methods for non-probability samples when auxiliary information from the population or probability sample is available: -- inverse probability weighting estimators with possible calibration - constraints ([Chen, Li, and Wu 2020](#ref-chen2020)), -- mass imputation estimators based in nearest neighbours ([Yang, Kim, - and Hwang 2021](#ref-yang2021)), predictive mean matching and - regression imputation ([Kim et al. 2021](#ref-kim2021)), -- doubly robust estimators with bias minimization Yang, Kim, and Song - ([2020](#ref-yang2020)). +- inverse probability weighting estimators with possible calibration constraints ([Chen, Li, and Wu 2020](#ref-chen2020)), +- mass imputation estimators based in nearest neighbours ([Yang, Kim, and Hwang 2021](#ref-yang2021)), predictive mean matching and regression imputation ([Kim et al. 2021](#ref-kim2021)), +- doubly robust estimators with bias minimization Yang, Kim, and Song ([2020](#ref-yang2020)). The package allows for: -- variable section in high-dimensional space using SCAD ([Yang, Kim, and - Song 2020](#ref-yang2020)), Lasso and MCP penalty (via the `ncvreg`, - `Rcpp`, `RcppArmadillo` packages), -- estimation of variance using analytical and bootstrap approach (see Wu - ([2023](#ref-wu2023))), -- integration with the `survey` package when probability sample is - available Lumley ([2023](#ref-Lumley2023)), -- different links for selection (`logit`, `probit` and `cloglog`) and - outcome (`gaussian`, `binomial` and `poisson`) variables. +- variable section in high-dimensional space using SCAD ([Yang, Kim, and Song 2020](#ref-yang2020)), Lasso and MCP penalty (via the `ncvreg`, `Rcpp`, `RcppArmadillo` packages), +- estimation of variance using analytical and bootstrap approach (see Wu ([2023](#ref-wu2023))), +- integration with the `survey` package when probability sample is available Lumley ([2023](#ref-Lumley2023)), +- different links for selection (`logit`, `probit` and `cloglog`) and outcome (`gaussian`, `binomial` and `poisson`) variables. 
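As a quick illustration of how these options come together in a single call (a minimal sketch, not part of the patch and not runnable as-is: the data frame `nonprob_sample`, the covariates `x1`, `x2`, the target `y`, and the totals `N`, `tau_x1`, `tau_x2` are hypothetical placeholders; the call signature follows the estimator tables later in this README):

``` r
library(nonprobsvy)

# IPW estimator of the mean of y with a complementary log-log link,
# using only known population totals of the auxiliary variables
nonprob(
  selection = ~ x1 + x2,                    # propensity score model
  target = ~ y,                             # target variable
  data = nonprob_sample,                    # non-probability sample (placeholder)
  pop_totals = c(`(Intercept)` = N,         # known population size
                 x1 = tau_x1, x2 = tau_x2), # known totals of x1 and x2
  method_selection = "cloglog"              # link for the selection model
)
```

Swapping `method_selection` between `"logit"`, `"probit"` and `"cloglog"` is all that is needed to change the assumed selection mechanism.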
Details on use of the package be found: -- on the draft (and not proofread) version the book [Modern inference - methods for non-probability samples with - R](https://ncn-foreigners.github.io/nonprobsvy-book/), -- example codes that reproduce papers are available at github in the - repository [software - tutorials](https://github.com/ncn-foreigners/software-tutorials). +- on the draft (and not proofread) version the book [Modern inference methods for non-probability samples with R](https://ncn-foreigners.github.io/nonprobsvy-book/), +- example codes that reproduce papers are available at github in the repository [software tutorials](https://github.com/ncn-foreigners/software-tutorials). ## Installation -You can install the recent version of `nonprobsvy` package from -[Github](https://github.com/ncn-foreigners/nonprobsvy) with: +You can install the recent version of `nonprobsvy` package from [Github](https://github.com/ncn-foreigners/nonprobsvy) with: ``` r remotes::install_github("ncn-foreigners/nonprobsvy") @@ -63,43 +44,23 @@ remotes::install_github("ncn-foreigners/nonprobsvy@dev") ## Basic idea -Consider the following setting where two samples are available: -non-probability (denoted as $S_A$ ) and probability (denoted as $S_B$) -where set of auxiliary variables (denoted as $\boldsymbol{X}$) is -available for both sources while $Y$ and $\boldsymbol{d}$ (or -$\boldsymbol{w}$) is present only in probability sample. +Consider the following setting where two samples are available: non-probability (denoted as $S_A$ ) and probability (denoted as $S_B$) where set of auxiliary variables (denoted as $\boldsymbol{X}$) is available for both sources while $Y$ and $\boldsymbol{d}$ (or $\boldsymbol{w}$) is present only in probability sample. | Sample | | Auxiliary variables $\boldsymbol{X}$ | Target variable $Y$ | Design ($\boldsymbol{d}$) or calibrated ($\boldsymbol{w}$) weights | |-------------------------|----------:|:------------------------------------:|:-------------------:|:------------------------------------------------------------------:| | $S_A$ (non-probability) | 1 | $\checkmark$ | $\checkmark$ | ? | -| | … | $\checkmark$ | $\checkmark$ | ? | +| | ... | $\checkmark$ | $\checkmark$ | ? | | | $n_A$ | $\checkmark$ | $\checkmark$ | ? | | $S_B$ (probability) | $n_A+1$ | $\checkmark$ | ? | $\checkmark$ | -| | … | $\checkmark$ | ? | $\checkmark$ | +| | ... | $\checkmark$ | ? | $\checkmark$ | | | $n_A+n_B$ | $\checkmark$ | ? | $\checkmark$ | ## Basic functionalities -Suppose $Y$ is the target variable, $\boldsymbol{X}$ is a matrix of -auxiliary variables, $R$ is the inclusion indicator. Then, if we are -interested in estimating the mean $\bar{\tau}_Y$ or the sum $\tau_Y$ of -the of the target variable given the observed data set -$(y_k, \boldsymbol{x}_k, R_k)$, we can approach this problem with the -possible scenarios: - -- unit-level data is available for the non-probability sample $A$, - i.e. $(y_k, \boldsymbol{x}_k)$ is available for all units $k \in S_A$, - and population-level data is available for - $\boldsymbol{x}_1, ..., \boldsymbol{x}_p$, denoted as - $\tau_{x_1}, \tau_{x_2}, ..., \tau_{x_p}$ and population size $N$ is - known. We can also consider situations where population data are - estimated (e.g. on the basis of a survey to which we do not have - access), -- unit-level data is available for the non-probability sample $S_A$ and - the probability sample $S_B$, i.e. $(y_k, \boldsymbol{x}_k, R_k)$ is - determined by the data. 
is determined by the data: $R_k=1$ if - $k \in S_A$ otherwise $R_k=0$, $y_k$ is observed only for sample $S_A$ - and $\boldsymbol{x}_k$ is observed in both in both $S_A$ and $S_B$, +Suppose $Y$ is the target variable, $\boldsymbol{X}$ is a matrix of auxiliary variables, $R$ is the inclusion indicator. Then, if we are interested in estimating the mean $\bar{\tau}_Y$ or the sum $\tau_Y$ of the of the target variable given the observed data set $(y_k, \boldsymbol{x}_k, R_k)$, we can approach this problem with the possible scenarios: + +- unit-level data is available for the non-probability sample $A$, i.e. $(y_k, \boldsymbol{x}_k)$ is available for all units $k \in S_A$, and population-level data is available for $\boldsymbol{x}_1, ..., \boldsymbol{x}_p$, denoted as $\tau_{x_1}, \tau_{x_2}, ..., \tau_{x_p}$ and population size $N$ is known. We can also consider situations where population data are estimated (e.g. on the basis of a survey to which we do not have access), +- unit-level data is available for the non-probability sample $S_A$ and the probability sample $S_B$, i.e. $(y_k, \boldsymbol{x}_k, R_k)$ is determined by the data. is determined by the data: $R_k=1$ if $k \in S_A$ otherwise $R_k=0$, $y_k$ is observed only for sample $S_A$ and $\boldsymbol{x}_k$ is observed in both in both $S_A$ and $S_B$, ### When unit-level data is available for non-probability survey only @@ -126,9 +87,7 @@ possible scenarios: ## Examples -Simulate example data from the following paper: Kim, Jae Kwang, and -Zhonglei Wang. “Sampling techniques for big data analysis.” -International Statistical Review 87 (2019): S177-S191 \[section 5.2\] +Simulate example data from the following paper: Kim, Jae Kwang, and Zhonglei Wang. "Sampling techniques for big data analysis." International Statistical Review 87 (2019): S177-S191 $$section 5.2$$ ``` r library(survey) @@ -158,8 +117,7 @@ sample_prob <- svydesign(ids= ~1, weights = ~ base_w_srs, data = subset(population, flag_srs == 1)) ``` -Estimate population mean of `y1` based on doubly robust estimator using -IPW with calibration constraints. +Estimate population mean of `y1` based on doubly robust estimator using IPW with calibration constraints. ``` r result_dr <- nonprob( @@ -334,69 +292,36 @@ summary(result_ipw) ## Funding -Work on this package is supported by the National Science Centre, OPUS -22 grant no. 2020/39/B/HS4/00941. +Work on this package is supported by the National Science Centre, OPUS 22 grant no. 2020/39/B/HS4/00941. ## References (selected) -
-<div id="refs" class="references csl-bib-body hanging-indent">
-
-<div id="ref-chen2020" class="csl-entry">
-
-Chen, Yilin, Pengfei Li, and Changbao Wu. 2020. “Doubly Robust Inference
-With Nonprobability Survey Samples.” *Journal of the American
-Statistical Association* 115 (532): 2011–21.
-<https://doi.org/10.1080/01621459.2019.1677241>.
-
-</div>
-
-<div id="ref-kim2021" class="csl-entry">
-
-Kim, Jae Kwang, Seho Park, Yilin Chen, and Changbao Wu. 2021. “Combining
-Non-Probability and Probability Survey Samples Through Mass Imputation.”
-*Journal of the Royal Statistical Society Series A: Statistics in
-Society* 184 (3): 941–63. <https://doi.org/10.1111/rssa.12696>.
-
-</div>
-
-<div id="ref-Lumley2004" class="csl-entry">
-
-Lumley, Thomas. 2004. “Analysis of Complex Survey Samples.” *Journal of
-Statistical Software* 9 (1): 1–19.
-
-</div>
-
-<div id="ref-Lumley2023" class="csl-entry">
-
-———. 2023. “Survey: Analysis of Complex Survey Samples.”
-
-</div>
-
-<div id="ref-wu2023" class="csl-entry">
-
-Wu, Changbao. 2023. “Statistical Inference with Non-Probability Survey
-Samples.” *Survey Methodology* 48 (2): 283–311.
-.
-
-</div>
-
-<div id="ref-yang2021" class="csl-entry">
-
-Yang, Shu, Jae Kwang Kim, and Youngdeok Hwang. 2021. “Integration of
-Data from Probability Surveys and Big Found Data for Finite Population
-Inference Using Mass Imputation.” *Survey Methodology* 47 (1): 29–58.
-.
-
-</div>
-
-<div id="ref-yang2020" class="csl-entry">
-
-Yang, Shu, Jae Kwang Kim, and Rui Song. 2020. “Doubly Robust Inference
-When Combining Probability and Non-Probability Samples with High
-Dimensional Data.” *Journal of the Royal Statistical Society Series B:
-Statistical Methodology* 82 (2): 445–65.
-<https://doi.org/10.1111/rssb.12354>.
-
-</div>
-
-</div>
+::: {#ref-yang2020 .csl-entry} +Yang, Shu, Jae Kwang Kim, and Rui Song. 2020. "Doubly Robust Inference When Combining Probability and Non-Probability Samples with High Dimensional Data." *Journal of the Royal Statistical Society Series B: Statistical Methodology* 82 (2): 445--65. . +::: +::: From 0b576a02a0e2189708b0ef74089dbdf190c0bb5d Mon Sep 17 00:00:00 2001 From: LukaszChrostowski Date: Thu, 23 Nov 2023 11:10:47 +0100 Subject: [PATCH 2/4] readme update --- README.Rmd | 3 +- README.md | 165 ++++++++++++++++++++++++++++++++++++++--------------- 2 files changed, 122 insertions(+), 46 deletions(-) diff --git a/README.Rmd b/README.Rmd index 37b7ddf..38a518d 100644 --- a/README.Rmd +++ b/README.Rmd @@ -104,7 +104,8 @@ $(y_k, \boldsymbol{x}_k, R_k)$, we can approach this problem with the possible scenarios: - unit-level data is available for the non-probability sample $A$, - i.e. $(y_k, \boldsymbol{x}_k)$ is available for all units + i.e. + $(y_k, \boldsymbol{x}_k)$ is available for all units $k \in S_A$, and population-level data is available for $\boldsymbol{x}_1, ..., \boldsymbol{x}_p$, denoted as $\tau_{x_1}, \tau_{x_2}, ..., \tau_{x_p}$ and population size $N$ is diff --git a/README.md b/README.md index 8493d63..24d1e70 100644 --- a/README.md +++ b/README.md @@ -1,36 +1,55 @@ + # `nonprobsvy`: an R package for modern statistical inference methods based on non-probability samples -[![R-CMD-check](https://github.com/ncn-foreigners/nonprobsvy/workflows/R-CMD-check/badge.svg)](https://github.com/ncn-foreigners/nonprobsvy/actions) [![Codecov test coverage](https://codecov.io/gh/ncn-foreigners/nonprobsvy/branch/main/graph/badge.svg)](https://app.codecov.io/gh/ncn-foreigners/nonprobsvy?branch=main) +[![R-CMD-check](https://github.com/ncn-foreigners/nonprobsvy/workflows/R-CMD-check/badge.svg)](https://github.com/ncn-foreigners/nonprobsvy/actions) +[![Codecov test +coverage](https://codecov.io/gh/ncn-foreigners/nonprobsvy/branch/main/graph/badge.svg)](https://app.codecov.io/gh/ncn-foreigners/nonprobsvy?branch=main) ## Basic information -The goal of this package is to provide R users access to modern methods for non-probability samples when auxiliary information from the population or probability sample is available: +The goal of this package is to provide R users access to modern methods +for non-probability samples when auxiliary information from the +population or probability sample is available: -- inverse probability weighting estimators with possible calibration constraints ([Chen, Li, and Wu 2020](#ref-chen2020)), -- mass imputation estimators based in nearest neighbours ([Yang, Kim, and Hwang 2021](#ref-yang2021)), predictive mean matching and regression imputation ([Kim et al. 2021](#ref-kim2021)), -- doubly robust estimators with bias minimization Yang, Kim, and Song ([2020](#ref-yang2020)). +- inverse probability weighting estimators with possible calibration + constraints ([Chen, Li, and Wu 2020](#ref-chen2020)), +- mass imputation estimators based in nearest neighbours ([Yang, Kim, + and Hwang 2021](#ref-yang2021)), predictive mean matching and + regression imputation ([Kim et al. 2021](#ref-kim2021)), +- doubly robust estimators with bias minimization Yang, Kim, and Song + ([2020](#ref-yang2020)). 
The package allows for: -- variable section in high-dimensional space using SCAD ([Yang, Kim, and Song 2020](#ref-yang2020)), Lasso and MCP penalty (via the `ncvreg`, `Rcpp`, `RcppArmadillo` packages), -- estimation of variance using analytical and bootstrap approach (see Wu ([2023](#ref-wu2023))), -- integration with the `survey` package when probability sample is available Lumley ([2023](#ref-Lumley2023)), -- different links for selection (`logit`, `probit` and `cloglog`) and outcome (`gaussian`, `binomial` and `poisson`) variables. +- variable section in high-dimensional space using SCAD ([Yang, Kim, and + Song 2020](#ref-yang2020)), Lasso and MCP penalty (via the `ncvreg`, + `Rcpp`, `RcppArmadillo` packages), +- estimation of variance using analytical and bootstrap approach (see Wu + ([2023](#ref-wu2023))), +- integration with the `survey` package when probability sample is + available Lumley ([2023](#ref-Lumley2023)), +- different links for selection (`logit`, `probit` and `cloglog`) and + outcome (`gaussian`, `binomial` and `poisson`) variables. Details on use of the package be found: -- on the draft (and not proofread) version the book [Modern inference methods for non-probability samples with R](https://ncn-foreigners.github.io/nonprobsvy-book/), -- example codes that reproduce papers are available at github in the repository [software tutorials](https://github.com/ncn-foreigners/software-tutorials). +- on the draft (and not proofread) version the book [Modern inference + methods for non-probability samples with + R](https://ncn-foreigners.github.io/nonprobsvy-book/), +- example codes that reproduce papers are available at github in the + repository [software + tutorials](https://github.com/ncn-foreigners/software-tutorials). ## Installation -You can install the recent version of `nonprobsvy` package from [Github](https://github.com/ncn-foreigners/nonprobsvy) with: +You can install the recent version of `nonprobsvy` package from +[Github](https://github.com/ncn-foreigners/nonprobsvy) with: ``` r remotes::install_github("ncn-foreigners/nonprobsvy") @@ -44,23 +63,43 @@ remotes::install_github("ncn-foreigners/nonprobsvy@dev") ## Basic idea -Consider the following setting where two samples are available: non-probability (denoted as $S_A$ ) and probability (denoted as $S_B$) where set of auxiliary variables (denoted as $\boldsymbol{X}$) is available for both sources while $Y$ and $\boldsymbol{d}$ (or $\boldsymbol{w}$) is present only in probability sample. +Consider the following setting where two samples are available: +non-probability (denoted as $S_A$ ) and probability (denoted as $S_B$) +where set of auxiliary variables (denoted as $\boldsymbol{X}$) is +available for both sources while $Y$ and $\boldsymbol{d}$ (or +$\boldsymbol{w}$) is present only in probability sample. | Sample | | Auxiliary variables $\boldsymbol{X}$ | Target variable $Y$ | Design ($\boldsymbol{d}$) or calibrated ($\boldsymbol{w}$) weights | |-------------------------|----------:|:------------------------------------:|:-------------------:|:------------------------------------------------------------------:| | $S_A$ (non-probability) | 1 | $\checkmark$ | $\checkmark$ | ? | -| | ... | $\checkmark$ | $\checkmark$ | ? | +| | … | $\checkmark$ | $\checkmark$ | ? | | | $n_A$ | $\checkmark$ | $\checkmark$ | ? | | $S_B$ (probability) | $n_A+1$ | $\checkmark$ | ? | $\checkmark$ | -| | ... | $\checkmark$ | ? | $\checkmark$ | +| | … | $\checkmark$ | ? | $\checkmark$ | | | $n_A+n_B$ | $\checkmark$ | ? 
| $\checkmark$ | ## Basic functionalities -Suppose $Y$ is the target variable, $\boldsymbol{X}$ is a matrix of auxiliary variables, $R$ is the inclusion indicator. Then, if we are interested in estimating the mean $\bar{\tau}_Y$ or the sum $\tau_Y$ of the of the target variable given the observed data set $(y_k, \boldsymbol{x}_k, R_k)$, we can approach this problem with the possible scenarios: - -- unit-level data is available for the non-probability sample $A$, i.e. $(y_k, \boldsymbol{x}_k)$ is available for all units $k \in S_A$, and population-level data is available for $\boldsymbol{x}_1, ..., \boldsymbol{x}_p$, denoted as $\tau_{x_1}, \tau_{x_2}, ..., \tau_{x_p}$ and population size $N$ is known. We can also consider situations where population data are estimated (e.g. on the basis of a survey to which we do not have access), -- unit-level data is available for the non-probability sample $S_A$ and the probability sample $S_B$, i.e. $(y_k, \boldsymbol{x}_k, R_k)$ is determined by the data. is determined by the data: $R_k=1$ if $k \in S_A$ otherwise $R_k=0$, $y_k$ is observed only for sample $S_A$ and $\boldsymbol{x}_k$ is observed in both in both $S_A$ and $S_B$, +Suppose $Y$ is the target variable, $\boldsymbol{X}$ is a matrix of +auxiliary variables, $R$ is the inclusion indicator. Then, if we are +interested in estimating the mean $\bar{\tau}_Y$ or the sum $\tau_Y$ of +the of the target variable given the observed data set +$(y_k, \boldsymbol{x}_k, R_k)$, we can approach this problem with the +possible scenarios: + +- unit-level data is available for the non-probability sample $A$, i.e.  + $(y_k, \boldsymbol{x}_k)$ is available for all units $k \in S_A$, and + population-level data is available for + $\boldsymbol{x}_1, ..., \boldsymbol{x}_p$, denoted as + $\tau_{x_1}, \tau_{x_2}, ..., \tau_{x_p}$ and population size $N$ is + known. We can also consider situations where population data are + estimated (e.g. on the basis of a survey to which we do not have + access), +- unit-level data is available for the non-probability sample $S_A$ and + the probability sample $S_B$, i.e. $(y_k, \boldsymbol{x}_k, R_k)$ is + determined by the data. is determined by the data: $R_k=1$ if + $k \in S_A$ otherwise $R_k=0$, $y_k$ is observed only for sample $S_A$ + and $\boldsymbol{x}_k$ is observed in both in both $S_A$ and $S_B$, ### When unit-level data is available for non-probability survey only @@ -87,7 +126,9 @@ Suppose $Y$ is the target variable, $\boldsymbol{X}$ is a matrix of auxiliary va ## Examples -Simulate example data from the following paper: Kim, Jae Kwang, and Zhonglei Wang. "Sampling techniques for big data analysis." International Statistical Review 87 (2019): S177-S191 $$section 5.2$$ +Simulate example data from the following paper: Kim, Jae Kwang, and +Zhonglei Wang. “Sampling techniques for big data analysis.” +International Statistical Review 87 (2019): S177-S191 \[section 5.2\] ``` r library(survey) @@ -117,7 +158,8 @@ sample_prob <- svydesign(ids= ~1, weights = ~ base_w_srs, data = subset(population, flag_srs == 1)) ``` -Estimate population mean of `y1` based on doubly robust estimator using IPW with calibration constraints. +Estimate population mean of `y1` based on doubly robust estimator using +IPW with calibration constraints. ``` r result_dr <- nonprob( @@ -292,36 +334,69 @@ summary(result_ipw) ## Funding -Work on this package is supported by the National Science Centre, OPUS 22 grant no. 2020/39/B/HS4/00941. 
+Work on this package is supported by the National Science Centre, OPUS +22 grant no. 2020/39/B/HS4/00941. ## References (selected) -::: {#refs .references .csl-bib-body .hanging-indent} -::: {#ref-chen2020 .csl-entry} -Chen, Yilin, Pengfei Li, and Changbao Wu. 2020. "Doubly Robust Inference With Nonprobability Survey Samples." *Journal of the American Statistical Association* 115 (532): 2011--21. . -::: +
+ +
+ +Chen, Yilin, Pengfei Li, and Changbao Wu. 2020. “Doubly Robust Inference +With Nonprobability Survey Samples.” *Journal of the American +Statistical Association* 115 (532): 2011–21. +. + +
+ +
+ +Kim, Jae Kwang, Seho Park, Yilin Chen, and Changbao Wu. 2021. “Combining +Non-Probability and Probability Survey Samples Through Mass Imputation.” +*Journal of the Royal Statistical Society Series A: Statistics in +Society* 184 (3): 941–63. . + +
+ +
+ +Lumley, Thomas. 2004. “Analysis of Complex Survey Samples.” *Journal of +Statistical Software* 9 (1): 1–19. + +
+ +
+ +———. 2023. “Survey: Analysis of Complex Survey Samples.” + +
+ +
+ +Wu, Changbao. 2023. “Statistical Inference with Non-Probability Survey +Samples.” *Survey Methodology* 48 (2): 283–311. +. + +
+ +
-::: {#ref-kim2021 .csl-entry} -Kim, Jae Kwang, Seho Park, Yilin Chen, and Changbao Wu. 2021. "Combining Non-Probability and Probability Survey Samples Through Mass Imputation." *Journal of the Royal Statistical Society Series A: Statistics in Society* 184 (3): 941--63. . -::: +Yang, Shu, Jae Kwang Kim, and Youngdeok Hwang. 2021. “Integration of +Data from Probability Surveys and Big Found Data for Finite Population +Inference Using Mass Imputation.” *Survey Methodology* 47 (1): 29–58. +. -::: {#ref-Lumley2004 .csl-entry} -Lumley, Thomas. 2004. "Analysis of Complex Survey Samples." *Journal of Statistical Software* 9 (1): 1--19. -::: +
-::: {#ref-Lumley2023 .csl-entry} ----------. 2023. "Survey: Analysis of Complex Survey Samples." -::: +
-::: {#ref-wu2023 .csl-entry} -Wu, Changbao. 2023. "Statistical Inference with Non-Probability Survey Samples." *Survey Methodology* 48 (2): 283--311. . -::: +Yang, Shu, Jae Kwang Kim, and Rui Song. 2020. “Doubly Robust Inference +When Combining Probability and Non-Probability Samples with High +Dimensional Data.” *Journal of the Royal Statistical Society Series B: +Statistical Methodology* 82 (2): 445–65. +. -::: {#ref-yang2021 .csl-entry} -Yang, Shu, Jae Kwang Kim, and Youngdeok Hwang. 2021. "Integration of Data from Probability Surveys and Big Found Data for Finite Population Inference Using Mass Imputation." *Survey Methodology* 47 (1): 29--58. . -::: +
-::: {#ref-yang2020 .csl-entry} -Yang, Shu, Jae Kwang Kim, and Rui Song. 2020. "Doubly Robust Inference When Combining Probability and Non-Probability Samples with High Dimensional Data." *Journal of the Royal Statistical Society Series B: Statistical Methodology* 82 (2): 445--65. . -::: -::: +
From 1c734816a0cba8fa5c91f38abe2ef8b766e786fd Mon Sep 17 00:00:00 2001 From: LukaszChrostowski Date: Thu, 23 Nov 2023 11:18:02 +0100 Subject: [PATCH 3/4] readme update --- README.Rmd | 15 +++++++-------- README.md | 14 +++++++------- 2 files changed, 14 insertions(+), 15 deletions(-) diff --git a/README.Rmd b/README.Rmd index 38a518d..6c78b4c 100644 --- a/README.Rmd +++ b/README.Rmd @@ -103,12 +103,11 @@ the of the target variable given the observed data set $(y_k, \boldsymbol{x}_k, R_k)$, we can approach this problem with the possible scenarios: -- unit-level data is available for the non-probability sample $A$, - i.e. - $(y_k, \boldsymbol{x}_k)$ is available for all units - $k \in S_A$, and population-level data is available for - $\boldsymbol{x}_1, ..., \boldsymbol{x}_p$, denoted as - $\tau_{x_1}, \tau_{x_2}, ..., \tau_{x_p}$ and population size $N$ is +- unit-level data is available for the non-probability sample $S_{A}$, + i.e. $(y_{k}, \boldsymbol{x}_{k})$ is available for all units + $k \in S_{A}$, and population-level data is available for + $\boldsymbol{x}_{1}, ..., \boldsymbol{x}_{p}$, denoted as + $\tau_{x_{1}}, \tau_{x_{2}}, ..., \tau_{x_{p}}$ and population size $N$ is known. We can also consider situations where population data are estimated (e.g. on the basis of a survey to which we do not have access), @@ -122,7 +121,7 @@ possible scenarios: ### When unit-level data is available for non-probability survey only | Estimator | Example code | -|--------------------|----------------------------------------------------| +|---------------------|---------------------------------------------------| | Mass imputation based on regression imputation | `` nonprob(outcome = y ~ x1 + x2 + ... + xk, data = nonprob, pop_totals = c(`(Intercept)`=N, x1=tau_x1, x2=tau_x2, ..., xk=tau_xk), method_outcome = "glm", family_outcome = "gaussian") `` | | Inverse probability weighting | `` nonprob(selection = ~ x1 + x2 + ... + xk, target = ~ y, data = nonprob, pop_totals = c(`(Intercept)`=N, x1=tau_x1, x2=tau_x2, ..., xk=tau_xk), method_selection = "logit") `` | | Inverse probability weighting with calibration constraint | `` nonprob(selection = ~ x1 + x2 + ... + xk, target = ~ y, data = nonprob, pop_totals = c(`(Intercept)`=N, x1=tau_x1, x2=tau_x2, ..., xk=tau_xk), method_selection = "logit", control_selection = controlSel(est_method_sel = "gee", h = 1)) `` | @@ -131,7 +130,7 @@ possible scenarios: ### When unit-level data are available for both surveys | Estimator | Example code | -|----------------------|-------------------------------------------------| +|-----------------------|-------------------------------------------------| | Mass imputation based on regression imputation | `nonprob(outcome = y ~ x1 + x2 + ... + xk, data = nonprob, svydesign = prob, method_outcome = "glm", family_outcome = "gaussian")` | | Mass imputation based on nearest neighbour imputation | `nonprob(outcome = y ~ x1 + x2 + ... + xk, data = nonprob, svydesign = prob, method_outcome = "nn", family_outcome = "gaussian", control_outcome=controlOutcome(k = 2))` | | Mass imputation based on predictive mean matching | `nonprob(outcome = y ~ x1 + x2 + ... 
+ xk, data = nonprob, svydesign = prob, method_outcome = "pmm", family_outcome = "gaussian")` | diff --git a/README.md b/README.md index 24d1e70..18b4881 100644 --- a/README.md +++ b/README.md @@ -87,13 +87,13 @@ the of the target variable given the observed data set $(y_k, \boldsymbol{x}_k, R_k)$, we can approach this problem with the possible scenarios: -- unit-level data is available for the non-probability sample $A$, i.e.  - $(y_k, \boldsymbol{x}_k)$ is available for all units $k \in S_A$, and - population-level data is available for - $\boldsymbol{x}_1, ..., \boldsymbol{x}_p$, denoted as - $\tau_{x_1}, \tau_{x_2}, ..., \tau_{x_p}$ and population size $N$ is - known. We can also consider situations where population data are - estimated (e.g. on the basis of a survey to which we do not have +- unit-level data is available for the non-probability sample $S_{A}$, + i.e. $(y_{k}, \boldsymbol{x}_{k})$ is available for all units + $k \in S_{A}$, and population-level data is available for + $\boldsymbol{x}_{1}, ..., \boldsymbol{x}_{p}$, denoted as + $\tau_{x_{1}}, \tau_{x_{2}}, ..., \tau_{x_{p}}$ and population size + $N$ is known. We can also consider situations where population data + are estimated (e.g. on the basis of a survey to which we do not have access), - unit-level data is available for the non-probability sample $S_A$ and the probability sample $S_B$, i.e. $(y_k, \boldsymbol{x}_k, R_k)$ is From 0df7c9fb37cd506fdc6bb87ea5b320a5fac91bec Mon Sep 17 00:00:00 2001 From: LukaszChrostowski Date: Mon, 27 Nov 2023 14:41:06 +0100 Subject: [PATCH 4/4] changes in documentation and customize S3methods to bias minimization model --- DESCRIPTION | 4 +- R/EstimationMethods.R | 35 ++++++----- R/cloglogModel.R | 4 ++ R/control.R | 13 +++++ R/family.R | 11 ++-- R/generateData.r | 6 +- R/internals.R | 2 +- R/logitModel.R | 6 ++ R/main_function_documentation.R | 39 +++++++++---- R/methods.R | 100 ++++++++++++++++++++------------ R/nonprobDR.R | 18 +++--- R/probitModel.R | 4 ++ man/cloglog_model_nonprobsvy.Rd | 3 + man/controlInf.Rd | 3 + man/controlOut.Rd | 3 + man/controlSel.Rd | 3 + man/genSimData.Rd | 6 +- man/logit_model_nonprobsvy.Rd | 3 + man/nonprob.Rd | 39 +++++++++---- man/probit_model_nonprobsvy.Rd | 3 + references.bib | 4 +- 21 files changed, 216 insertions(+), 93 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 869dd3a..66b3e64 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -3,12 +3,12 @@ Type: Package Title: Package for Inference Based on Nonprobability Samples Version: 0.1.0 Authors@R: - c(person(given = "Lukasz", + c(person(given = "Łukasz", family = "Chrostowski", role = c("aut", "cre"), email = "lukchr@st.amu.edu.pl"), person(given = "Maciej", - family = "Beresewicz", + family = "Beręsewicz", role = c("aut", "ctb"), email = "maciej.beresewicz@ue.poznan.pl"), person(given = "Piotr", diff --git a/R/EstimationMethods.R b/R/EstimationMethods.R index 71de446..ec90a44 100644 --- a/R/EstimationMethods.R +++ b/R/EstimationMethods.R @@ -135,7 +135,7 @@ mle <- function(...) { ps_nons_der <- dinv_link(eta_nons) est_ps_rand_der <- dinv_link(eta_rand) - resids <- c(est_ps_rand, ps_nons) - R + resids <- R - c(est_ps_rand, ps_nons) variance <- (t(resids) %*% resids) / df_reduced @@ -293,7 +293,7 @@ gee <- function(...) 
{ ps_nons <- inv_link(eta_nons) est_ps_rand <- inv_link(eta_rand) variance_covariance <- solve(-hess) - resids <- c(est_ps_rand, ps_nons) - R + resids <- R - c(est_ps_rand, ps_nons) df_reduced <- nrow(X) - length(theta_hat) variance <- as.vector((t(resids) %*% resids) / df_reduced) @@ -382,6 +382,7 @@ mm <- function(X, y, weights, weights_rand, R, n_nons, n_rand, method_selection, names(theta_hat) <- names(beta_hat) <- colnames(X) df_residual <- nrow(X) - length(theta_hat) + # selection parameters ps <- inv_link(theta_hat %*% t(X)) # inv_link(as.vector(X_design %*% as.matrix(theta_hat))) eta_sel <- theta_hat %*% t(X) ps_der <- dinv_link(eta_sel) @@ -389,8 +390,8 @@ mm <- function(X, y, weights, weights_rand, R, n_nons, n_rand, method_selection, est_ps_rand <- ps[loc_rand] ps_nons_der <- ps_der[loc_nons] weights_nons <- 1/ps_nons - resids <- c(est_ps_rand, ps_nons) - R - variance <- (t(resids) %*% resids) / df_residual + resids <- R - c(est_ps_rand, ps_nons) + variance <- as.vector( (t(resids) %*% resids) / df_residual) if (!boot) { N_nons <- sum(weights * weights_nons) @@ -407,17 +408,19 @@ mm <- function(X, y, weights, weights_rand, R, n_nons, n_rand, method_selection, y_nons_pred <- y_hat[loc_nons] if (!boot) { - sigma <- family$variance(mu = y_hat, y = y[loc_rand]) - residuals <- family$residuals(mu = y_rand_pred, y = y[loc_rand]) + sigma_nons <- family$variance(mu = y_nons_pred, y = y[loc_nons]) + sigma_rand <- family$variance(mu = y_rand_pred, y = y[loc_rand]) + residuals <- family$residuals(mu = y_nons_pred, y = y[loc_nons]) } if (!boot) { # variance-covariance matrix for outcome model # vcov_outcome <- solve(t(X_design) %*% diag(sigma) %*% X_design) - vcov_outcome <- solve(t(X) %*% (sigma * X)) + vcov_outcome <- solve(t(X[loc_nons,]) %*% (sigma_nons * X[loc_nons,])) beta_errors <- sqrt(diag(vcov_outcome)) } + # grad = multiroot$f.root[(p+1):(2*p)] if (!boot) { hess <- NULL selection <- list(theta_hat = theta_hat, # TODO list as close as possible to SelecttionList @@ -428,24 +431,26 @@ mm <- function(X, y, weights, weights_rand, R, n_nons, n_rand, method_selection, variance_covariance = vcov_selection, df_residual = df_residual, log_likelihood = "NULL", - linear.predictors = eta_sel, + eta = eta_sel, aic = NULL, residuals = resids, variance = variance, method = method) outcome <- list(coefficients = beta_hat, # TODO list as close as possible to glm - grad = multiroot$f.root[(p+1):(2*p)], - hess = hess, # TODO + std_err = beta_errors, variance_covariance = vcov_outcome, df_residual = df_residual, - family = list(mu = y_hat, - variance = sigma[loc_rand], - residuals = residuals), + family = list(mu = y_nons_pred, + variance = sigma_nons, + family = family$family), + residuals = residuals, + fitted.values = y_nons_pred, + sigma_rand = sigma_rand, y_rand_pred = y_rand_pred, y_nons_pred = y_nons_pred, - log_likelihood = "NULL", - linear.predictors = eta_out) + linear.predictors = eta_out[loc_nons], + X = X[loc_nons,]) } else { selection <- list(coefficients = theta_hat,# TODO list as close as possible to SelecttionList ps_nons = ps_nons) diff --git a/R/cloglogModel.R b/R/cloglogModel.R index 371e7a0..b2749b2 100644 --- a/R/cloglogModel.R +++ b/R/cloglogModel.R @@ -5,6 +5,10 @@ #' #' @param ... Additional, optional arguments. #' +#' @seealso +#' +#' [nonprob()] -- for fitting procedure with non-probability samples. +#' #' @return List with selected methods/objects/functions. 
#' @importFrom maxLik maxLik #' @importFrom Matrix Matrix diff --git a/R/control.R b/R/control.R index e0f37b8..afb4d1f 100644 --- a/R/control.R +++ b/R/control.R @@ -37,6 +37,10 @@ #' #' @return List with selected parameters. #' +#' @seealso +#' +#' [nonprob()] -- for fitting procedure with non-probability samples. +#' #' @export controlSel <- function(method = "glm.fit", # perhaps another control function for model with variables selection @@ -102,6 +106,11 @@ controlSel <- function(method = "glm.fit", # perhaps another control function fo #' #' @return List with selected parameters. #' +#' @seealso +#' +#' [nonprob()] -- for fitting procedure with non-probability samples. +#' +#' #' @export controlOut <- function(epsilon = 1e-6, @@ -155,6 +164,10 @@ controlOut <- function(epsilon = 1e-6, #' #' @return List with selected parameters. #' +#' @seealso +#' +#' [nonprob()] -- for fitting procedure with non-probability samples. +#' #' @export controlInf <- function(vars_selection = FALSE, diff --git a/R/family.R b/R/family.R index b115e2a..7fc5adc 100644 --- a/R/family.R +++ b/R/family.R @@ -10,13 +10,14 @@ poisson_nonprobsvy <- function(link = "log") { structure(list(mu = mu, variance = variance, mu_der = mu_der, - residuals = residuals), + residuals = residuals, + family = "poisson"), class = "family") } gaussian_nonprobsvy <- function(link = "identity") { mu <- function(eta) eta - variance <- function(mu, y = NULL) rep.int(1, length(mu)) #mean((y - mu)^2) rep.int(1, length(mu)) + variance <- function(mu, y = NULL) mean((y - mu)^2) #rep.int(1, length(mu)) rep.int(1, length(mu)) mu_der <- function(mu) 1 # first derivative mu_der2 <- function(mu) 0 # second derivative residuals <- function(mu, y) as.vector(y - mu) @@ -24,7 +25,8 @@ gaussian_nonprobsvy <- function(link = "identity") { structure(list(mu = mu, variance = variance, mu_der = mu_der, - residuals = residuals), + residuals = residuals, + family = "gaussian"), class = "family") } @@ -41,6 +43,7 @@ binomial_nonprobsvy <- function(link = "logit") { structure(list(mu = mu, variance = variance, mu_der = mu_der, - residuals = residuals), + residuals = residuals, + family = "binomial"), class = "family") } diff --git a/R/generateData.r b/R/generateData.r index ce1f87d..e081169 100644 --- a/R/generateData.r +++ b/R/generateData.r @@ -16,9 +16,9 @@ NULL #' \item{x2 -- the second variable based on z2 and x1} #' \item{x3 -- the third variable based on z3 and x1 and x2} #' \item{x4 -- the third variable based on z4 and x1, x2 and x3} -#' \item{\mjseqn{y30} -- \mjseqn{y} generated from the model \mjseqn{y=2+x1+x2+x3+x4+\sigma*\epsilon}, so the cor(y,y_hat) = 0.30} -#' \item{\mjseqn{y60} -- \mjseqn{y} generated from the model \mjseqn{y=2+x1+x2+x3+x4+\sigma*\\epsilon}, so the cor(y,y_hat) = 0.60} -#' \item{\mjseqn{y80} -- \mjseqn{y} generated from the model \mjseqn{y=2+x1+x2+x3+x4+\sigma*\epsilon}, so the cor(y,y_hat) = 0.80} +#' \item{\mjseqn{y30} -- \mjseqn{y} generated from the model \mjseqn{y=2+x1+x2+x3+x4+\sigma \cdot \varepsilon}, so the cor(y,y_hat) = 0.30} +#' \item{\mjseqn{y60} -- \mjseqn{y} generated from the model \mjseqn{y=2+x1+x2+x3+x4+\sigma \cdot \varepsilon}, so the cor(y,y_hat) = 0.60} +#' \item{\mjseqn{y80} -- \mjseqn{y} generated from the model \mjseqn{y=2+x1+x2+x3+x4+\sigma \cdot \varepsilon}, so the cor(y,y_hat) = 0.80} #' \item{rho -- true propensity scores for big data such that sum(rho)=n} #' \item{srs -- probabilities of inclusion to random sample such that max(srs)/min(srs)=50} #' } diff --git a/R/internals.R 
b/R/internals.R index 521ef2d..4e74416 100644 --- a/R/internals.R +++ b/R/internals.R @@ -387,7 +387,7 @@ internal_varDR <- function(OutcomeModel, svydesign_mean <- survey::svymean(~y_rand, svydesign) var_prob <- as.vector(attr(svydesign_mean, "var")) # based on survey package, probability component - var_nonprob <- (sum((infl1) - 2*infl2) + sum(weights_rand * sigma))/N_nons^2 # nonprobability component + var_nonprob <- (sum((infl1) - 2*infl2) + sum(weights_rand * sigma))/N_nons^2 # TODO potential bug here nonprobability component } else { eta <- as.vector(SelectionModel$X_nons %*% as.matrix(theta)) h_n <- 1/N_nons * sum(OutcomeModel$y_nons - y_nons_pred) # TODO add weights # errors mean diff --git a/R/logitModel.R b/R/logitModel.R index c675972..c9d7779 100644 --- a/R/logitModel.R +++ b/R/logitModel.R @@ -7,9 +7,15 @@ #' #' @return List with selected methods/objects/functions. #' +#' @seealso +#' +#' [nonprob()] -- for fitting procedure with non-probability samples. +#' #' @importFrom maxLik maxLik #' @importFrom Matrix Matrix #' @importFrom survey svyrecvar +#' +#' #' @export logit_model_nonprobsvy <- function(...) { diff --git a/R/main_function_documentation.R b/R/main_function_documentation.R index 80b5eb7..53c50a6 100644 --- a/R/main_function_documentation.R +++ b/R/main_function_documentation.R @@ -6,8 +6,12 @@ NULL #' \loadmathjax #' @description \code{nonprob} fits model for inference based on non-probability surveys (including big data) using various methods. #' The function allows you to estimate the population mean having access to a reference probability sample as well as total/mean values of covariates. -#' In the package implemented state-of-the-art approaches recently proposed in the literature: Chen et al. (2020), Yang et al. (2020), Wu (2022) and use `survey` package [Lumley 2004](https://CRAN.R-project.org/package=survey) for inference. -#' Provided propensity score weighting (e.g. with calibration constraints), mass imputation (e.g. nearest neighbour) and doubly robust estimators that take into account minimization of the asymptotic bias of the population mean estimators, +#' +#' In the package implemented state-of-the-art approaches recently proposed in the literature: Chen et al. (2020), +#' Yang et al. (2020), Wu (2022) and use `survey` package [Lumley 2004](https://CRAN.R-project.org/package=survey) for inference. +#' +#' Provided propensity score weighting (e.g. with calibration constraints), mass imputation (e.g. nearest neighbour) and +#' doubly robust estimators that take into account minimization of the asymptotic bias of the population mean estimators, #' variable selection or overlap between random and non-random sample. #' The package uses `survey` package functionalities when a probability sample is available. #' @@ -84,7 +88,7 @@ NULL #' Inverse probability approach is based on assumption that reference probability sample #' is available and therefore we can estimate propensity score of selection mechanism. #' Estimator has following form: -#' \mjsdeqn{\mu_{IPW} = \frac{1}{N^{A}}\sum_{i \in S_{A}} \frac{y_{i}}{\hat{\pi}_{i}^{A}}.} +#' \mjsdeqn{\hat{\mu}_{IPW} = \frac{1}{N^{A}}\sum_{i \in S_{A}} \frac{y_{i}}{\hat{\pi}_{i}^{A}}.} #' For this purpose with consider multiple ways of estimation. The first approach is Maximum Likelihood Estimation with corrected #' log-likelihood function which is given by the following formula #' \mjsdeqn{ @@ -101,14 +105,14 @@ NULL #' where imputed values of the outcome variables are created for whole probability sample. 
#' In this case we treat big-data sample as a training dataset, which is used to build an imputation model. Using imputed values #' for probability sample and design (known) weights, we can build population mean estimator of form: -#' \mjsdeqn{\mu_{MI} = \frac{1}{N^B}\sum_{i \in S_{B}} d_{i}^{B} \hat{y}_i.} +#' \mjsdeqn{\hat{\mu}_{MI} = \frac{1}{N^B}\sum_{i \in S_{B}} d_{i}^{B} \hat{y}_i.} #' It opens the the door to very flexible method for imputation model. In the package used generalized linear models from [stats::glm()] #' nearest neighbour algorithm using [RANN::nn2()] and predictive mean matching. #' #' 3. Doubly robust estimation -- The IPW and MI estimators are sensible on misspecified models for propensity score and outcome variable respectively. #' For this purpose so called doubly-robust methods, which take into account these problems, are presented. #' It is quite simple idea of combination propensity score and imputation models during inference which lead to the following estimator -#' \mjsdeqn{\mu_{DR} = \frac{1}{N^A}\sum_{i \in S_A} \hat{d}_i^A (y_i - \hat{y}_i) + \frac{1}{N^B}\sum_{i \in S_B} d_i^B \hat{y}_i.} +#' \mjsdeqn{\hat{\mu}_{DR} = \frac{1}{N^A}\sum_{i \in S_A} \hat{d}_i^A (y_i - \hat{y}_i) + \frac{1}{N^B}\sum_{i \in S_B} d_i^B \hat{y}_i.} #' In addition, an approach based directly on bias minimisation has been implemented. Following formula #' \mjsdeqn{ #' \begin{aligned} @@ -191,14 +195,29 @@ NULL #' \item{\code{nonprob_size} -- size of non-probability sample} #' \item{\code{prob_size} -- size of probability sample} #' \item{\code{pop_size} -- estimated population size derived from estimated weights (non-probability sample) or known design weights (probability sample)} -#' \item{\code{outcome} -- list containing information about fitting of mass imputation model, in case of regression model, object containing list returned by the function. -#' Additionaly when variables selection model for outcome variable is fitting, list includes of \code{cve} -- the error for each value of `lambda`, averaged accross the cross-validation folds. -#' [stats::glm()], in case of nearest neigbour imputation object containing list returned by [RANN::nn2()].} +#' \item{\code{outcome} -- list containing information about fitting of mass imputation model, in case of regression model, object containing list returned by the function +#' [stats::glm()], in case of nearest neighbour imputation object containing list returned by [RANN::nn2()]. 
If `bias_correction` in [controlInf()] is set on `TRUE`, then estimation is based on +#' the joint estimating equations for `selection` and `outcome` model and therefore, the list differs from the one returned by the [stats::glm()] function and contains elements such as +#' \itemize{ +#' \item{\code{coefficients} -- estimated coefficients of the regression model} +#' \item{\code{std_err} -- standard errors of the estimated coefficients} +#' \item{\code{residuals} -- The response residuals} +#' \item{\code{variance_covariance} -- The variance-covariance matrix of the coefficient estimates} +#' \item{\code{df_residual} -- The degrees of freedom for residuals} +#' \item{\code{family} -- specifies the error distribution and link function to be used in the model} +#' \item{\code{fitted.values} -- The predicted values of the response variable based on the fitted model} +#' \item{\code{linear.predictors} -- The linear fit on link scale} +#' \item{\code{X} -- The design matrix} +#' \item{\code{method} -- set on `glm`, since the regression method} +#' } +#' } +#' Additionally when variables selection model for outcome variable is fitting, list includes of \code{cve} -- the error for each value of `lambda`, averaged across the cross-validation folds. #' \item{\code{selection} -- list containing information about fitting of propensity score model, such as #' \itemize{ #' \item{\code{coefficients} -- a named vector of coefficients} #' \item{\code{std_err} -- standard errors of the estimated model coefficients} -#' \item{\code{residuals} -- the working residuals} +#' \item{\code{residuals} -- the response residuals} +#' \item{\code{variance} -- the root mean square error} #' \item{\code{fitted_values} -- the fitted mean values, obtained by transforming the linear predictors by the inverse of the link function.} #' \item{\code{link} -- the `link` object used.} #' \item{\code{linear_predictors} -- the linear fit on link scale.} @@ -208,7 +227,7 @@ NULL #' \item{\code{formula} -- the formula supplied.} #' \item{\code{df_residual} -- the residual degrees of freedom.} #' \item{\code{log_likelihood} -- value of log-likelihood function if `mle` method, in the other case `NULL`.} -#' \item{\code{cve} -- the error for each value of `lambda`, averaged accross the cross-validation folds for variables selection model +#' \item{\code{cve} -- the error for each value of `lambda`, averaged across the cross-validation folds for variables selection model #' when propensity score model is fitting.} #' } #' } diff --git a/R/methods.R b/R/methods.R index 586e91e..3c03dc6 100644 --- a/R/methods.R +++ b/R/methods.R @@ -238,8 +238,6 @@ print.summary_nonprobsvy <- function(x, invisible(x) } -# Internal functions, no need for documenting them - #' @method nobs nonprobsvy #' @importFrom stats nobs #' @exportS3Method @@ -270,23 +268,31 @@ residuals.nonprobsvy <- function(object, "deviance", "response", "pearsonSTD"), - ...) { # TODO for pop_totals and variable selection + ...) 
{ # TODO for pop_totals if (length(type) > 1) { type <- "response" } - if (any(c("nonprobsvy_dr", "nonprobsvy_mi") %in% class(object))) { # TODO for mm model - if (object$control$control_inference$vars_selection == FALSE) { - res_out <- residuals(object$outcome[[1]]) - } else { - r <- object$outcome$family$residuals - res_out <- switch(type, - "response" = r, - "working" = r/object$outcome[[1]]$family$mu, - # TODO "deviance" = - "pearson" = r/sqrt(object$outcome[[1]]$family$variance), - "pearsonSTD" = r/sqrt( (1 - hatvalues(object$outcome)$outcome) * object$outcome[[1]]$family$variance) - ) + if (any(c("nonprobsvy_dr", "nonprobsvy_mi") %in% class(object))) { + if (type %in% c("deviance", "pearson", "working", + "response", "partial")) { + if (object$control$control_inference$bias_correction == TRUE) { + r <- object$outcome[[1]]$residuals + res_out <- switch(type, + pearson = r, + working = r * sqrt(object$selection$prior.weights), + deviance = r * sqrt(abs(object$selection$prior.weights * object$outcome[[1]]$family$variance)), + response = r / object$outcome[[1]]$family$mu, + pearsonSTD = r/sqrt( (1 - hatvalues(object)$outcome) * object$outcome[[1]]$family$variance), + stop("Invalid type of residual. Choose from 'pearson', 'working', 'deviance', 'response', 'pearsonSTD'.") + ) + } else { + res_out <- residuals(object$outcome[[1]], type = type) + } + } else if (type == "pearsonSTD") { + r <- object$outcome[[1]]$residuals + variance <- as.vector( (t(r) %*% r) / length(object$outcome[[1]]$coefficients) ) + res_out <- r/sqrt( (1 - hatvalues(object)$outcome) * variance) } } if (any(c("nonprobsvy_dr", "nonprobsvy_ipw") %in% class(object))) { @@ -300,11 +306,13 @@ residuals.nonprobsvy <- function(object, } r <- object$selection$residuals res_sel <- switch(type, - "pearson" = (R - propensity_scores)/sqrt(propensity_scores * (1 - propensity_scores)), + "response" = r, + "working" = r / propensity_scores * (1 - propensity_scores), + "pearson" = r / sqrt(propensity_scores * (1 - propensity_scores)), "deviance" = s * sqrt(-2 * (R * log(propensity_scores) + (1 - R) * log(1 - propensity_scores))), - "response" = R - propensity_scores, - "pearsonSTD" = r/sqrt( (1 - hatvalues(object)$selection) * object$selection$variance) - ) # TODO studentized_pearson, studentized_deviance + "pearsonSTD" = r/sqrt( (1 - hatvalues(object)$selection) * object$selection$variance), + stop("Invalid type of residual. 
Choose from 'pearson', 'working', 'deviance', 'response', 'pearsonSTD'.") + ) # TODO add partial } if (class(object)[2] == "nonprobsvy_mi") res <- list(outcome = res_out) if (class(object)[2] == "nonprobsvy_ipw") res <- list(selection = res_sel) @@ -323,18 +331,21 @@ cooks.distance.nonprobsvy <- function(model, residuals_out <- resids$outcome^2 res_out <- cooks.distance(model$outcome[[1]]) res <- list(outcome = res_out) - } else if (class(model)[2] == "nonprobsvy_ipw") - { + } else if (class(model)[2] == "nonprobsvy_ipw") { residuals_sel <- resids$selection^2 hats <- hats$selection res_sel <- (residuals_sel * (hats / (length( model$selection$coefficients )))) res <- list(selection = res_sel) } else if (class(model)[2] == "nonprobsvy_dr") { - residuals_sel <- resids$selection^2 residuals_out <- resids$outcome^2 + if (model$control$control_inference$bias_correction == TRUE) { + res_out <- (residuals_out * (hats$outcome / (length( model$outcome[[1]]$coefficients )))) + } else { + res_out <- cooks.distance(model$outcome[[1]]) + } + residuals_sel <- resids$selection^2 hats <- hats$selection res_sel <- (residuals_sel * (hats / (length( model$selection$coefficients )))) - res_out <- cooks.distance(model$outcome[[1]]) res <- list(selection = res_sel, outcome = res_out) } res @@ -346,19 +357,32 @@ cooks.distance.nonprobsvy <- function(model, hatvalues.nonprobsvy <- function(model, ...) { # TODO reduce execution time and glm.fit object and customise to variable selection if (any(c("nonprobsvy_dr", "nonprobsvy_ipw") %in% class(model))) { + X <- model$X propensity_scores <- model$prob W <- Matrix::Diagonal(x = propensity_scores * (1 - propensity_scores)) - XWX_inv <- solve(t(model$X) %*% W %*% model$X) - hat_values_sel <- vector(mode = "numeric", length = length(propensity_scores)) - - for (i in 1:length(hat_values_sel)) { - hat_values_sel[i] <- W[i,i] * model$X[i,] %*% XWX_inv %*% model$X[i,] - } - #hats <- Matrix::Diagonal(x = W %*% object$X %*% XWX_inv %*% t(object$X)) - #names(hat_values) <- row.names(model$parameters) + H <- as.matrix(sqrt(W) %*% X %*% solve(t(X) %*% W %*% X) %*% t(X) %*% sqrt(W)) + hat_values_sel <- diag(H) } if (any(c("nonprobsvy_dr", "nonprobsvy_mi") %in% class(model))) { - hat_values_out <- hatvalues(model$outcome$glm) # TODO + if (model$control$control_inference$bias_correction == TRUE) { + # TODO for mm consider switch + X_out <- model$outcome[[1]]$X + if (model$outcome[[1]]$family$family == "gaussian") { + hat_matrix <- X_out %*% solve(t(X_out) %*% X_out) %*% t(X_out) + } else if (model$outcome[[1]]$family$family == "poisson") { + W <- diag(1 / model$outcome[[1]]$fitted_values) + + hat_matrix <- X_out %*% solve(t(X_out) %*% W %*% X_out) %*% t(X_out) + + } else if (model$outcome[[1]]$family$family == "binomial") { + W <- Matrix::Diagonal(model$outcome[[1]]$family$mu * (1 - model$outcome[[1]]$family$mu)) + + hat_matrix <- as.matrix(sqrt(W) %*% X_out %*% solve(t(X_out) %*% W %*% X_out) %*% t(X_out) %*% sqrt(W)) + } + hat_values_out <- diag(hat_matrix) + } else { + hat_values_out <- hatvalues(model$outcome[[1]]) + } } if (class(model)[2] == "nonprobsvy_mi") res <- list(outcome = hat_values_out) @@ -412,7 +436,7 @@ BIC.nonprobsvy <- function(object, if (!is.null(object$outcome[[1]]$coefficients)) { if (object$control$control_inference$vars_selection == TRUE) { options(AIC="BIC") - res_out <- HelpersMG::ExtractAIC.glm(object$outcome)[2] + res_out <- HelpersMG::ExtractAIC.glm(object$outcome[[1]])[2] } else { res_out <- BIC(object$outcome[[1]]) } @@ -508,8 +532,12 @@ 
@@ -508,8 +532,12 @@ vcov.nonprobsvy <- function(object,
 #' @importFrom stats deviance
 #' @exportS3Method
 deviance.nonprobsvy <- function(object,
-                                ...) {
-  res_out <- object$outcome[[1]]$deviance
-  res_out
+                                ...) { # TODO if conditions like method = "glm"
+  if (any(c("nonprobsvy_dr", "nonprobsvy_mi") %in% class(object))) res_out <- object$outcome[[1]]$deviance
+  if (any(c("nonprobsvy_dr", "nonprobsvy_ipw") %in% class(object))) res_sel <- -2 * logLik(object) # TODO for selection model - use selection object
+  if (class(object)[2] == "nonprobsvy_mi") res <- c("outcome" = res_out)
+  if (class(object)[2] == "nonprobsvy_ipw") res <- c("selection" = res_sel)
+  if (class(object)[2] == "nonprobsvy_dr") res <- c("selection" = res_sel, "outcome" = res_out)
+  res
 }
diff --git a/R/nonprobDR.R b/R/nonprobDR.R
index 215b3f5..4840a75 100644
--- a/R/nonprobDR.R
+++ b/R/nonprobDR.R
@@ -67,6 +67,8 @@ nonprobDR <- function(selection,
   # Selection models
   if (is.null(pop_totals) && !is.null(svydesign)) {
+
+    # TODO if (bias_corr == FALSE)
     #model for selection formula
     SelectionModel <- model_frame(formula = selection,
                                   data = data,
@@ -281,9 +283,9 @@ nonprobDR <- function(selection,
                                      start_outcome = start_outcome)
 
     selection_model <- estimation_model$selection
-    OutcomeList <- outcome_model <- estimation_model$outcome
+    OutcomeList[[k]] <- outcome_model <- estimation_model$outcome
 
-    theta_hat <- selection_model$coefficients
+    theta_hat <- selection_model$theta_hat
     grad <- selection_model$grad
     hess <- selection_model$hess
     ps_nons <- selection_model$ps_nons
@@ -291,14 +293,16 @@ nonprobDR <- function(selection,
     ps_nons_der <- selection_model$ps_nons_der
     est_ps_rand_der <- selection_model$est_ps_rand_der
     theta_standard_errors <- sqrt(diag(selection_model$variance_covariance))
-    names(theta_hat) <- names(selection_model$coefficients) <- colnames(X)
+    names(theta_hat) <- names(selection_model$theta_hat) <- colnames(X)
 
     y_rand_pred <- outcome_model$y_rand_pred
     y_nons_pred <- outcome_model$y_nons_pred
     beta <- outcome_model$coefficients
     beta_errors <- sqrt(diag(outcome_model$variance_covariance))
     beta_statistics <- data.frame(beta = beta, beta_errors = beta_errors)
-    sigma <- outcome_model$family$var
+    sigma <- outcome_model$sigma_rand
+
+    OutcomeList[[k]][c("sigma_rand", "y_rand_pred", "y_nons_pred")] <- NULL
 
     weights_nons <- 1/ps_nons
     N_nons <- sum(weights * weights_nons)
@@ -716,7 +720,7 @@ nonprobDR <- function(selection,
     SelectionList <- list(coefficients = selection_model$theta_hat,
                           std_err = theta_standard_errors,
                           residuals = selection_model$residuals,
-                          variance = selection_model$variance,
+                          variance = as.vector(selection_model$variance),
                           fitted_values = prop_scores,
                           family = selection_model$method,
                           linear_predictors = selection_model$eta,
@@ -744,8 +748,8 @@ nonprobDR <- function(selection,
                          nonprob_size = n_nons,
                          prob_size = n_rand,
                          pop_size = pop_size,
-                         outcome = OutcomeList, # TODO consider what this list should include of
-                         selection = SelectionList # TODO consider what this list should include of
+                         outcome = OutcomeList,
+                         selection = SelectionList
                         ),
                    class = c("nonprobsvy", "nonprobsvy_dr"))
 }
diff --git a/R/probitModel.R b/R/probitModel.R
index 4ef4d46..396e2a1 100644
--- a/R/probitModel.R
+++ b/R/probitModel.R
@@ -7,6 +7,10 @@
 #'
 #' @return List with selected methods/objects/functions.
 #'
+#' @seealso
+#'
+#' [nonprob()] -- for the fitting procedure with non-probability samples.
+#'
 #' @importFrom maxLik maxLik
 #' @importFrom stats pnorm
 #' @importFrom stats dnorm
diff --git a/man/cloglog_model_nonprobsvy.Rd b/man/cloglog_model_nonprobsvy.Rd
index d3dd936..58eb563 100644
--- a/man/cloglog_model_nonprobsvy.Rd
+++ b/man/cloglog_model_nonprobsvy.Rd
@@ -15,6 +15,9 @@ List with selected methods/objects/functions.
 \description{
 \code{cloglog_model_nonprobsvy} returns all methods/objects/functions required for the model estimation assuming a complementary log-log link function.
 }
+\seealso{
+\code{\link[=nonprob]{nonprob()}} -- for the fitting procedure with non-probability samples.
+}
 \author{
 Łukasz Chrostowski, Maciej Beręsewicz
 }
diff --git a/man/controlInf.Rd b/man/controlInf.Rd
index 4cdebea..3616d58 100644
--- a/man/controlInf.Rd
+++ b/man/controlInf.Rd
@@ -46,3 +46,6 @@ List with selected parameters.
 \code{controlInf} constructs a list with all necessary control parameters
 for statistical inference.
 }
+\seealso{
+\code{\link[=nonprob]{nonprob()}} -- for the fitting procedure with non-probability samples.
+}
diff --git a/man/controlOut.Rd b/man/controlOut.Rd
index 9b93c6d..d1aa3b5 100644
--- a/man/controlOut.Rd
+++ b/man/controlOut.Rd
@@ -51,3 +51,6 @@ List with selected parameters.
 \code{controlOut} constructs a list with all necessary control parameters
 for outcome model.
 }
+\seealso{
+\code{\link[=nonprob]{nonprob()}} -- for the fitting procedure with non-probability samples.
+}
diff --git a/man/controlSel.Rd b/man/controlSel.Rd
index 7f93e77..d275c7b 100644
--- a/man/controlSel.Rd
+++ b/man/controlSel.Rd
@@ -90,6 +90,9 @@ for selection model.
 
 \loadmathjax
 }
+\seealso{
+\code{\link[=nonprob]{nonprob()}} -- for the fitting procedure with non-probability samples.
+}
 \author{
 Łukasz Chrostowski, Maciej Beręsewicz
 }
diff --git a/man/genSimData.Rd b/man/genSimData.Rd
index 2712dd1..31571d4 100644
--- a/man/genSimData.Rd
+++ b/man/genSimData.Rd
@@ -19,9 +19,9 @@ genSimData(N = 10000, n = 1000)
 \item{x2 -- the second variable based on z2 and x1}
 \item{x3 -- the third variable based on z3 and x1 and x2}
 \item{x4 -- the fourth variable based on z4 and x1, x2 and x3}
-\item{\mjseqn{y30} -- \mjseqn{y} generated from the model \mjseqn{y=2+x1+x2+x3+x4+\sigma*\epsilon}, so the cor(y,y_hat) = 0.30}
-\item{\mjseqn{y60} -- \mjseqn{y} generated from the model \mjseqn{y=2+x1+x2+x3+x4+\sigma*\\epsilon}, so the cor(y,y_hat) = 0.60}
-\item{\mjseqn{y80} -- \mjseqn{y} generated from the model \mjseqn{y=2+x1+x2+x3+x4+\sigma*\epsilon}, so the cor(y,y_hat) = 0.80}
+\item{\mjseqn{y30} -- \mjseqn{y} generated from the model \mjseqn{y=2+x1+x2+x3+x4+\sigma \cdot \varepsilon}, so the cor(y,y_hat) = 0.30}
+\item{\mjseqn{y60} -- \mjseqn{y} generated from the model \mjseqn{y=2+x1+x2+x3+x4+\sigma \cdot \varepsilon}, so the cor(y,y_hat) = 0.60}
+\item{\mjseqn{y80} -- \mjseqn{y} generated from the model \mjseqn{y=2+x1+x2+x3+x4+\sigma \cdot \varepsilon}, so the cor(y,y_hat) = 0.80}
 \item{rho -- true propensity scores for big data such that sum(rho)=n}
 \item{srs -- probabilities of inclusion to random sample such that max(srs)/min(srs)=50}
 }
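The \mjseqn{\sigma} in the y30/y60/y80 models above is what calibrates cor(y, y_hat) to the stated target: with y = 2 + lin + sigma * eps and eps independent of lin, cor(y, lin) = sd(lin) / sqrt(var(lin) + sigma^2), which solves to sigma = sd(lin) * sqrt(1 / rho^2 - 1). A minimal sketch of that calibration, simplified to i.i.d. normal covariates (not the package's actual `genSimData()` code, where the x's are built from latent z's):

```r
# Pick sigma so that cor(y, x1 + x2 + x3 + x4) is approximately rho.
set.seed(2023)
N   <- 10000
x1  <- rnorm(N); x2 <- rnorm(N); x3 <- rnorm(N); x4 <- rnorm(N)
lin <- x1 + x2 + x3 + x4

rho   <- 0.8                               # target correlation, as for y80
sigma <- sd(lin) * sqrt(1 / rho^2 - 1)
y80   <- 2 + lin + sigma * rnorm(N)

cor(y80, lin)                              # approx. 0.80
```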
diff --git a/man/logit_model_nonprobsvy.Rd b/man/logit_model_nonprobsvy.Rd
index f179bd5..d8ae1e6 100644
--- a/man/logit_model_nonprobsvy.Rd
+++ b/man/logit_model_nonprobsvy.Rd
@@ -15,6 +15,9 @@ List with selected methods/objects/functions.
 \description{
 \code{logit_model_nonprobsvy} returns all methods/objects/functions required for the model estimation assuming a logit link function.
 }
+\seealso{
+\code{\link[=nonprob]{nonprob()}} -- for the fitting procedure with non-probability samples.
+}
 \author{
 Łukasz Chrostowski, Maciej Beręsewicz
 }
diff --git a/man/nonprob.Rd b/man/nonprob.Rd
index bf56ad3..db6495a 100644
--- a/man/nonprob.Rd
+++ b/man/nonprob.Rd
@@ -102,14 +102,29 @@ with type \code{list} containing:\cr
 \item{\code{nonprob_size} -- size of non-probability sample}
 \item{\code{prob_size} -- size of probability sample}
 \item{\code{pop_size} -- estimated population size derived from estimated weights (non-probability sample) or known design weights (probability sample)}
-\item{\code{outcome} -- list containing information about fitting of mass imputation model, in case of regression model, object containing list returned by the function.
-Additionaly when variables selection model for outcome variable is fitting, list includes of \code{cve} -- the error for each value of \code{lambda}, averaged accross the cross-validation folds.
-\code{\link[stats:glm]{stats::glm()}}, in case of nearest neigbour imputation object containing list returned by \code{\link[RANN:nn2]{RANN::nn2()}}.}
+\item{\code{outcome} -- list containing information about the fitting of the mass imputation model; in the case of a regression model, an object containing the list returned by
+\code{\link[stats:glm]{stats::glm()}}, and in the case of nearest neighbour imputation an object containing the list returned by \code{\link[RANN:nn2]{RANN::nn2()}}. If \code{bias_correction} in \code{\link[=controlInf]{controlInf()}} is set to \code{TRUE}, then estimation is based on
+the joint estimating equations for the \code{selection} and \code{outcome} models and therefore the list differs from the one returned by the \code{\link[stats:glm]{stats::glm()}} function and contains elements such as
+\itemize{
+\item{\code{coefficients} -- estimated coefficients of the regression model}
+\item{\code{std_err} -- standard errors of the estimated coefficients}
+\item{\code{residuals} -- the response residuals}
+\item{\code{variance_covariance} -- the variance-covariance matrix of the coefficient estimates}
+\item{\code{df_residual} -- the residual degrees of freedom}
+\item{\code{family} -- the error distribution and link function used in the model}
+\item{\code{fitted.values} -- the predicted values of the response variable based on the fitted model}
+\item{\code{linear.predictors} -- the linear fit on the link scale}
+\item{\code{X} -- the design matrix}
+\item{\code{method} -- set to \code{glm}, as the regression method}
+}
+}
+Additionally, when a variable selection model is fitted for the outcome variable, the list includes \code{cve} -- the error for each value of \code{lambda}, averaged across the cross-validation folds.
 \item{\code{selection} -- list containing information about fitting of propensity score model, such as
 \itemize{
 \item{\code{coefficients} -- a named vector of coefficients}
 \item{\code{std_err} -- standard errors of the estimated model coefficients}
-\item{\code{residuals} -- the working residuals}
+\item{\code{residuals} -- the response residuals}
+\item{\code{variance} -- the estimated residual variance}
 \item{\code{fitted_values} -- the fitted mean values, obtained by transforming the linear predictors by the inverse of the link function.}
 \item{\code{link} -- the \code{link} object used.}
 \item{\code{linear_predictors} -- the linear fit on link scale.}
@@ -119,7 +134,7 @@ Additionaly when variables selection model for outcome variable is fitting, list
 \item{\code{formula} -- the formula supplied.}
 \item{\code{df_residual} -- the residual degrees of freedom.}
 \item{\code{log_likelihood} -- value of the log-likelihood function if the \code{mle} method is used, otherwise \code{NULL}.}
-\item{\code{cve} -- the error for each value of \code{lambda}, averaged accross the cross-validation folds for variables selection model
+\item{\code{cve} -- the error for each value of \code{lambda}, averaged across the cross-validation folds for the variable selection model
 when the propensity score model is fitted.}
 }
 }
 \description{
 \code{nonprob} fits model for inference based on non-probability surveys (including big data) using various methods. The function allows you to estimate the population mean having access to a reference probability sample as well as total/mean values of covariates.
-In the package implemented state-of-the-art approaches recently proposed in the literature: Chen et al. (2020), Yang et al. (2020), Wu (2022) and use \code{survey} package \href{https://CRAN.R-project.org/package=survey}{Lumley 2004} for inference.
-Provided propensity score weighting (e.g. with calibration constraints), mass imputation (e.g. nearest neighbour) and doubly robust estimators that take into account minimization of the asymptotic bias of the population mean estimators,
+
+The package implements state-of-the-art approaches recently proposed in the literature: Chen et al. (2020),
+Yang et al. (2020) and Wu (2022), and uses the \code{survey} package \href{https://CRAN.R-project.org/package=survey}{Lumley 2004} for inference.
+
+It provides propensity score weighting (e.g. with calibration constraints), mass imputation (e.g. nearest neighbour) and
+doubly robust estimators that take into account minimization of the asymptotic bias of the population mean estimators,
 variable selection or overlap between random and non-random sample.
 The package uses \code{survey} package functionalities when a probability sample is available.
 }
@@ -182,7 +201,7 @@ This is why we talk about so called “biased sample” problem.
 Inverse probability approach is based on the assumption that a reference probability sample is available and therefore we can estimate the propensity score of the selection mechanism.
 The estimator has the following form:
-\mjsdeqn{\mu_{IPW} = \frac{1}{N^{A}}\sum_{i \in S_{A}} \frac{y_{i}}{\hat{\pi}_{i}^{A}}.}
+\mjsdeqn{\hat{\mu}_{IPW} = \frac{1}{N^{A}}\sum_{i \in S_{A}} \frac{y_{i}}{\hat{\pi}_{i}^{A}}.}
 For this purpose we consider multiple ways of estimation. The first approach is Maximum Likelihood Estimation with a corrected log-likelihood function, which is given by the following formula
 \mjsdeqn{
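To make the IPW estimator above concrete, here is a hedged sketch in R; all inputs are simulated stand-ins (the propensity scores are placeholders, not the output of `nonprob()`):

```r
# hat(mu)_IPW = (1 / N^A) * sum_{i in S_A} y_i / pi_i, with N^A estimated
# by the sum of the inverse propensity scores.
set.seed(1)
n_A    <- 500
y      <- rnorm(n_A, mean = 10)          # outcome in the non-probability sample
pi_hat <- runif(n_A, 0.05, 0.5)          # stand-in for estimated propensity scores
d_hat  <- 1 / pi_hat                     # estimated weights
N_hat  <- sum(d_hat)                     # estimated population size
mu_ipw <- sum(d_hat * y) / N_hat
mu_ipw
```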
@@ -198,13 +217,13 @@ sample and can use vector of population totals/means.
 where imputed values of the outcome variables are created for the whole probability sample. In this case, we treat the big data sample as a training dataset which is used to build an imputation model.
 Using imputed values for the probability sample and the (known) design weights, we can build a population mean estimator of the form:
-\mjsdeqn{\mu_{MI} = \frac{1}{N^B}\sum_{i \in S_{B}} d_{i}^{B} \hat{y}_i.}
+\mjsdeqn{\hat{\mu}_{MI} = \frac{1}{N^B}\sum_{i \in S_{B}} d_{i}^{B} \hat{y}_i.}
 It opens the door to a very flexible choice of imputation model. The package uses generalized linear models from \code{\link[stats:glm]{stats::glm()}},
 the nearest neighbour algorithm using \code{\link[RANN:nn2]{RANN::nn2()}} and predictive mean matching.
 \item Doubly robust estimation -- The IPW and MI estimators are sensitive to misspecified models for the propensity score and the outcome variable, respectively. For this reason, so-called doubly robust methods, which take these problems into account, are presented. The idea is to combine the propensity score and imputation models during inference, which leads to the following estimator
-\mjsdeqn{\mu_{DR} = \frac{1}{N^A}\sum_{i \in S_A} \hat{d}_i^A (y_i - \hat{y}_i) + \frac{1}{N^B}\sum_{i \in S_B} d_i^B \hat{y}_i.}
+\mjsdeqn{\hat{\mu}_{DR} = \frac{1}{N^A}\sum_{i \in S_A} \hat{d}_i^A (y_i - \hat{y}_i) + \frac{1}{N^B}\sum_{i \in S_B} d_i^B \hat{y}_i.}
 In addition, an approach based directly on bias minimisation has been implemented. The following formula
 \mjsdeqn{
 \begin{aligned}
diff --git a/man/probit_model_nonprobsvy.Rd b/man/probit_model_nonprobsvy.Rd
index ce4f868..9eec038 100644
--- a/man/probit_model_nonprobsvy.Rd
+++ b/man/probit_model_nonprobsvy.Rd
@@ -15,6 +15,9 @@ List with selected methods/objects/functions.
 \description{
 \code{probit_model_nonprobsvy} returns all methods/objects/functions required for the model estimation assuming a probit link function.
 }
+\seealso{
+\code{\link[=nonprob]{nonprob()}} -- for the fitting procedure with non-probability samples.
+}
 \author{
 Łukasz Chrostowski, Maciej Beręsewicz
 }
diff --git a/references.bib b/references.bib
index 458d9d7..fce9e33 100644
--- a/references.bib
+++ b/references.bib
@@ -64,7 +64,7 @@ @Misc{Lumley2023
   note = {R package version 4.2},
 }
 
-@Article{Lumley2004,
+@article{Lumley2004,
   year = {2004},
   author = {Thomas Lumley},
   title = {Analysis of Complex Survey Samples},
   volume = {9},
   number = {1},
   pages = {1-19},
-  note = {R package verson 2.2},
+  note = {R package version 2.2},
 }
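To close the loop on the Details section above, a hedged sketch of the doubly robust estimator \mjseqn{\hat{\mu}_{DR}}; every input is a simulated stand-in for quantities that in practice come from the fitted selection and outcome models:

```r
# hat(mu)_DR = (1/N^A) * sum_{S_A} d_i^A * (y_i - y_hat_i)
#            + (1/N^B) * sum_{S_B} d_i^B * y_hat_i
set.seed(2)
n_A <- 500; n_B <- 300
y_A     <- rnorm(n_A, mean = 10)          # outcomes observed in S_A
y_hat_A <- y_A - rnorm(n_A)               # stand-in imputed values for S_A
d_hat_A <- 1 / runif(n_A, 0.05, 0.5)      # stand-in estimated weights for S_A
y_hat_B <- rnorm(n_B, mean = 10)          # stand-in imputed values for S_B
d_B     <- runif(n_B, 10, 50)             # known design weights for S_B

N_A <- sum(d_hat_A); N_B <- sum(d_B)
mu_dr <- sum(d_hat_A * (y_A - y_hat_A)) / N_A + sum(d_B * y_hat_B) / N_B
mu_dr
```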