From f2f2aa7b74b3e3d8adfb237b7e75db63faf5ca13 Mon Sep 17 00:00:00 2001 From: Michael Mayer Date: Fri, 29 Sep 2023 12:40:09 +0200 Subject: [PATCH 1/2] CRAN submission ready --- NEWS.md | 2 ++ R/hstats.R | 6 +++--- R/partial_dep.R | 2 +- cran-comments.md | 8 +++----- man/average_loss.Rd | 6 +++--- man/hstats.Rd | 6 +++--- man/ice.Rd | 6 +++--- man/partial_dep.Rd | 8 ++++---- man/perm_importance.Rd | 6 +++--- 9 files changed, 25 insertions(+), 25 deletions(-) diff --git a/NEWS.md b/NEWS.md index 535342fd..aadc03ba 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,7 @@ # hstats 0.3.0 +This is intended to be the last version before 1.0.0. + ## Visible changes - Grid of `ice()` and `partial_dep()`: So far, the default grid strategy "uniform" used `pretty()` to generate the evaluation points. To provide more predictable grid sizes, and to be more in line with other implementations of partial dependence and ICE, we now use `seq()` to create the uniform grid. diff --git a/R/hstats.R b/R/hstats.R index b87c9283..d8402b6e 100644 --- a/R/hstats.R +++ b/R/hstats.R @@ -27,9 +27,9 @@ #' @param pred_fun Prediction function of the form `function(object, X, ...)`, #' providing \eqn{K \ge 1} predictions per row. Its first argument represents the #' model `object`, its second argument a data structure like `X`. Additional arguments -#' (such as `type = "response"` in a GLM) can be passed via `...`. The default, -#' [stats::predict()], will work in most cases. Note that column names in a resulting -#' matrix of predictions will be used as default column names in the results. +#' (such as `type = "response"` in a GLM, or `reshape = TRUE` in a multiclass XGBoost +#' model) can be passed via `...`. The default, [stats::predict()], will work in +#' most cases. #' @param n_max If `X` has more than `n_max` rows, a random sample of `n_max` rows is #' selected from `X`. In this case, set a random seed for reproducibility. #' @param w Optional vector of case weights for each row of `X`. diff --git a/R/partial_dep.R b/R/partial_dep.R index 5ef9c902..cd44b767 100644 --- a/R/partial_dep.R +++ b/R/partial_dep.R @@ -2,7 +2,7 @@ #' #' Estimates the partial dependence function of feature(s) `v` over a #' grid of values. Both multivariate and multivariable situations are supported. -#' By default, the resulting values are plotted. +#' The resulting object can be plotted via `plot()`. #' #' @section Partial Dependence Functions: #' diff --git a/cran-comments.md b/cran-comments.md index 148d2928..382d71b3 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -1,10 +1,8 @@ -# hstats 0.2.0 +# hstats 0.3.0 -Hello CRAN team +Dear CRAN team -{hstats} received a fast, multivariate permutation importance function, and some API improvements. Not yet ready for version 1.0.0, but almost :-). - -Thanks a lot for caring about R so much +{hstats} is now one step away from stable version 1.0.0. Michael diff --git a/man/average_loss.Rd b/man/average_loss.Rd index 9f16652d..11cae79a 100644 --- a/man/average_loss.Rd +++ b/man/average_loss.Rd @@ -68,9 +68,9 @@ for instance \code{type = "response"} in a \code{\link[=glm]{glm()}} model.} \item{pred_fun}{Prediction function of the form \verb{function(object, X, ...)}, providing \eqn{K \ge 1} predictions per row. Its first argument represents the model \code{object}, its second argument a data structure like \code{X}. Additional arguments -(such as \code{type = "response"} in a GLM) can be passed via \code{...}. The default, -\code{\link[stats:predict]{stats::predict()}}, will work in most cases. Note that column names in a resulting -matrix of predictions will be used as default column names in the results.} +(such as \code{type = "response"} in a GLM, or \code{reshape = TRUE} in a multiclass XGBoost +model) can be passed via \code{...}. The default, \code{\link[stats:predict]{stats::predict()}}, will work in +most cases.} \item{BY}{Optional grouping vector.} diff --git a/man/hstats.Rd b/man/hstats.Rd index 560c828e..21bbee6a 100644 --- a/man/hstats.Rd +++ b/man/hstats.Rd @@ -75,9 +75,9 @@ for instance \code{type = "response"} in a \code{\link[=glm]{glm()}} model.} \item{pred_fun}{Prediction function of the form \verb{function(object, X, ...)}, providing \eqn{K \ge 1} predictions per row. Its first argument represents the model \code{object}, its second argument a data structure like \code{X}. Additional arguments -(such as \code{type = "response"} in a GLM) can be passed via \code{...}. The default, -\code{\link[stats:predict]{stats::predict()}}, will work in most cases. Note that column names in a resulting -matrix of predictions will be used as default column names in the results.} +(such as \code{type = "response"} in a GLM, or \code{reshape = TRUE} in a multiclass XGBoost +model) can be passed via \code{...}. The default, \code{\link[stats:predict]{stats::predict()}}, will work in +most cases.} \item{n_max}{If \code{X} has more than \code{n_max} rows, a random sample of \code{n_max} rows is selected from \code{X}. In this case, set a random seed for reproducibility.} diff --git a/man/ice.Rd b/man/ice.Rd index 99a1dc2d..1c7edefb 100644 --- a/man/ice.Rd +++ b/man/ice.Rd @@ -79,9 +79,9 @@ for instance \code{type = "response"} in a \code{\link[=glm]{glm()}} model.} \item{pred_fun}{Prediction function of the form \verb{function(object, X, ...)}, providing \eqn{K \ge 1} predictions per row. Its first argument represents the model \code{object}, its second argument a data structure like \code{X}. Additional arguments -(such as \code{type = "response"} in a GLM) can be passed via \code{...}. The default, -\code{\link[stats:predict]{stats::predict()}}, will work in most cases. Note that column names in a resulting -matrix of predictions will be used as default column names in the results.} +(such as \code{type = "response"} in a GLM, or \code{reshape = TRUE} in a multiclass XGBoost +model) can be passed via \code{...}. The default, \code{\link[stats:predict]{stats::predict()}}, will work in +most cases.} \item{BY}{Optional grouping vector/matrix/data.frame (up to two columns), or up to two column names. Unlike with \code{\link[=partial_dep]{partial_dep()}}, these variables are not diff --git a/man/partial_dep.Rd b/man/partial_dep.Rd index 6f892b2c..389cbe94 100644 --- a/man/partial_dep.Rd +++ b/man/partial_dep.Rd @@ -87,9 +87,9 @@ for instance \code{type = "response"} in a \code{\link[=glm]{glm()}} model.} \item{pred_fun}{Prediction function of the form \verb{function(object, X, ...)}, providing \eqn{K \ge 1} predictions per row. Its first argument represents the model \code{object}, its second argument a data structure like \code{X}. Additional arguments -(such as \code{type = "response"} in a GLM) can be passed via \code{...}. The default, -\code{\link[stats:predict]{stats::predict()}}, will work in most cases. Note that column names in a resulting -matrix of predictions will be used as default column names in the results.} +(such as \code{type = "response"} in a GLM, or \code{reshape = TRUE} in a multiclass XGBoost +model) can be passed via \code{...}. The default, \code{\link[stats:predict]{stats::predict()}}, will work in +most cases.} \item{BY}{Optional grouping vector or a column name. The partial dependence function is calculated per \code{BY} group. Each \code{BY} group @@ -132,7 +132,7 @@ An object of class "partial_dep" containing these elements: \description{ Estimates the partial dependence function of feature(s) \code{v} over a grid of values. Both multivariate and multivariable situations are supported. -By default, the resulting values are plotted. +The resulting object can be plotted via \code{plot()}. } \section{Methods (by class)}{ \itemize{ diff --git a/man/perm_importance.Rd b/man/perm_importance.Rd index dbc93682..d2d4cbc7 100644 --- a/man/perm_importance.Rd +++ b/man/perm_importance.Rd @@ -89,9 +89,9 @@ for instance \code{type = "response"} in a \code{\link[=glm]{glm()}} model.} \item{pred_fun}{Prediction function of the form \verb{function(object, X, ...)}, providing \eqn{K \ge 1} predictions per row. Its first argument represents the model \code{object}, its second argument a data structure like \code{X}. Additional arguments -(such as \code{type = "response"} in a GLM) can be passed via \code{...}. The default, -\code{\link[stats:predict]{stats::predict()}}, will work in most cases. Note that column names in a resulting -matrix of predictions will be used as default column names in the results.} +(such as \code{type = "response"} in a GLM, or \code{reshape = TRUE} in a multiclass XGBoost +model) can be passed via \code{...}. The default, \code{\link[stats:predict]{stats::predict()}}, will work in +most cases.} \item{loss}{One of "squared_error", "logloss", "mlogloss", "poisson", "gamma", "absolute_error", "classification_error". Alternatively, a loss function From 630a5ddce026a0bc22c32e8f888e5d8d7295da89 Mon Sep 17 00:00:00 2001 From: Michael Mayer Date: Fri, 29 Sep 2023 13:47:45 +0200 Subject: [PATCH 2/2] on its way to CRAN --- CRAN-SUBMISSION | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/CRAN-SUBMISSION b/CRAN-SUBMISSION index 3e95b957..b85b7d0d 100644 --- a/CRAN-SUBMISSION +++ b/CRAN-SUBMISSION @@ -1,3 +1,3 @@ -Version: 0.2.0 -Date: 2023-09-03 16:23:41 UTC -SHA: 9fb4db1361c4593b956fb65fd122ed49d1b01caa +Version: 0.3.0 +Date: 2023-09-29 11:29:11 UTC +SHA: f2f2aa7b74b3e3d8adfb237b7e75db63faf5ca13