Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update docu of internal functions #103

Merged
merged 1 commit into from
Nov 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions R/utils_calculate.R
Original file line number Diff line number Diff line change
Expand Up @@ -95,9 +95,9 @@ colMeans_factor <- function(x) {
out
}

#' Fast Weighted Mean by Fixed-Length Groups
#' Grouped Column Means
#'
#' Internal workhorse to aggregate predictions over fixed-length grids.
#' Internal workhorse to aggregate predictions over fixed-length groups.
#'
#' @noRd
#' @keywords internal
Expand Down Expand Up @@ -138,7 +138,7 @@ wrowmean <- function(x, ngroups = 1L, w = NULL) {

#' (w)rowmean() for Factors (without weights)
#'
#' `colMeans_factor()` for equal-sized groups.
#' Grouped `colMeans_factor()` for equal-sized groups.
#'
#' @noRd
#' @keywords internal
Expand All @@ -158,7 +158,7 @@ rowmean_factor <- function(x, ngroups = 1L) {

#' wrowmean() for Vectors
#'
#' Weighted column means over fixed-length groups for vectors or 1d matrices.
#' Grouped means over fixed-length groups for vectors or 1d matrices.
#'
#' @noRd
#' @keywords internal
Expand All @@ -182,7 +182,7 @@ wrowmean_vector <- function(x, ngroups = 1L, w = NULL) {

#' wrowmean() for Matrices
#'
#' Weighted column means over fixed-length groups for matrices.
#' Grouped column means over fixed-length groups for matrices.
#'
#' @noRd
#' @keywords internal
Expand Down
55 changes: 27 additions & 28 deletions backlog/benchmark.R
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ library(iml) # Might benefit from multiprocessing, but on Windows with XGB models
library(DALEX)
library(ingredients)
library(flashlight)
library(microbenchmark)
library(bench)

set.seed(1)

Expand All @@ -107,53 +107,52 @@ fl <- flashlight(
)

# Permutation importance: 10 repeats over full validation data (~2700 rows)
microbenchmark(
bench::mark(
iml = FeatureImp$new(mod, n.repetitions = 10, loss = "mse", compare = "difference"),
dalex = feature_importance(ex, B = 10, type = "difference", n_sample = Inf),
flashlight = light_importance(fl, v = x, n_max = Inf, m_repetitions = 10),
hstats = perm_importance(fit, X = X_valid, y = y_valid, m_rep = 10, verbose = FALSE),
times = 4
check = FALSE,
min_iterations = 3
)

# Unit: milliseconds
# expr min lq mean median uq max neval cld
# iml 1610.4464 1622.3517 1657.6455 1642.3422 1692.9394 1735.4514 4 a
# dalex 580.5633 628.7967 665.1718 685.7349 701.5470 708.6542 4 b
# flashlight 622.3130 630.7167 648.9690 648.5589 667.2214 676.4453 4 b
# hstats 332.1432 334.4255 337.0738 337.0140 339.7221 342.1240 4 c

# expression min median `itr/sec` mem_alloc `gc/sec` n_itr n_gc total_time
# iml 1.58s 1.58s 0.631 209.4MB 2.73 3 13 4.76s
# dalex 566.21ms 586.91ms 1.72 34.6MB 0.572 3 1 1.75s
# flashlight 587.03ms 613.15ms 1.63 27.1MB 1.63 3 3 1.84s
# hstats 353.78ms 360.57ms 2.79 27.2MB 0 3 0 1.08s

# Partial dependence (cont)
v <- "tot_lvg_area"
microbenchmark(
bench::mark(
iml = FeatureEffect$new(mod, feature = v, grid.size = 50, method = "pdp"),
dalex = partial_dependence(ex, variables = v, N = Inf, grid_points = 50),
flashlight = light_profile(fl, v = v, pd_n_max = Inf, n_bins = 50),
hstats = partial_dep(fit, v = v, X = X_valid, grid_size = 50, n_max = Inf),
times = 4
check = FALSE,
min_iterations = 3
)
# Unit: milliseconds
# expr min lq mean median uq max neval cld
# iml 1098.6226 1111.7868 1123.7506 1129.6484 1135.7144 1137.0828 4 a
# dalex 740.6559 762.3050 827.4134 784.8789 892.5218 999.2398 4 b
# flashlight 363.2473 368.0095 392.9258 373.7185 417.8420 461.0187 4 c
# hstats 213.4137 214.0246 225.5381 224.5216 237.0517 239.6956 4 d

# expression min median `itr/sec` mem_alloc `gc/sec` n_itr n_gc total_time
# iml 1.11s 1.13s 0.887 376.3MB 3.84 3 13 3.38s
# dalex 782.13ms 783.08ms 1.24 192.8MB 2.90 3 7 2.41s
# flashlight 367.73ms 372.5ms 2.68 67.9MB 2.68 3 3 1.12s
# hstats 220.88ms 222.5ms 4.50 14.2MB 0 3 0 666.33ms

# Partial dependence (discrete)
v <- "structure_quality"
microbenchmark(
bench::mark(
iml = FeatureEffect$new(mod, feature = v, method = "pdp", grid.points = 1:5),
dalex = partial_dependence(ex, variables = v, N = Inf, variable_type = "categorical", grid_points = 5),
flashlight = light_profile(fl, v = v, pd_n_max = Inf),
hstats = partial_dep(fit, v = v, X = X_valid, n_max = Inf),
times = 4
check = FALSE,
min_iterations = 3
)

# Unit: milliseconds
# expr min lq mean median uq max neval cld
# iml 96.5188 96.84865 101.15893 99.27995 105.46920 109.5570 4 a
# dalex 166.5767 167.09505 169.68585 169.94295 172.27665 172.2808 4 b
# flashlight 40.8074 41.76215 49.22383 44.56515 56.68550 66.9576 4 c
# hstats 23.7283 23.86510 24.99588 24.01180 26.12665 28.2316 4 d
# expression min median `itr/sec` mem_alloc `gc/sec` n_itr n_gc total_time
# iml 90ms 96ms 10.6 13.29MB 7.06 3 2 283ms
# dalex 170.6ms 174.4ms 5.73 20.55MB 2.87 2 1 349ms
# flashlight 40.8ms 43.8ms 23.1 6.36MB 2.10 11 1 476ms
# hstats 23.5ms 24.4ms 40.6 1.53MB 2.14 19 1 468ms

# H-Stats -> we use a subset of 500 rows
X_v500 <- X_valid[1:500, ]
Expand Down