diff --git a/R/utils_calculate.R b/R/utils_calculate.R index 875b4b9d..760c62ee 100644 --- a/R/utils_calculate.R +++ b/R/utils_calculate.R @@ -95,9 +95,9 @@ colMeans_factor <- function(x) { out } -#' Fast Weighted Mean by Fixed-Length Groups +#' Grouped Column Means #' -#' Internal workhorse to aggregate predictions over fixed-length grids. +#' Internal workhorse to aggregate predictions over fixed-length groups. #' #' @noRd #' @keywords internal @@ -138,7 +138,7 @@ wrowmean <- function(x, ngroups = 1L, w = NULL) { #' (w)rowmean() for Factors (without weights) #' -#' `colMeans_factor()` for equal sized groups. +#' `colMeans_factor()` per group, for equal-sized groups. #' #' @noRd #' @keywords internal @@ -158,7 +158,7 @@ rowmean_factor <- function(x, ngroups = 1L) { #' wrowmean() for Vectors #' -#' Weighted column means over fixed-length groups for vectors or 1d matrices. +#' Means over fixed-length groups for vectors or 1d matrices. #' #' @noRd #' @keywords internal @@ -182,7 +182,7 @@ wrowmean_vector <- function(x, ngroups = 1L, w = NULL) { #' wrowmean() for Matrices #' -#' Weighted column means over fixed-length groups for matrices. +#' Column means over fixed-length groups for matrices. 
#' #' @noRd #' @keywords internal diff --git a/backlog/benchmark.R b/backlog/benchmark.R index 4f1308ef..8a80cab2 100644 --- a/backlog/benchmark.R +++ b/backlog/benchmark.R @@ -89,7 +89,7 @@ library(iml) # Might benefit of multiprocessing, but on Windows with XGB models library(DALEX) library(ingredients) library(flashlight) -library(microbenchmark) +library(bench) set.seed(1) @@ -107,53 +107,52 @@ fl <- flashlight( ) # Permutation importance: 10 repeats over full validation data (~2700 rows) -microbenchmark( +bench::mark( iml = FeatureImp$new(mod, n.repetitions = 10, loss = "mse", compare = "difference"), dalex = feature_importance(ex, B = 10, type = "difference", n_sample = Inf), flashlight = light_importance(fl, v = x, n_max = Inf, m_repetitions = 10), hstats = perm_importance(fit, X = X_valid, y = y_valid, m_rep = 10, verbose = FALSE), - times = 4 + check = FALSE, + min_iterations = 3 ) - -# Unit: milliseconds -# expr min lq mean median uq max neval cld -# iml 1610.4464 1622.3517 1657.6455 1642.3422 1692.9394 1735.4514 4 a -# dalex 580.5633 628.7967 665.1718 685.7349 701.5470 708.6542 4 b -# flashlight 622.3130 630.7167 648.9690 648.5589 667.2214 676.4453 4 b -# hstats 332.1432 334.4255 337.0738 337.0140 339.7221 342.1240 4 c + +# expression min median `itr/sec` mem_alloc `gc/sec` n_itr n_gc total_time +# iml 1.58s 1.58s 0.631 209.4MB 2.73 3 13 4.76s +# dalex 566.21ms 586.91ms 1.72 34.6MB 0.572 3 1 1.75s +# flashlight 587.03ms 613.15ms 1.63 27.1MB 1.63 3 3 1.84s +# hstats 353.78ms 360.57ms 2.79 27.2MB 0 3 0 1.08s # Partial dependence (cont) v <- "tot_lvg_area" -microbenchmark( +bench::mark( iml = FeatureEffect$new(mod, feature = v, grid.size = 50, method = "pdp"), dalex = partial_dependence(ex, variables = v, N = Inf, grid_points = 50), flashlight = light_profile(fl, v = v, pd_n_max = Inf, n_bins = 50), hstats = partial_dep(fit, v = v, X = X_valid, grid_size = 50, n_max = Inf), - times = 4 + check = FALSE, + min_iterations = 3 ) -# Unit: milliseconds -# expr 
min lq mean median uq max neval cld -# iml 1098.6226 1111.7868 1123.7506 1129.6484 1135.7144 1137.0828 4 a -# dalex 740.6559 762.3050 827.4134 784.8789 892.5218 999.2398 4 b -# flashlight 363.2473 368.0095 392.9258 373.7185 417.8420 461.0187 4 c -# hstats 213.4137 214.0246 225.5381 224.5216 237.0517 239.6956 4 d - +# expression min median `itr/sec` mem_alloc `gc/sec` n_itr n_gc total_time +# iml 1.11s 1.13s 0.887 376.3MB 3.84 3 13 3.38s +# dalex 782.13ms 783.08ms 1.24 192.8MB 2.90 3 7 2.41s +# flashlight 367.73ms 372.5ms 2.68 67.9MB 2.68 3 3 1.12s +# hstats 220.88ms 222.5ms 4.50 14.2MB 0 3 0 666.33ms + # Partial dependence (discrete) v <- "structure_quality" -microbenchmark( +bench::mark( iml = FeatureEffect$new(mod, feature = v, method = "pdp", grid.points = 1:5), dalex = partial_dependence(ex, variables = v, N = Inf, variable_type = "categorical", grid_points = 5), flashlight = light_profile(fl, v = v, pd_n_max = Inf), hstats = partial_dep(fit, v = v, X = X_valid, n_max = Inf), - times = 4 + check = FALSE, + min_iterations = 3 ) - -# Unit: milliseconds -# expr min lq mean median uq max neval cld -# iml 96.5188 96.84865 101.15893 99.27995 105.46920 109.5570 4 a -# dalex 166.5767 167.09505 169.68585 169.94295 172.27665 172.2808 4 b -# flashlight 40.8074 41.76215 49.22383 44.56515 56.68550 66.9576 4 c -# hstats 23.7283 23.86510 24.99588 24.01180 26.12665 28.2316 4 d +# expression min median `itr/sec` mem_alloc `gc/sec` n_itr n_gc total_time +# iml 90ms 96ms 10.6 13.29MB 7.06 3 2 283ms +# dalex 170.6ms 174.4ms 5.73 20.55MB 2.87 2 1 349ms +# flashlight 40.8ms 43.8ms 23.1 6.36MB 2.10 11 1 476ms +# hstats 23.5ms 24.4ms 40.6 1.53MB 2.14 19 1 468ms # H-Stats -> we use a subset of 500 rows X_v500 <- X_valid[1:500, ]