diff --git a/R/utils_calculate.R b/R/utils_calculate.R
index 875b4b9d..760c62ee 100644
--- a/R/utils_calculate.R
+++ b/R/utils_calculate.R
@@ -95,9 +95,9 @@ colMeans_factor <- function(x) {
   out
 }
 
-#' Fast Weighted Mean by Fixed-Length Groups
+#' Grouped Column Means
 #' 
-#' Internal workhorse to aggregate predictions over fixed-length grids.
+#' Internal workhorse to aggregate predictions over fixed-length groups.
 #' 
 #' @noRd
 #' @keywords internal
@@ -138,7 +138,7 @@ wrowmean <- function(x, ngroups = 1L, w = NULL) {
 
 #' (w)rowmean() for Factors (without weights)
 #'
-#' `colMeans_factor()` for equal sized groups.
+#' Grouped `colMeans_factor()` for equal-sized groups.
 #'
 #' @noRd
 #' @keywords internal
@@ -158,7 +158,7 @@ rowmean_factor <- function(x, ngroups = 1L) {
 
 #' wrowmean() for Vectors
 #'
-#' Weighted column means over fixed-length groups for vectors or 1d matrices.
+#' Grouped (optionally weighted) means of fixed-length groups for vectors or 1d matrices.
 #'
 #' @noRd
 #' @keywords internal
@@ -182,7 +182,7 @@ wrowmean_vector <- function(x, ngroups = 1L, w = NULL) {
 
 #' wrowmean() for Matrices
 #'
-#' Weighted column means over fixed-length groups for matrices.
+#' Grouped (optionally weighted) column means of fixed-length groups for matrices.
 #'
 #' @noRd
 #' @keywords internal
diff --git a/backlog/benchmark.R b/backlog/benchmark.R
index 4f1308ef..8a80cab2 100644
--- a/backlog/benchmark.R
+++ b/backlog/benchmark.R
@@ -89,7 +89,7 @@ library(iml)  # Might benefit of multiprocessing, but on Windows with XGB models
 library(DALEX)
 library(ingredients)
 library(flashlight)
-library(microbenchmark)
+library(bench)
 
 set.seed(1)
 
@@ -107,53 +107,52 @@ fl <- flashlight(
 )
   
 # Permutation importance: 10 repeats over full validation data (~2700 rows)
-microbenchmark(
+bench::mark(
   iml = FeatureImp$new(mod, n.repetitions = 10, loss = "mse", compare = "difference"),
   dalex = feature_importance(ex, B = 10, type = "difference", n_sample = Inf),
   flashlight = light_importance(fl, v = x, n_max = Inf, m_repetitions = 10),
   hstats = perm_importance(fit, X = X_valid, y = y_valid, m_rep = 10, verbose = FALSE),
-  times = 4
+  check = FALSE,
+  min_iterations = 3
 )
- 
-# Unit: milliseconds
-# expr        min        lq      mean    median        uq       max neval cld
-# iml        1610.4464 1622.3517 1657.6455 1642.3422 1692.9394 1735.4514     4 a  
-# dalex       580.5633  628.7967  665.1718  685.7349  701.5470  708.6542     4  b 
-# flashlight  622.3130  630.7167  648.9690  648.5589  667.2214  676.4453     4  b 
-# hstats      332.1432  334.4255  337.0738  337.0140  339.7221  342.1240     4   c
+
+# expression      min   median `itr/sec` mem_alloc `gc/sec` n_itr  n_gc total_time
+# iml           1.58s    1.58s     0.631   209.4MB    2.73      3    13      4.76s
+# dalex      566.21ms 586.91ms     1.72     34.6MB    0.572     3     1      1.75s
+# flashlight 587.03ms 613.15ms     1.63     27.1MB    1.63      3     3      1.84s
+# hstats     353.78ms 360.57ms     2.79     27.2MB    0         3     0      1.08s
 
 # Partial dependence (cont)
 v <- "tot_lvg_area"
-microbenchmark(
+bench::mark(
   iml = FeatureEffect$new(mod, feature = v, grid.size = 50, method = "pdp"),
   dalex = partial_dependence(ex, variables = v, N = Inf, grid_points = 50),
   flashlight = light_profile(fl, v = v, pd_n_max = Inf, n_bins = 50),
   hstats = partial_dep(fit, v = v, X = X_valid, grid_size = 50, n_max = Inf),
-  times = 4
+  check = FALSE,
+  min_iterations = 3
 )
-# Unit: milliseconds
-# expr             min        lq      mean    median        uq       max neval  cld
-# iml        1098.6226 1111.7868 1123.7506 1129.6484 1135.7144 1137.0828     4 a   
-# dalex       740.6559  762.3050  827.4134  784.8789  892.5218  999.2398     4  b  
-# flashlight  363.2473  368.0095  392.9258  373.7185  417.8420  461.0187     4   c 
-# hstats      213.4137  214.0246  225.5381  224.5216  237.0517  239.6956     4    d
-
+# expression      min   median `itr/sec` mem_alloc `gc/sec` n_itr  n_gc total_time
+# iml           1.11s    1.13s     0.887   376.3MB     3.84     3    13      3.38s
+# dalex      782.13ms 783.08ms     1.24    192.8MB     2.90     3     7      2.41s
+# flashlight 367.73ms  372.5ms     2.68     67.9MB     2.68     3     3      1.12s
+# hstats     220.88ms  222.5ms     4.50     14.2MB     0        3     0   666.33ms
+
 # Partial dependence (discrete)
 v <- "structure_quality"
-microbenchmark(
+bench::mark(
   iml = FeatureEffect$new(mod, feature = v, method = "pdp", grid.points = 1:5),
   dalex = partial_dependence(ex, variables = v, N = Inf, variable_type = "categorical", grid_points = 5),
   flashlight = light_profile(fl, v = v, pd_n_max = Inf),
   hstats = partial_dep(fit, v = v, X = X_valid, n_max = Inf),
-  times = 4
+  check = FALSE,
+  min_iterations = 3
 )
-
-# Unit: milliseconds
-# expr      min        lq      mean    median        uq      max neval  cld
-# iml         96.5188  96.84865 101.15893  99.27995 105.46920 109.5570     4 a   
-# dalex      166.5767 167.09505 169.68585 169.94295 172.27665 172.2808     4  b  
-# flashlight  40.8074  41.76215  49.22383  44.56515  56.68550  66.9576     4   c 
-# hstats      23.7283  23.86510  24.99588  24.01180  26.12665  28.2316     4    d
+# expression      min   median `itr/sec` mem_alloc `gc/sec` n_itr  n_gc total_time
+# iml            90ms     96ms     10.6    13.29MB     7.06     3     2      283ms
+# dalex       170.6ms  174.4ms      5.73   20.55MB     2.87     2     1      349ms
+# flashlight   40.8ms   43.8ms     23.1     6.36MB     2.10    11     1      476ms
+# hstats       23.5ms   24.4ms     40.6     1.53MB     2.14    19     1      468ms
 
 # H-Stats -> we use a subset of 500 rows
 X_v500 <- X_valid[1:500, ]