Skip to content

Commit c18a07c

Browse files
committed
Added more info.
1 parent 628086f commit c18a07c

File tree

2 files changed

+232
-226
lines changed

2 files changed

+232
-226
lines changed

chap7.R

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,14 +22,18 @@ library(ggplot2)
2222
#' The ruspini data set is in package cluster. It is a very simple data set with well-separated clusters.
2323
data(ruspini, package = "cluster")
2424

25-
#' Shuffle rows
25+
#' Shuffle rows (using `sample_frac()`, which by default samples 100% of the rows without replacement).
2626
ruspini <- as_tibble(ruspini) %>% sample_frac()
2727
ruspini
2828

2929
ggplot(ruspini, aes(x = x, y = y)) + geom_point()
3030

3131
#' Scale each column in the data to zero mean and unit standard deviation (z-scores). This prevents one attribute with a large range from dominating the others in the distance calculation.
32-
ruspini_scaled <- ruspini %>% scale() %>% as_tibble()
32+
#' _Note:_ The standard `scale()` function scales whole data.frames and returns a matrix, so we implement a function for a single vector and apply it to all numeric
33+
#' columns.
34+
# Standardize a single numeric vector to z-scores.
# The mean and standard deviation are computed with missing values removed;
# NA entries in `x` stay NA in the result.
scale2 <- function(x) {
  centered <- x - mean(x, na.rm = TRUE)
  centered / sd(x, na.rm = TRUE)
}
35+
# Standardize every numeric column with scale2(); mutate_if() leaves non-numeric columns untouched.
ruspini_scaled <- ruspini %>% mutate_if(is.numeric, scale2)
36+
3337
ggplot(ruspini_scaled, aes(x = x, y = y)) + geom_point()
3438

3539
#' # Clustering methods

0 commit comments

Comments
 (0)