Fix bug & Added cluster_col to show_group_enrichment().

ShixiangWang · Mar 13, 2024 · ec7805c · ec7805c
1 parent e5cd187
commit ec7805c
Show file tree

Hide file tree

Showing 7 changed files with 74 additions and 25 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -93,5 +93,5 @@ Encoding: UTF-8
 LazyData: true
 Roxygen: list(markdown = TRUE, roclets = c("collate", "namespace", "rd",
     "roxytest::testthat_roclet"))
-RoxygenNote: 7.2.3
+RoxygenNote: 7.3.1
 Config/testthat/edition: 3
diff --git a/NAMESPACE b/NAMESPACE
@@ -1,5 +1,6 @@
 # Generated by roxygen2: do not edit by hand
 
+S3method(print,bytes)
 S3method(sig_tally,CopyNumber)
 S3method(sig_tally,MAF)
 S3method(sig_tally,RS)

diff --git a/NEWS.md b/NEWS.md
@@ -1,6 +1,8 @@
 # sigminer 2.3.1
 
-- - Fixed the error in generating DBS and INDEL matrix when only one sample input (#453).
+- Added `cluster_col` to `show_group_enrichment()`.
+- Fixed the bug that error returned when `cluster_row = TRUE` & `return_list = TRUE` in function `show_group_enrichment()`.
+- Fixed the error in generating DBS and INDEL matrix when only one sample input (#453).
 
 # sigminer 2.3.0
 

diff --git a/R/show_group_enrichment.R b/R/show_group_enrichment.R
@@ -20,7 +20,7 @@
 #' @param cut_labels when `cut_p_value` is `TRUE`, this option set the labels.
 #' @param fill_scale a `Scale` object generated by `ggplot2` package to
 #' set color for continuous values.
-#' @param cluster_row if `TRUE`, cluster rows with Hierarchical Clustering ('complete' method).
+#' @param cluster_row,cluster_col if `TRUE`, cluster rows (or columns) with Hierarchical Clustering ('complete' method).
 #' @param ... other parameters passing to [ggplot2::facet_wrap], only used
 #' when `return_list` is `FALSE`.
 #'
@@ -40,12 +40,13 @@ show_group_enrichment <- function(df_enrich,
                                     midpoint = ifelse(fill_by_p_value, 0, 1)
                                   ),
                                   cluster_row = FALSE,
+                                  cluster_col = FALSE,
                                   ...) {
   if (fill_by_p_value) {
     df_enrich$p_value_up <- if (use_fdr) {
-      ifelse(df_enrich$fdr == 0, abs(log10(df_enrich$fdr + .Machine$double.xmin)), abs(log10(df_enrich$fdr))) 
+      ifelse(df_enrich$fdr == 0, abs(log10(df_enrich$fdr + .Machine$double.xmin)), abs(log10(df_enrich$fdr)))
     } else {
-      ifelse(df_enrich$p_value == 0, abs(log10(df_enrich$p_value + .Machine$double.xmin)), abs(log10(df_enrich$p_value))) 
+      ifelse(df_enrich$p_value == 0, abs(log10(df_enrich$p_value + .Machine$double.xmin)), abs(log10(df_enrich$p_value)))
     }
     df_enrich$p_value_up <- data.table::fifelse(
       df_enrich$measure_observed >= 1,
@@ -68,7 +69,8 @@ show_group_enrichment <- function(df_enrich,
           cut_labels = cut_labels,
           add_text_annotation = add_text_annotation,
           use_fdr = use_fdr,
-          cluster_row = cluster_row
+          cluster_row = cluster_row,
+          cluster_col = cluster_col
         )
       ) -> xx
     p <- xx$gg
@@ -83,7 +85,8 @@ show_group_enrichment <- function(df_enrich,
       cut_labels = cut_labels,
       add_text_annotation = add_text_annotation,
       use_fdr = use_fdr,
-      cluster_row = cluster_row
+      cluster_row = cluster_row,
+      cluster_col = cluster_col
     ) +
       facet_wrap(~grp_var, scales = scales, ...)
   }
@@ -98,7 +101,8 @@ plot_enrichment_simple <- function(data, x, y, fill_scale,
                                    cut_labels = c("< -10", "< -1.3", "nosig", "> 1.3", "> 10"),
                                    add_text_annotation = TRUE,
                                    use_fdr = TRUE,
-                                   cluster_row = FALSE) {
+                                   cluster_row = FALSE,
+                                   cluster_col = FALSE) {
   if (fill_by_p_value) {
     data$measure_observed <- round(data$measure_observed, 2)
   } else {
@@ -116,30 +120,68 @@ plot_enrichment_simple <- function(data, x, y, fill_scale,
     )
   }
 
+  get_cluster_order <- function(x, bycol = FALSE) {
+    x <- x %>%
+      tibble::column_to_rownames("grp1")
+    if (min(dim(x)) < 2) {
+      #warning("clustering is auto-disabled when any dim <2.", immediate. = TRUE)
+      message("clustering is auto-disabled when any dim <2.")
+      return(rownames(x))
+    }
+
+    if (bycol) x = t(x)
+    obj <- x %>%
+      scale() %>%
+      stats::dist() %>%
+      stats::hclust() %>%
+      stats::as.dendrogram()
+    rownames(x)[stats::order.dendrogram(obj)]
+  }
+
   # 支持行聚类(subgroup)
   if (isTRUE(cluster_row)) {
-    data2 <- data[, c(x, y, "grp_var", "measure_observed"), with = F]
+    has_grp_var = "grp_var" %in% colnames(data)
+    data2 <- data[, c(x, y, if (has_grp_var) "grp_var", "measure_observed"), with = F]
     data2 <- tidyr::pivot_wider(data2, names_from = x, values_from = "measure_observed")
 
-    get_cluster_order <- function(x) {
-      x <- x %>%
-        tibble::column_to_rownames("grp1")
-      obj <- x %>%
-        scale() %>%
-        stats::dist() %>%
-        stats::hclust() %>%
-        stats::as.dendrogram()
-      rownames(x)[stats::order.dendrogram(obj)]
+    if (has_grp_var) {
+      orders <- data2 %>%
+        dplyr::group_split(.data$grp_var, .keep = FALSE) %>%
+        purrr::map(get_cluster_order) %>%
+        purrr::reduce(c) %>%
+        unique()
+    } else {
+      orders <- get_cluster_order(data2) |>
+        unique()
     }
-    orders <- data2 %>%
-      dplyr::group_split(.data$grp_var, .keep = FALSE) %>%
-      purrr::map(get_cluster_order) %>%
-      purrr::reduce(c) %>%
-      unique()
-    message("All subgroup orders: ", paste(orders, collapse = ", "))
+
+    message("subgroup orders: ", paste(orders, collapse = ", "))
     data$grp1 <- factor(data$grp1, levels = orders)
   }
 
+  # 支持列聚类(variable)
+  if (isTRUE(cluster_col)) {
+    has_grp_var = "grp_var" %in% colnames(data)
+    data2 <- data[, c(x, y, if (has_grp_var) "grp_var", "measure_observed"), with = F]
+    data2 <- tidyr::pivot_wider(data2, names_from = x, values_from = "measure_observed")
+
+    if (has_grp_var) {
+      orders <- data2 %>%
+        dplyr::group_split(.data$grp_var, .keep = FALSE) %>%
+        purrr::map(get_cluster_order, bycol = TRUE) %>%
+        purrr::reduce(c) %>%
+        unique()
+    } else {
+      orders <- get_cluster_order(data2, bycol = TRUE) |>
+        unique()
+    }
+
+    message("variable orders: ", paste(orders, collapse = ", "))
+    # 如果有多个 grp_var，enrich_var的顺序会在不同的grp_var中不同，仅使用第一个
+    message("  - clustering column is suitable for case with one grp_var or return_list is TRUE.")
+    data$enrich_var <- factor(data$enrich_var, levels = orders)
+  }
+
   p <- ggplot(
     data,
     aes_string(

diff --git a/R/utils_mem.R b/R/utils_mem.R
@@ -26,6 +26,7 @@ show_bytes <- function(x) {
   structure(x, class = "bytes")
 }
 
+#' @export
 print.bytes <- function(x, digits = 3, ...) {
   power <- min(floor(log(abs(x), 1000)), 4)
   if (power < 1) {

diff --git a/man/show_group_enrichment.Rd b/man/show_group_enrichment.Rd
diff --git a/man/sigminer-package.Rd b/man/sigminer-package.Rd