From e40862c1b5bff223a48460c5500aac212f71efde Mon Sep 17 00:00:00 2001 From: Jack Leary Date: Mon, 16 Sep 2024 11:22:49 -0400 Subject: [PATCH 1/2] bumped version to 0.8.1 --- DESCRIPTION | 2 +- NEWS.md | 6 +++++- R/chooseCandidateGenes.R | 4 ++-- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index bf4f313..41061c8 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: scLANE Type: Package Title: Model gene expression dynamics with spline-based NB GLMs, GEEs, & GLMMs -Version: 0.8.0 +Version: 0.8.1 Authors@R: c(person(given = "Jack", family = "Leary", email = "j.leary@ufl.edu", role = c("aut", "cre"), comment = c(ORCID = "0009-0004-8821-3269")), person(given = "Rhonda", family = "Bacher", email = "rbacher@ufl.edu", role = c("ctb", "fnd"), comment = c(ORCID = "0000-0001-5787-476X"))) Description: scLANE uses truncated power basis spline models to build flexible, interpretable models of single cell gene expression over pseudotime or latent time. diff --git a/NEWS.md b/NEWS.md index 4eab793..1c44a9d 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,7 +1,11 @@ +# Changes in version 0.8.1 + ++ Added a function called `chooseCandidateGenes()` to identify good genes for trajectory DE testing based on mean / SD expression and sparsity. + # Changes in version 0.8.0 + Added implicit regularization of selected basis functions to the GLMM mode using a NB LASSO. -+ Switched candidate knot subsampling to a uniform sequence of candidate knots across pseudotime's support. ++ Switched candidate knot subsampling to a uniform sequence of candidate1 knots across pseudotime's support. # Changes in version 0.7.9 diff --git a/R/chooseCandidateGenes.R b/R/chooseCandidateGenes.R index 45c2d02..1de5ce3 100644 --- a/R/chooseCandidateGenes.R +++ b/R/chooseCandidateGenes.R @@ -44,7 +44,7 @@ chooseCandidateGenes <- function(obj = NULL, grouped_stats <- purrr::map(seq(unique(id.vec)), \(i) { sub_matrix <- counts_matrix[, which(id.vec == unique(id.vec)[i])] gene_means <- Matrix::rowMeans(sub_matrix) - gene_sds <- sqrt(Matrix::rowMeans((sub_matrix - Matrix::rowMeans(sub_matrix))^2)) + gene_sds <- sqrt(Matrix::rowMeans((sub_matrix - gene_means)^2)) gene_sparsity <- Matrix::rowMeans(sub_matrix == 0) res <- data.frame(subject = unique(id.vec)[i], gene = rownames(sub_matrix), @@ -62,7 +62,7 @@ chooseCandidateGenes <- function(obj = NULL, } else { gene_means <- Matrix::rowMeans(counts_matrix) - gene_sds <- sqrt(Matrix::rowMeans((counts_matrix - Matrix::rowMeans(counts_matrix))^2)) + gene_sds <- sqrt(Matrix::rowMeans((counts_matrix - gene_means)^2)) gene_sparsity <- Matrix::rowMeans(counts_matrix == 0) gene_df <- data.frame(gene = rownames(counts_matrix), mu = unname(gene_means), From 021ae1ebd96ed7a4be97d70b0905ddade67cbcaf Mon Sep 17 00:00:00 2001 From: Jack Leary Date: Thu, 19 Sep 2024 20:43:20 -0400 Subject: [PATCH 2/2] added small-sample bias correction method to GEE wald test -- related to #197 --- DESCRIPTION | 2 +- R/testDynamic.R | 4 +++- R/waldTestGEE.R | 18 +++++++++++++++--- man/waldTestGEE.Rd | 9 ++++++++- 4 files changed, 27 insertions(+), 6 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 41061c8..9904ad5 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -9,7 +9,7 @@ Description: scLANE uses truncated power basis spline models to build flexible, Downstream analysis functionalities include model comparison, dynamic gene clustering, smoothed counts generation, gene set enrichment testing, & visualization. License: MIT + file LICENSE Encoding: UTF-8 -RoxygenNote: 7.3.1 +RoxygenNote: 7.3.2 Depends: glm2, magrittr, diff --git a/R/testDynamic.R b/R/testDynamic.R index 71f7849..ea149d9 100644 --- a/R/testDynamic.R +++ b/R/testDynamic.R @@ -331,7 +331,9 @@ testDynamic <- function(expr.mat = NULL, # compute test stat using asymptotic Chi-squared approximation if (is.gee) { - test_res <- waldTestGEE(mod.1 = marge_mod, mod.0 = null_mod) + test_res <- waldTestGEE(mod.1 = marge_mod, + mod.0 = null_mod, + bias.correct = ifelse(length(unique(id.vec)) < 30, TRUE, FALSE)) } else { test_res <- modelLRT(mod.1 = marge_mod, mod.0 = null_mod, diff --git a/R/waldTestGEE.R b/R/waldTestGEE.R index 33ab6cc..48bd17f 100644 --- a/R/waldTestGEE.R +++ b/R/waldTestGEE.R @@ -7,6 +7,7 @@ #' @importFrom stats pchisq #' @param mod.1 The model under the alternative hypothesis. Must be of class \code{geem}. Defaults to NULL. #' @param mod.0 The model under the null hypothesis. Must be of class \code{geem}. Defaults to NULL. +#' @param bias.correct Boolean specifying whether a small-sample bias correction should be applied to the estimated sandwich variance-covariance matrix. Useful when the number of subjects is small. Defaults to FALSE. #' @return A list containing the Wald test statistic, a \emph{p}-value, and the degrees of freedom used in the test. #' @details #' \itemize{ @@ -18,7 +19,10 @@ #' @seealso \code{\link[geeM]{geem}} #' @seealso \code{\link{modelLRT}} -waldTestGEE <- function(mod.1 = NULL, mod.0 = NULL) { +waldTestGEE <- function(mod.1 = NULL, + mod.0 = NULL, + bias.correct = FALSE, + correction.method = "df") { # check inputs if (inherits(mod.1, "try-error") || inherits(mod.0, "try-error")) { res <- list(Wald_Stat = NA, @@ -27,9 +31,10 @@ waldTestGEE <- function(mod.1 = NULL, mod.0 = NULL) { Notes = "No test performed due to model failure.") return(res) } - + correction.method <- tolower(correction.method) + if (!correction.method %in% c("df")) { stop("Unsupported bias correction method in waldTestGEE().") } mod.1 <- mod.1$final_mod - if (is.null(mod.1) || is.null(mod.0) || !(inherits(mod.1, "geem") && inherits(mod.0, "geem"))) { stop("You must provide two geeM models to wald_test_gee().") } + if (is.null(mod.1) || is.null(mod.0) || !(inherits(mod.1, "geem") && inherits(mod.0, "geem"))) { stop("You must provide two geeM models to waldTestGee().") } if (length(coef(mod.1)) <= length(coef(mod.0))) { # can't calculate Wald statistic if both models are intercept-only res <- list(Wald_Stat = 0, @@ -47,6 +52,13 @@ waldTestGEE <- function(mod.1 = NULL, mod.0 = NULL) { } coef_vals <- as.matrix(coef(mod.1)[coef_idx]) vcov_mat <- as.matrix(mod.1$var)[coef_idx, coef_idx] + if (bias.correct) { + if (correction.method == "df") { + n_s <- length(mod.1$clusz) + p <- ncol(mod.1$X) - 1 + vcov_mat <- (n_s / (n_s - p)) * vcov_mat + } + } wald_test_stat <- try({ as.numeric(crossprod(coef_vals, MASS::ginv(vcov_mat)) %*% coef_vals) }, silent = TRUE) diff --git a/man/waldTestGEE.Rd b/man/waldTestGEE.Rd index fdc8570..cf8e03b 100644 --- a/man/waldTestGEE.Rd +++ b/man/waldTestGEE.Rd @@ -4,12 +4,19 @@ \alias{waldTestGEE} \title{Use a Wald test to compare nested GEE models.} \usage{ -waldTestGEE(mod.1 = NULL, mod.0 = NULL) +waldTestGEE( + mod.1 = NULL, + mod.0 = NULL, + bias.correct = FALSE, + correction.method = "df" +) } \arguments{ \item{mod.1}{The model under the alternative hypothesis. Must be of class \code{geem}. Defaults to NULL.} \item{mod.0}{The model under the null hypothesis. Must be of class \code{geem}. Defaults to NULL.} + +\item{bias.correct}{Boolean specifying whether a small-sample bias correction should be applied to the estimated sandwich variance-covariance matrix. Useful when the number of subjects is small. Defaults to FALSE.} } \value{ A list containing the Wald test statistic, a \emph{p}-value, and the degrees of freedom used in the test.