Merge pull request #271 from jr-leary7/dev

Dev
jr-leary7 · Jan 14, 2025 · a5f7c8b · a5f7c8b
2 parents f0f3927 + 6b4240b
commit a5f7c8b
Show file tree

Hide file tree

Showing 8 changed files with 74 additions and 61 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: scLANE
 Type: Package
 Title: Model Gene Expression Dynamics with Spline-Based NB GLMs, GEEs, & GLMMs
-Version: 0.8.7
+Version: 0.99.0
 Authors@R: c(person(given = c("Jack", "R."), family = "Leary", email = "[email protected]", role = c("aut", "cre"), comment = c(ORCID = "0009-0004-8821-3269")), 
              person(given = "Rhonda", family = "Bacher", email = "[email protected]", role = c("ctb", "fnd"), comment = c(ORCID = "0000-0001-5787-476X")))
 Description: Our scLANE model uses truncated power basis spline models to build flexible, interpretable models of single cell gene expression over pseudotime or latent time. 

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,8 @@
+# Changes in v0.99.0
+
++ Preparing for BioConductor submission.
++ Slightly adjusted `waldTestGEE()` and `scoreTestGEE()` to be more efficient. 
+
 # Changes in v0.8.7 
 
 + Switched GEE fitting back to use `scale.fix = FALSE` and substituted a fixed value for the Negative-binomial overdispersion parameter (instead of estimating via method-of-moments) as it improves model fits. 

diff --git a/R/scoreTestGEE.R b/R/scoreTestGEE.R
@@ -4,6 +4,7 @@
 #' @author Jack R. Leary
 #' @description Performs a basic Lagrange multiplier test to determine whether an alternate model is significantly better than a nested null model. This is the GEE equivalent (kind of) of \code{\link{modelLRT}}. Be careful with small sample sizes.
 #' @importFrom stats model.matrix predict pchisq
+#' @importFrom MASS negative.binomial
 #' @importFrom Matrix bdiag
 #' @param mod.1 The model under the alternative hypothesis. Must be of class \code{geem}. Defaults to NULL.
 #' @param mod.0 The model under the null hypothesis. Must be of class \code{geem}. Defaults to NULL.
@@ -52,10 +53,10 @@ scoreTestGEE <- function(mod.1 = NULL,
     theta <- as.numeric(gsub("\\)", "", gsub(".*\\(", "", mod.1$FunList$family)))
     X_null <- stats::model.matrix(mod.0, data = null.df)
     X_alt <- stats::model.matrix(mod.1, data = alt.df)
-    r_null <- null.df$Y - exp(stats::predict(mod.0))
+    r_null <- null.df$Y - fitted(mod.0)
     p_alt <- ncol(X_alt)
     groups <- unique(id.vec)
-    W <- K <- vector("list", length = length(groups))
+    W_list <- K_inv_list <- vector("list", length = length(groups))
     for (i in seq(groups)) {
       group_idx <- which(id.vec == groups[i])
       n_i <- length(group_idx)
@@ -69,39 +70,50 @@ scoreTestGEE <- function(mod.1 = NULL,
         R_i <- matrix(rho^abs(outer(seq(n_i), seq(n_i), "-")), nrow = n_i, ncol = n_i)
       }
       # create working covariance matrix V_i
-      mu_i <- exp(stats::predict(mod.0)[group_idx])
-      V_mu_i <- mu_i * (1 + mu_i / theta)
+      mu_i <- mod.0$FunList$linkinv(mod.0$eta)[group_idx]
+      V_mu_i <- mod.0$FunList$variance(mu_i)
       A_i <- diag(V_mu_i)
       A_i_sqrt <- sqrt(A_i)  # same as taking the 1/2 power of A_i since A_i is diagonal
       V_i <- A_i_sqrt %*% R_i %*% A_i_sqrt
-      # create matrices W_i and K_i
-      K_i <- diag(mu_i)
       V_i_inv <- try({ eigenMapMatrixInvert(V_i) }, silent = TRUE)
       if (inherits(V_i_inv, "try-error")) {
         V_i_inv <- eigenMapPseudoInverse(V_i)
       }
+      # create matrices W_i and K_i
+      K_i <- diag(MASS::negative.binomial(theta = 1, link = "log")$mu.eta(mod.0$eta)[group_idx])
       W_i <- K_i %*% V_i_inv %*% K_i
-      W[[i]] <- W_i
-      K[[i]] <- K_i
-    }
-    W <- as.matrix(Matrix::bdiag(W))
-    K <- as.matrix(Matrix::bdiag(K))
-    K_inv <- try({ eigenMapMatrixInvert(K) }, silent = TRUE)
-    if (inherits(K_inv, "try-error")) {
-      K_inv <- eigenMapPseudoInverse(K)
+      W_list[[i]] <- W_i
+      K_inv <- try({ eigenMapMatrixInvert(K_i) }, silent = TRUE)
+      if (inherits(K_inv, "try-error")) {
+        K_inv <- eigenMapPseudoInverse(K_i)
+      }
+      K_inv_list[[i]] <- K_inv
     }
-    # generate score vector
+    W <- as.matrix(Matrix::bdiag(W_list))
+    K_inv <- as.matrix(Matrix::bdiag(K_inv_list))
+    # score under the null
     U <- phi^(-1) * t(X_alt) %*% W %*% K_inv %*% r_null
-    # generate variance of score vector and invert it
-    V_U <- phi^(-2) * t(X_alt) %*% W %*% X_alt
+    # Generate variance of score vector under the null
+    V_U <- t(X_alt) %*% W %*% X_alt
     V_U_inv <- try({ eigenMapMatrixInvert(V_U) }, silent = TRUE)
     if (inherits(V_U_inv, "try-error")) {
       V_U_inv <- eigenMapPseudoInverse(V_U)
     }
+    VarM_hat <- phi * V_U_inv
+    Lpmat <-  diag(1, nrow = p_alt, ncol = p_alt)
+    Lpmat <- Lpmat[-1,,drop=FALSE]
+    Lmat <- t(Lpmat)
+    mid <- Lpmat %*% VarM_hat %*% Lmat
+    mid_inv <- try({ eigenMapMatrixInvert(mid) }, silent = TRUE)
+    if (inherits(mid_inv, "try-error")) {
+      mid_inv <- eigenMapPseudoInverse(mid)
+    }
+    full_mid <- VarM_hat %*% Lmat %*% mid_inv %*% Lpmat %*% VarM_hat
     # estimate test statistic and accompanying p-value
-    S <- t(U) %*% V_U_inv %*% U
-    S_df <- ncol(X_alt) - ncol(X_null)
-    p_value <- 1 - stats::pchisq(S, df = S_df)
+    S <- t(U) %*% full_mid %*% U
+    S_df = p_alt - 1
+    p_value <- 1 - stats::pchisq(S, df = S_df)  # r from L matrix
+    # format results
     res <- list(Score_Stat = S,
                 DF = S_df,
                 P_Val = p_value,

diff --git a/R/summary.R b/R/summary.R
@@ -1,20 +1,20 @@
-#' Summary method for scLANE objects. 
-#' 
+#' Summary method for scLANE objects.
+#'
 #' @name summary.scLANE
 #' @author Jack R. Leary
-#' @importFrom purrr map reduce 
-#' @importFrom stats p.adjust 
-#' @param test.dyn.res The nested list returned by \code{\link{testDynamic}}. Defaults to NULL. 
-#' @param (Optional) The method used to adjust \emph{p}-values for multiple hypothesis testing. Defaults to "fdr".
+#' @importFrom purrr map reduce
+#' @importFrom stats p.adjust
+#' @param test.dyn.res The nested list returned by \code{\link{testDynamic}}. Defaults to NULL.
+#' @param p.adj.method (Optional) The method used to adjust \emph{p}-values for multiple hypothesis testing. Defaults to "fdr".
 #' @param fdr.cutoff (Optional) The FDR threshold for determining statistical significance. Defaults to 0.01.
-#' @return A summary list with aggregated statistics concerning the trajectory DE tests from \code{scLANE}. 
+#' @return A summary list with aggregated statistics concerning the trajectory DE tests from \code{scLANE}.
 #' @export
 #' @examples
 #' data(scLANE_models)
 #' summary(scLANE_models)
 
-summary.scLANE <- function(test.dyn.res = NULL, 
-                           p.adj.method = "fdr", 
+summary.scLANE <- function(test.dyn.res = NULL,
+                           p.adj.method = "fdr",
                            fdr.cutoff = 0.01) {
   if (!inherits(test.dyn.res, "scLANE")) { stop("The input must be an object of class 'scLANE'.") }
   summary_stats <- list()
@@ -29,18 +29,18 @@ summary.scLANE <- function(test.dyn.res = NULL,
   adj_p_values <- stats::p.adjust(sort(p_values), method = p.adj.method)
   summary_stats$n_significant_genes <- sum(adj_p_values < fdr.cutoff, na.rm = TRUE)
   summary_stats$mean_adj_p_value <- mean(adj_p_values, na.rm = TRUE)
-  summary_stats$test_type <- ifelse(test.dyn.res[[1]][[1]]$Test_Stat_Type == "LRT", 
-                                    "Likelihood Ratio Test", 
+  summary_stats$test_type <- ifelse(test.dyn.res[[1]][[1]]$Test_Stat_Type == "LRT",
+                                    "Likelihood Ratio Test",
                                     ifelse(test.dyn.res[[1]][[1]]$Test_Stat_Type == "Wald", "Wald Test", "Score Test"))
   class(summary_stats) <- "summary.scLANE"
   return(summary_stats)
 }
 
-#' Print method for summary.scLANE objects. 
-#' 
+#' Print method for summary.scLANE objects.
+#'
 #' @name print.summary.scLANE
 #' @author Jack R. Leary
-#' @param x An object of class summary.scLANE. 
+#' @param x An object of class summary.scLANE.
 #' @export
 
 print.summary.scLANE <- function(x) {

diff --git a/R/testDynamic.R b/R/testDynamic.R
@@ -25,7 +25,7 @@
 #' @param is.gee Should a GEE framework be used instead of the default GLM? Defaults to FALSE.
 #' @param cor.structure If the GEE framework is used, specifies the desired working correlation structure. Must be one of "ar1", "independence", or "exchangeable". Defaults to "ar1".
 #' @param gee.bias.correction.method (Optional) Specify which small-sample bias correction to be used on the sandwich variance-covariance matrix prior to test statistic estimation. Options are "kc" and "df". Defaults to NULL, indicating the use of the model-based variance.
-#' @param gee.test A string specifying the type of test used to estimate the significance of the full model. Must be one of "wald" or "score". Defaults to "score".
+#' @param gee.test A string specifying the type of test used to estimate the significance of the full model. Must be one of "wald" or "score". Defaults to "wald".
 #' @param is.glmm Should a GLMM framework be used instead of the default GLM? Defaults to FALSE.
 #' @param id.vec If a GEE or GLMM framework is being used, a vector of subject IDs to use as input to \code{\link[geeM]{geem}} or \code{\link[glmmTMB]{glmmTMB}}. Defaults to NULL.
 #' @param glmm.adaptive (Optional) Should the basis functions for the GLMM be chosen adaptively? If not, uses 4 evenly spaced knots. Defaults to TRUE.
@@ -64,7 +64,7 @@ testDynamic <- function(expr.mat = NULL,
                         is.gee = FALSE,
                         cor.structure = "ar1",
                         gee.bias.correction.method = NULL,
-                        gee.test = "score",
+                        gee.test = "wald",
                         is.glmm = FALSE,
                         glmm.adaptive = TRUE,
                         id.vec = NULL,

diff --git a/R/waldTestGEE.R b/R/waldTestGEE.R
@@ -42,7 +42,6 @@ waldTestGEE <- function(mod.1 = NULL,
     if (!(correction.method %in% c("df", "kc"))) { stop("Unsupported bias correction method in waldTestGEE().") }
     if (correction.method == "kc" && is.null(id.vec)) { stop("The Kauermann and Carroll bias correction method requires a vector of subject IDs.") }
   }
-
   mod.1 <- mod.1$final_mod
   if (!(inherits(mod.1, "geem") && inherits(mod.0, "geem"))) { stop("You must provide two geeM models to waldTestGee().") }
   if (length(coef(mod.0)) != 1) { stop("Null GEE model must be intercept-only.") }
@@ -53,15 +52,8 @@ waldTestGEE <- function(mod.1 = NULL,
                 P_Val = 1,
                 Notes = NA_character_)
   } else {
-    # compute test statistic & asymptotic p-value
-    coef_alt_mod <- names(coef(mod.1))
-    coef_null_mod <- names(coef(mod.0))
-    coef_diff <- setdiff(coef_alt_mod, coef_null_mod)
-    coef_idx <- rep(0, length(coef_diff))
-    for (i in seq_len(length(coef_diff))) {
-      coef_idx[i] <- which(coef_diff[i] == coef_alt_mod)
-    }
-    coef_vals <- as.matrix(coef(mod.1)[coef_idx])
+    # compute test statistic, optionally bias-adjust, & estimate asymptotic p-value
+    coef_vals <- as.matrix(coef(mod.1))
     if (!is.null(correction.method)) {
       vcov_mat <- as.matrix(mod.1$var)
       vcov_mat <- biasCorrectGEE(mod.1,
@@ -72,24 +64,28 @@ waldTestGEE <- function(mod.1 = NULL,
     } else {
       vcov_mat <- as.matrix(mod.1$naiv.var)
     }
-    vcov_mat <- vcov_mat[coef_idx, coef_idx]
-    wald_test_stat <- try({
-      vcov_mat_inv <- eigenMapMatrixInvert(vcov_mat, n_cores = 1L)
-      if (inherits(vcov_mat_inv, "try-error")) {
-        vcov_mat_inv <- eigenMapPseudoInverse(vcov_mat, n_cores = 1L)
-      }
-      as.numeric(crossprod(coef_vals, vcov_mat_inv) %*% coef_vals)
-    }, silent = TRUE)
+    p_alt <- length(coef(mod.1))
+    Lpmat <-  diag(1, nrow = p_alt, ncol = p_alt)
+    Lpmat <- Lpmat[-1,,drop=FALSE]
+    Lmat <- t(Lpmat)
+    middle <- Lpmat %*% vcov_mat %*% Lmat
+    middle_inv <- try({ eigenMapMatrixInvert(middle) }, silent = TRUE)
+    if (inherits(middle_inv, "try-error")) {
+      middle_inv <- eigenMapPseudoInverse(middle)
+    }
+    sides <- Lpmat %*% coef(mod.1)
+    wald_test_stat <- t(sides) %*% middle_inv %*% sides
     if (inherits(wald_test_stat, "try-error")) {
       wald_note <- wald_test_stat[1]  # this is the error message
       wald_test_stat <- 0
       p_value <- 1
     } else {
-      p_value <- as.numeric(1 - stats::pchisq(wald_test_stat, df = length(coef_diff)))
+      p_value <- as.numeric(1 - stats::pchisq(wald_test_stat, df = p_alt - 1))
       wald_note <- NA_character_
     }
+    # format results
     res <- list(Wald_Stat = wald_test_stat,
-                DF = length(coef_diff),
+                DF = p_alt - 1,
                 P_Val = p_value,
                 Notes = wald_note)
   }

diff --git a/man/summary.scLANE.Rd b/man/summary.scLANE.Rd
diff --git a/man/testDynamic.Rd b/man/testDynamic.Rd