diff --git a/DESCRIPTION b/DESCRIPTION index 254d0cf..73705e1 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: scLANE Type: Package Title: Model Gene Expression Dynamics with Spline-Based NB GLMs, GEEs, & GLMMs -Version: 0.8.7 +Version: 0.99.0 Authors@R: c(person(given = c("Jack", "R."), family = "Leary", email = "j.leary@ufl.edu", role = c("aut", "cre"), comment = c(ORCID = "0009-0004-8821-3269")), person(given = "Rhonda", family = "Bacher", email = "rbacher@ufl.edu", role = c("ctb", "fnd"), comment = c(ORCID = "0000-0001-5787-476X"))) Description: Our scLANE model uses truncated power basis spline models to build flexible, interpretable models of single cell gene expression over pseudotime or latent time. diff --git a/NEWS.md b/NEWS.md index 0c145f1..5405ecc 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,8 @@ +# Changes in v0.99.0 + ++ Preparing for BioConductor submission. ++ Slightly adjusted `waldTestGEE()` and `scoreTestGEE()` to be more efficient. + # Changes in v0.8.7 + Switched GEE fitting back to use `scale.fix = FALSE` and substituted a fixed value for the Negative-binomial overdispersion parameter (instead of estimating via method-of-moments) as it improves model fits. diff --git a/R/scoreTestGEE.R b/R/scoreTestGEE.R index 313b767..cd082f4 100644 --- a/R/scoreTestGEE.R +++ b/R/scoreTestGEE.R @@ -4,6 +4,7 @@ #' @author Jack R. Leary #' @description Performs a basic Lagrange multiplier test to determine whether an alternate model is significantly better than a nested null model. This is the GEE equivalent (kind of) of \code{\link{modelLRT}}. Be careful with small sample sizes. #' @importFrom stats model.matrix predict pchisq +#' @importFrom MASS negative.binomial #' @importFrom Matrix bdiag #' @param mod.1 The model under the alternative hypothesis. Must be of class \code{geem}. Defaults to NULL. #' @param mod.0 The model under the null hypothesis. Must be of class \code{geem}. Defaults to NULL. @@ -52,10 +53,10 @@ scoreTestGEE <- function(mod.1 = NULL, theta <- as.numeric(gsub("\\)", "", gsub(".*\\(", "", mod.1$FunList$family))) X_null <- stats::model.matrix(mod.0, data = null.df) X_alt <- stats::model.matrix(mod.1, data = alt.df) - r_null <- null.df$Y - exp(stats::predict(mod.0)) + r_null <- null.df$Y - fitted(mod.0) p_alt <- ncol(X_alt) groups <- unique(id.vec) - W <- K <- vector("list", length = length(groups)) + W_list <- K_inv_list <- vector("list", length = length(groups)) for (i in seq(groups)) { group_idx <- which(id.vec == groups[i]) n_i <- length(group_idx) @@ -69,39 +70,50 @@ scoreTestGEE <- function(mod.1 = NULL, R_i <- matrix(rho^abs(outer(seq(n_i), seq(n_i), "-")), nrow = n_i, ncol = n_i) } # create working covariance matrix V_i - mu_i <- exp(stats::predict(mod.0)[group_idx]) - V_mu_i <- mu_i * (1 + mu_i / theta) + mu_i <- mod.0$FunList$linkinv(mod.0$eta)[group_idx] + V_mu_i <- mod.0$FunList$variance(mu_i) A_i <- diag(V_mu_i) A_i_sqrt <- sqrt(A_i) # same as taking the 1/2 power of A_i since A_i is diagonal V_i <- A_i_sqrt %*% R_i %*% A_i_sqrt - # create matrices W_i and K_i - K_i <- diag(mu_i) V_i_inv <- try({ eigenMapMatrixInvert(V_i) }, silent = TRUE) if (inherits(V_i_inv, "try-error")) { V_i_inv <- eigenMapPseudoInverse(V_i) } + # create matrices W_i and K_i + K_i <- diag(MASS::negative.binomial(theta = 1, link = "log")$mu.eta(mod.0$eta)[group_idx]) W_i <- K_i %*% V_i_inv %*% K_i - W[[i]] <- W_i - K[[i]] <- K_i - } - W <- as.matrix(Matrix::bdiag(W)) - K <- as.matrix(Matrix::bdiag(K)) - K_inv <- try({ eigenMapMatrixInvert(K) }, silent = TRUE) - if (inherits(K_inv, "try-error")) { - K_inv <- eigenMapPseudoInverse(K) + W_list[[i]] <- W_i + K_inv <- try({ eigenMapMatrixInvert(K_i) }, silent = TRUE) + if (inherits(K_inv, "try-error")) { + K_inv <- eigenMapPseudoInverse(K_i) + } + K_inv_list[[i]] <- K_inv } - # generate score vector + W <- as.matrix(Matrix::bdiag(W_list)) + K_inv <- as.matrix(Matrix::bdiag(K_inv_list)) + # score under the null U <- phi^(-1) * t(X_alt) %*% W %*% K_inv %*% r_null - # generate variance of score vector and invert it - V_U <- phi^(-2) * t(X_alt) %*% W %*% X_alt + # Generate variance of score vector under the null + V_U <- t(X_alt) %*% W %*% X_alt V_U_inv <- try({ eigenMapMatrixInvert(V_U) }, silent = TRUE) if (inherits(V_U_inv, "try-error")) { V_U_inv <- eigenMapPseudoInverse(V_U) } + VarM_hat <- phi * V_U_inv + Lpmat <- diag(1, nrow = p_alt, ncol = p_alt) + Lpmat <- Lpmat[-1,,drop=FALSE] + Lmat <- t(Lpmat) + mid <- Lpmat %*% VarM_hat %*% Lmat + mid_inv <- try({ eigenMapMatrixInvert(mid) }, silent = TRUE) + if (inherits(mid_inv, "try-error")) { + mid_inv <- eigenMapPseudoInverse(mid) + } + full_mid <- VarM_hat %*% Lmat %*% mid_inv %*% Lpmat %*% VarM_hat # estimate test statistic and accompanying p-value - S <- t(U) %*% V_U_inv %*% U - S_df <- ncol(X_alt) - ncol(X_null) - p_value <- 1 - stats::pchisq(S, df = S_df) + S <- t(U) %*% full_mid %*% U + S_df = p_alt - 1 + p_value <- 1 - stats::pchisq(S, df = S_df) # r from L matrix + # format results res <- list(Score_Stat = S, DF = S_df, P_Val = p_value, diff --git a/R/summary.R b/R/summary.R index 98e4bdd..aee682e 100644 --- a/R/summary.R +++ b/R/summary.R @@ -1,20 +1,20 @@ -#' Summary method for scLANE objects. -#' +#' Summary method for scLANE objects. +#' #' @name summary.scLANE #' @author Jack R. Leary -#' @importFrom purrr map reduce -#' @importFrom stats p.adjust -#' @param test.dyn.res The nested list returned by \code{\link{testDynamic}}. Defaults to NULL. -#' @param (Optional) The method used to adjust \emph{p}-values for multiple hypothesis testing. Defaults to "fdr". +#' @importFrom purrr map reduce +#' @importFrom stats p.adjust +#' @param test.dyn.res The nested list returned by \code{\link{testDynamic}}. Defaults to NULL. +#' @param p.adj.method (Optional) The method used to adjust \emph{p}-values for multiple hypothesis testing. Defaults to "fdr". #' @param fdr.cutoff (Optional) The FDR threshold for determining statistical significance. Defaults to 0.01. -#' @return A summary list with aggregated statistics concerning the trajectory DE tests from \code{scLANE}. +#' @return A summary list with aggregated statistics concerning the trajectory DE tests from \code{scLANE}. #' @export #' @examples #' data(scLANE_models) #' summary(scLANE_models) -summary.scLANE <- function(test.dyn.res = NULL, - p.adj.method = "fdr", +summary.scLANE <- function(test.dyn.res = NULL, + p.adj.method = "fdr", fdr.cutoff = 0.01) { if (!inherits(test.dyn.res, "scLANE")) { stop("The input must be an object of class 'scLANE'.") } summary_stats <- list() @@ -29,18 +29,18 @@ summary.scLANE <- function(test.dyn.res = NULL, adj_p_values <- stats::p.adjust(sort(p_values), method = p.adj.method) summary_stats$n_significant_genes <- sum(adj_p_values < fdr.cutoff, na.rm = TRUE) summary_stats$mean_adj_p_value <- mean(adj_p_values, na.rm = TRUE) - summary_stats$test_type <- ifelse(test.dyn.res[[1]][[1]]$Test_Stat_Type == "LRT", - "Likelihood Ratio Test", + summary_stats$test_type <- ifelse(test.dyn.res[[1]][[1]]$Test_Stat_Type == "LRT", + "Likelihood Ratio Test", ifelse(test.dyn.res[[1]][[1]]$Test_Stat_Type == "Wald", "Wald Test", "Score Test")) class(summary_stats) <- "summary.scLANE" return(summary_stats) } -#' Print method for summary.scLANE objects. -#' +#' Print method for summary.scLANE objects. +#' #' @name print.summary.scLANE #' @author Jack R. Leary -#' @param x An object of class summary.scLANE. +#' @param x An object of class summary.scLANE. #' @export print.summary.scLANE <- function(x) { diff --git a/R/testDynamic.R b/R/testDynamic.R index 25befb4..2737529 100644 --- a/R/testDynamic.R +++ b/R/testDynamic.R @@ -25,7 +25,7 @@ #' @param is.gee Should a GEE framework be used instead of the default GLM? Defaults to FALSE. #' @param cor.structure If the GEE framework is used, specifies the desired working correlation structure. Must be one of "ar1", "independence", or "exchangeable". Defaults to "ar1". #' @param gee.bias.correction.method (Optional) Specify which small-sample bias correction to be used on the sandwich variance-covariance matrix prior to test statistic estimation. Options are "kc" and "df". Defaults to NULL, indicating the use of the model-based variance. -#' @param gee.test A string specifying the type of test used to estimate the significance of the full model. Must be one of "wald" or "score". Defaults to "score". +#' @param gee.test A string specifying the type of test used to estimate the significance of the full model. Must be one of "wald" or "score". Defaults to "wald". #' @param is.glmm Should a GLMM framework be used instead of the default GLM? Defaults to FALSE. #' @param id.vec If a GEE or GLMM framework is being used, a vector of subject IDs to use as input to \code{\link[geeM]{geem}} or \code{\link[glmmTMB]{glmmTMB}}. Defaults to NULL. #' @param glmm.adaptive (Optional) Should the basis functions for the GLMM be chosen adaptively? If not, uses 4 evenly spaced knots. Defaults to TRUE. @@ -64,7 +64,7 @@ testDynamic <- function(expr.mat = NULL, is.gee = FALSE, cor.structure = "ar1", gee.bias.correction.method = NULL, - gee.test = "score", + gee.test = "wald", is.glmm = FALSE, glmm.adaptive = TRUE, id.vec = NULL, diff --git a/R/waldTestGEE.R b/R/waldTestGEE.R index b9e5eae..85545ff 100644 --- a/R/waldTestGEE.R +++ b/R/waldTestGEE.R @@ -42,7 +42,6 @@ waldTestGEE <- function(mod.1 = NULL, if (!(correction.method %in% c("df", "kc"))) { stop("Unsupported bias correction method in waldTestGEE().") } if (correction.method == "kc" && is.null(id.vec)) { stop("The Kauermann and Carroll bias correction method requires a vector of subject IDs.") } } - mod.1 <- mod.1$final_mod if (!(inherits(mod.1, "geem") && inherits(mod.0, "geem"))) { stop("You must provide two geeM models to waldTestGee().") } if (length(coef(mod.0)) != 1) { stop("Null GEE model must be intercept-only.") } @@ -53,15 +52,8 @@ waldTestGEE <- function(mod.1 = NULL, P_Val = 1, Notes = NA_character_) } else { - # compute test statistic & asymptotic p-value - coef_alt_mod <- names(coef(mod.1)) - coef_null_mod <- names(coef(mod.0)) - coef_diff <- setdiff(coef_alt_mod, coef_null_mod) - coef_idx <- rep(0, length(coef_diff)) - for (i in seq_len(length(coef_diff))) { - coef_idx[i] <- which(coef_diff[i] == coef_alt_mod) - } - coef_vals <- as.matrix(coef(mod.1)[coef_idx]) + # compute test statistic, optionally bias-adjust, & estimate asymptotic p-value + coef_vals <- as.matrix(coef(mod.1)) if (!is.null(correction.method)) { vcov_mat <- as.matrix(mod.1$var) vcov_mat <- biasCorrectGEE(mod.1, @@ -72,24 +64,28 @@ waldTestGEE <- function(mod.1 = NULL, } else { vcov_mat <- as.matrix(mod.1$naiv.var) } - vcov_mat <- vcov_mat[coef_idx, coef_idx] - wald_test_stat <- try({ - vcov_mat_inv <- eigenMapMatrixInvert(vcov_mat, n_cores = 1L) - if (inherits(vcov_mat_inv, "try-error")) { - vcov_mat_inv <- eigenMapPseudoInverse(vcov_mat, n_cores = 1L) - } - as.numeric(crossprod(coef_vals, vcov_mat_inv) %*% coef_vals) - }, silent = TRUE) + p_alt <- length(coef(mod.1)) + Lpmat <- diag(1, nrow = p_alt, ncol = p_alt) + Lpmat <- Lpmat[-1,,drop=FALSE] + Lmat <- t(Lpmat) + middle <- Lpmat %*% vcov_mat %*% Lmat + middle_inv <- try({ eigenMapMatrixInvert(middle) }, silent = TRUE) + if (inherits(middle_inv, "try-error")) { + middle_inv <- eigenMapPseudoInverse(middle) + } + sides <- Lpmat %*% coef(mod.1) + wald_test_stat <- t(sides) %*% middle_inv %*% sides if (inherits(wald_test_stat, "try-error")) { wald_note <- wald_test_stat[1] # this is the error message wald_test_stat <- 0 p_value <- 1 } else { - p_value <- as.numeric(1 - stats::pchisq(wald_test_stat, df = length(coef_diff))) + p_value <- as.numeric(1 - stats::pchisq(wald_test_stat, df = p_alt - 1)) wald_note <- NA_character_ } + # format results res <- list(Wald_Stat = wald_test_stat, - DF = length(coef_diff), + DF = p_alt - 1, P_Val = p_value, Notes = wald_note) } diff --git a/man/summary.scLANE.Rd b/man/summary.scLANE.Rd index 0ec2b68..a939546 100644 --- a/man/summary.scLANE.Rd +++ b/man/summary.scLANE.Rd @@ -9,9 +9,9 @@ \arguments{ \item{test.dyn.res}{The nested list returned by \code{\link{testDynamic}}. Defaults to NULL.} -\item{fdr.cutoff}{(Optional) The FDR threshold for determining statistical significance. Defaults to 0.01.} +\item{p.adj.method}{(Optional) The method used to adjust \emph{p}-values for multiple hypothesis testing. Defaults to "fdr".} -\item{(Optional)}{The method used to adjust \emph{p}-values for multiple hypothesis testing. Defaults to "fdr".} +\item{fdr.cutoff}{(Optional) The FDR threshold for determining statistical significance. Defaults to 0.01.} } \value{ A summary list with aggregated statistics concerning the trajectory DE tests from \code{scLANE}. diff --git a/man/testDynamic.Rd b/man/testDynamic.Rd index d318d19..3319a7f 100644 --- a/man/testDynamic.Rd +++ b/man/testDynamic.Rd @@ -12,7 +12,7 @@ testDynamic( is.gee = FALSE, cor.structure = "ar1", gee.bias.correction.method = NULL, - gee.test = "score", + gee.test = "wald", is.glmm = FALSE, glmm.adaptive = TRUE, id.vec = NULL, @@ -38,7 +38,7 @@ testDynamic( \item{gee.bias.correction.method}{(Optional) Specify which small-sample bias correction to be used on the sandwich variance-covariance matrix prior to test statistic estimation. Options are "kc" and "df". Defaults to NULL, indicating the use of the model-based variance.} -\item{gee.test}{A string specifying the type of test used to estimate the significance of the full model. Must be one of "wald" or "score". Defaults to "score".} +\item{gee.test}{A string specifying the type of test used to estimate the significance of the full model. Must be one of "wald" or "score". Defaults to "wald".} \item{is.glmm}{Should a GLMM framework be used instead of the default GLM? Defaults to FALSE.}