Skip to content

Commit

Permalink
added geneProgramDrivers function -- closes #166
Browse files Browse the repository at this point in the history
  • Loading branch information
jr-leary7 committed Dec 13, 2023
1 parent 2258b79 commit 22fac19
Show file tree
Hide file tree
Showing 7 changed files with 152 additions and 4 deletions.
5 changes: 3 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -14,5 +14,6 @@ codecov.yml
/Meta/
R/.DS_Store
.Rproj
src/*.o
src/*.so
src/RcppExports.o
src/eigenMapMatMult.o
src/scLANE.so
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ export(embedGenes)
export(enrichDynamicGenes)
export(extractBreakpoints)
export(fitGLMM)
export(geneProgramDrivers)
export(geneProgramScoring)
export(getFittedValues)
export(getKnotDist)
Expand Down Expand Up @@ -114,6 +115,7 @@ importFrom(stats,as.dist)
importFrom(stats,as.formula)
importFrom(stats,coef)
importFrom(stats,convolve)
importFrom(stats,cor.test)
importFrom(stats,cutree)
importFrom(stats,deviance)
importFrom(stats,fitted)
Expand Down
71 changes: 71 additions & 0 deletions R/geneProgramDrivers.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
#' Identify driver genes for a given gene program.
#'
#' @name geneProgramDrivers
#' @author Jack Leary
#' @importFrom Matrix Matrix
#' @importFrom stats cor.test p.adjust
#' @description This function computes the correlation between the normalized expression of each gene in \code{genes} and the per-cell scores of a gene program, then returns the genes whose correlation with the program is statistically significant after multiple-testing adjustment.
#' @param expr.mat Either a \code{SingleCellExperiment} or \code{Seurat} object from which normalized counts can be extracted, or a (dense or sparse) matrix of normalized counts with genes as rows & cells as columns. Defaults to NULL.
#' @param genes A character vector of genes to test. Each gene must be a row name of the counts matrix. Defaults to NULL.
#' @param gene.program A vector of program scores as returned by \code{\link{geneProgramScoring}}, with one score per cell. Defaults to NULL.
#' @param cor.method (Optional) The correlation method to be used; one of "spearman", "pearson", or "kendall". Defaults to "spearman".
#' @param fdr.cutoff (Optional) The threshold on the adjusted p-value for determining statistical significance. Defaults to 0.01.
#' @details P-values are adjusted with the Holm method via \code{\link[stats]{p.adjust}}, which controls the family-wise error rate; \code{fdr.cutoff} is applied to those adjusted p-values. Genes whose test yields a missing p-value are dropped from the output.
#' @return A data.frame with one row per significant gene and four columns: \code{gene}, \code{corr} (the correlation estimate), \code{pvalue}, and \code{pvalue_adj}. Rows are sorted by ascending p-value, with ties broken by descending absolute correlation.
#' @seealso \code{\link{geneProgramScoring}}
#' @seealso \code{\link[stats]{cor.test}}
#' @export
#' @examples
#' data(sim_counts)
#' data(scLANE_models)
#' data(sim_pseudotime)
#' smoothed_dynamics <- smoothedCountsMatrix(scLANE_models,
#'                                           pt = sim_pseudotime,
#'                                           n.cores = 1L)
#' gene_embed <- embedGenes(smoothed_dynamics$Lineage_A, n.cores = 1L)
#' sim_counts <- geneProgramScoring(sim_counts,
#'                                  genes = gene_embed$gene,
#'                                  gene.clusters = gene_embed$leiden,
#'                                  n.cores = 1L)
#' program_drivers <- geneProgramDrivers(sim_counts,
#'                                       genes = gene_embed$gene,
#'                                       gene.program = sim_counts$cluster_0,
#'                                       fdr.cutoff = 0.05)
geneProgramDrivers <- function(expr.mat = NULL,
                               genes = NULL,
                               gene.program = NULL,
                               cor.method = "spearman",
                               fdr.cutoff = 0.01) {
  # check inputs
  if (is.null(expr.mat) || is.null(genes) || is.null(gene.program)) {
    stop("Arguments to geneProgramDrivers() are missing.")
  }
  if (length(genes) == 0L) {
    stop("No genes were provided to geneProgramDrivers().")
  }
  # fail early on an unsupported method instead of deep inside cor.test()
  cor.method <- match.arg(cor.method, choices = c("spearman", "pearson", "kendall"))
  # set up counts matrix
  if (inherits(expr.mat, "SingleCellExperiment")) {
    counts_matrix <- SingleCellExperiment::logcounts(expr.mat)
  } else if (inherits(expr.mat, "Seurat")) {
    counts_matrix <- Seurat::GetAssayData(expr.mat,
                                          slot = "data",
                                          assay = Seurat::DefaultAssay(expr.mat))
  } else if (inherits(expr.mat, "dgCMatrix")) {
    counts_matrix <- Matrix::Matrix(expr.mat, sparse = FALSE)
  } else if (is.matrix(expr.mat) || inherits(expr.mat, "Matrix")) {
    # base matrices & other Matrix classes support row extraction as-is
    counts_matrix <- expr.mat
  } else {
    stop("expr.mat must be a SingleCellExperiment object, a Seurat object, or a matrix of normalized counts.")
  }
  # make sure the requested genes & the program scores line up with the matrix
  if (is.character(genes)) {
    missing_genes <- setdiff(genes, rownames(counts_matrix))
    if (length(missing_genes) > 0L) {
      stop("Some genes are not present in the rows of the counts matrix: ",
           paste(utils::head(missing_genes, 5L), collapse = ", "),
           if (length(missing_genes) > 5L) ", ..." else "")
    }
  }
  if (length(gene.program) != ncol(counts_matrix)) {
    stop("gene.program must contain exactly one score per cell (column) of the counts matrix.")
  }
  # iteratively compute correlations
  cor_tests <- lapply(genes, \(g) {
    cor_res <- stats::cor.test(counts_matrix[g, ],
                               gene.program,
                               method = cor.method,
                               exact = FALSE)
    data.frame(gene = g,
               corr = unname(cor_res$estimate),
               pvalue = cor_res$p.value)
  })
  # bind all per-gene rows in a single call rather than pairwise
  cor_tests <- do.call(rbind, cor_tests)
  # most significant first, ties broken by the strongest absolute correlation
  cor_tests <- cor_tests[order(cor_tests$pvalue, -abs(cor_tests$corr)), , drop = FALSE]
  cor_tests$pvalue_adj <- stats::p.adjust(cor_tests$pvalue, method = "holm")
  # an NA adjusted p-value (e.g., from a constant gene) is never significant
  is_signif <- !is.na(cor_tests$pvalue_adj) & cor_tests$pvalue_adj < fdr.cutoff
  cor_tests <- cor_tests[is_signif, , drop = FALSE]
  rownames(cor_tests) <- NULL
  return(cor_tests)
}
3 changes: 2 additions & 1 deletion R/geneProgramScoring.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,14 @@
#' @name geneProgramScoring
#' @author Jack Leary
#' @importFrom Matrix Matrix
#' @description This function uses \code{\link[UCell]{ScoreSignatures_UCell}} to create a per-cell module score for each of the provided gene clusters. If the
#' @description This function uses \code{\link[UCell]{ScoreSignatures_UCell}} to create a per-cell module score for each of the provided gene clusters. If the input matrix is a \code{Seurat} or \code{SingleCellExperiment} object, then the resulting scores will be added to the \code{meta.data} or the \code{colData} slot, respectively. Otherwise, a data.frame of the per-program scores is returned.
#' @param expr.mat Either a \code{SingleCellExperiment} or \code{Seurat} object from which counts can be extracted, or a matrix of integer-valued counts with genes as rows & cells as columns. Defaults to NULL.
#' @param genes A character vector of gene IDs. Defaults to NULL.
#' @param gene.clusters A factor containing the cluster assignment of each gene in \code{genes}. Defaults to NULL.
#' @param program.labels (Optional) A character vector specifying a label for each gene cluster. Defaults to NULL.
#' @param n.cores (Optional) The number of cores used under the hood in \code{\link[UCell]{ScoreSignatures_UCell}}. Defaults to 2.
#' @return Either a \code{Seurat} or \code{SingleCellExperiment} object if \code{expr.mat} is in either form, or a data.frame containing per-cell program scores if \code{expr.mat} is a matrix.
#' @seealso \code{\link{geneProgramDrivers}}
#' @export
#' @examples
#' data(sim_counts)
Expand Down
56 changes: 56 additions & 0 deletions man/geneProgramDrivers.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 4 additions & 1 deletion man/geneProgramScoring.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 14 additions & 0 deletions tests/testthat/test_scLANE.R
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,13 @@ withr::with_output_sink(tempfile(), {
sim_data_seu <- geneProgramScoring(sim_data_seu,
genes = gene_embedding$gene,
gene.clusters = gene_embedding$leiden)
# gene program drivers
program_drivers <- geneProgramDrivers(sim_data,
genes = gene_embedding$gene,
gene.program = sim_data$cluster_0)
program_drivers_seu <- geneProgramDrivers(sim_data_seu,
genes = gene_embedding$gene,
gene.program = sim_data_seu$cluster_0)
# enrichment analysis
gsea_res <- enrichDynamicGenes(glm_test_results, species = "hsapiens")
# coefficients
Expand Down Expand Up @@ -399,6 +406,13 @@ test_that("geneProgramScoring() output", {
expect_equal(colnames(sim_data_seu@meta.data)[11], "cluster_1")
})

test_that("geneProgramDrivers() output", {
  # the same structural checks apply to both driver tables computed earlier in this file
  driver_tables <- list(program_drivers, program_drivers_seu)
  for (driver_table in driver_tables) {
    expect_s3_class(driver_table, "data.frame")
  }
  for (driver_table in driver_tables) {
    expect_equal(ncol(driver_table), 4)
  }
})

test_that("sortGenesHeatmap() output", {
expect_type(sorted_genes, "character")
expect_length(sorted_genes, ncol(smoothed_counts$Lineage_A))
Expand Down

0 comments on commit 22fac19

Please sign in to comment.