From b252533bedf124aaac76900b5292e78af1b7c114 Mon Sep 17 00:00:00 2001 From: Jack Leary Date: Tue, 12 Dec 2023 16:07:51 -0500 Subject: [PATCH 1/3] minor updates --- .github/workflows/bioc-check.yaml | 1 + .gitignore | 4 ++++ R/embedGenes.R | 10 ++++++---- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/.github/workflows/bioc-check.yaml b/.github/workflows/bioc-check.yaml index 163fbf1..96ec487 100644 --- a/.github/workflows/bioc-check.yaml +++ b/.github/workflows/bioc-check.yaml @@ -36,5 +36,6 @@ jobs: shell: Rscript {0} - name: Check BiocCheck results run: | + echo "total errors: $BC_ERRORS_TOTAL" if [[ $BC_ERRORS_TOTAL -gt 0 ]]; then exit 1; else echo "BiocCheck passed!"; fi shell: bash diff --git a/.gitignore b/.gitignore index 9e95317..ed47a3d 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,10 @@ .Ruserdata .Rbuildignore inst/doc +inst/.DS_Store +inst/rmarkdown/.DS_Store +inst/rmarkdown/templates/.DS_Store +inst/rmarkdown/templates/Bacher_Group_HTML/.DS_Store codecov.yml /doc/ /Meta/ diff --git a/R/embedGenes.R b/R/embedGenes.R index e75afdc..1c2c0f5 100644 --- a/R/embedGenes.R +++ b/R/embedGenes.R @@ -56,7 +56,8 @@ embedGenes <- function(smoothed.counts = NULL, init = "spectral", nn_method = "annoy", seed = random.seed, - n_threads = n.cores) + n_threads = n.cores, + verbose = FALSE) } else { smoothed_counts_umap <- uwot::umap(smoothed.counts, n_components = 2, @@ -65,7 +66,8 @@ embedGenes <- function(smoothed.counts = NULL, init = "spectral", nn_method = "annoy", seed = random.seed, - n_threads = n.cores) + n_threads = n.cores, + verbose = FALSE) } # clustering w/ silhouette score parameter tuning if (cluster.genes) { @@ -74,13 +76,13 @@ embedGenes <- function(smoothed.counts = NULL, k = k.param, type = "jaccard", BNPARAM = BiocNeighbors::AnnoyParam(distance = "Cosine"), - BPPARAM = BiocParallel::SnowParam(workers = n.cores)) + BPPARAM = BiocParallel::SnowParam(workers = n.cores, RNGseed = random.seed)) } else { smoothed_counts_snn <- bluster::makeSNNGraph(smoothed.counts, k = k.param, type = "jaccard", BNPARAM = BiocNeighbors::AnnoyParam(distance = "Cosine"), - BPPARAM = BiocParallel::SnowParam(workers = n.cores)) + BPPARAM = BiocParallel::SnowParam(workers = n.cores, RNGseed = random.seed)) } if (is.null(resolution.param)) { if (pca.init) { From 2258b79448b1bcb65259e53e0e8b58b624dc80b8 Mon Sep 17 00:00:00 2001 From: Jack Leary Date: Wed, 13 Dec 2023 12:30:36 -0500 Subject: [PATCH 2/3] removing files that shouldnt be tracked for BiocCheck --- .DS_Store | Bin 10244 -> 0 bytes .Rbuildignore | 5 ----- .gitignore | 1 + R/.DS_Store | Bin 6148 -> 0 bytes R/{GetResultsDE.R => getResultsDE.R} | 0 R/{ModelLRT.R => modelLRT.R} | 0 codecov.yml | 20 ------------------ inst/.DS_Store | Bin 6148 -> 0 bytes inst/rmarkdown/.DS_Store | Bin 6148 -> 0 bytes inst/rmarkdown/templates/.DS_Store | Bin 6148 -> 0 bytes .../templates/Bacher_Group_HTML/.DS_Store | Bin 6148 -> 0 bytes scLANE.Rproj | 20 ------------------ 12 files changed, 1 insertion(+), 45 deletions(-) delete mode 100644 .DS_Store delete mode 100644 .Rbuildignore delete mode 100644 R/.DS_Store rename R/{GetResultsDE.R => getResultsDE.R} (100%) rename R/{ModelLRT.R => modelLRT.R} (100%) delete mode 100644 codecov.yml delete mode 100644 inst/.DS_Store delete mode 100644 inst/rmarkdown/.DS_Store delete mode 100644 inst/rmarkdown/templates/.DS_Store delete mode 100644 inst/rmarkdown/templates/Bacher_Group_HTML/.DS_Store delete mode 100644 scLANE.Rproj diff --git a/.DS_Store b/.DS_Store deleted file mode 100644 index ee60853be1803afc81904f8fd6a865fc39400c2f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 10244 zcmeHMYitzP6+UOZgjq)5F~xw_!MntQ@K}h~25giE>jw!26FYuj0|{n!XE3A8&TMwZ zHk6XOMtif1&+IG|QV?{bQ!@68iMxczq^@Ko(UWTpTFqgfyC$C@I^8AK|_b^luo^``) zB{D}<=SBy13=9tK9M-f^&+IjA-M7Zf4&67sVXe~J?~Zq7T&E{*j#=jTV8*pavkqyh zGP<1^C1bPIw)?Cf3hPYXwe23)vwX{Sb`JUG0iWquLq)2gU7nlw+j(O{7PT*|=Cc^x zIp*x`F!LVCNaU4yi*8)HY}GC6Hr~;GS69#ZO08=C4;HBEF3T_--`Z!H`GIlE@b~Z3 zy^QG?mNPOqW_ecHHv1htYgQpO!o3#PM59BaY3qSe%Ri{qD+;e&QWsV4HK-%oU4yk| zXs;>4{-(NUbT`wq=S{jBbF7o zXteH5McK);t%hMzJgurS+8N!ZXy=q&-hMrAc8@z|zEIz-D10EPp0&2wuzYt?*~gTc z<&0TAjXRA4iBV6fFU;ur)C%!fgiw8|LDXtUo5&JqfHv3)dmsk~;V2w~lkjtR4Nk)u z_!C@&_u)hM6#fmD;S2Z@!#Ec!QNt>X;?1}cSK(@0hbe5uHr$3?xE;H(AMe9GIE*^( zLl<-S0D5>3kKjZ2Fg}V;;?wvHK8w%c3-}7Yh40|I_$&N9{sI4p7w~<&gnz@o=}95*gNN4jpRTxFHd5 zYHmE7h#yKOTUrwF^(~Eu4@Z>h`de=89lqbrI_^>7i-i{F7TOBr6C^7j3pzOVNV1QH zTI{3?F7BPuWZjMqvD1RG>CKbJcK`THtE@Gypv~={Y&CcXW!ipEEtN|dfieRB?+CE( zL!2%qQ-Pcil7Bkr#wY~SGYY{AJ!|ZNWIT|mKu!or8Y(DUNeWlCJBWJjPWAENcPfw* zLJD_49+}z+B4d!Dpl}C_b92CykmY+BfieO!5m-c32#ovZ1fvL@Nasp%|Jat!EnBxW zU$@-<;yl6js~0YcvlO7^_O?l)EU0yXP0Ku;6Z0a%%85D82Bxv<^OO_R-l*2a28g+v z2c}X8kx~>A32ipx=nA)9s@BJ}S;Xzd&*kchn05p4dgXj=v}SQE5nHQJ2BC$xaYL+8 z)hZ}ih;Xf%h&5B{fLymxy;ar1lr{vqO=>GeaUvRsmO5g0D#ZFV<*MoU70LMT;2iuJ zF2O&EjQ~};u(wo5YtsA zz+E4F^w+l^l>k)!M(#ilYxxDd0t@-QL=@wHnwzNeN(6E=>PswMn6P!&_wQOS#HRJ@6Y(|cll*|HDVa>fo7W}3#sSr2hhzw0_1kCV&dMM@ zSre=I#R{{V z5R$vljhXO559}zO{})X44M8>YNwH^=6GGAxmH+u40atPSQr`cs%hRm#{?Av;H*@H1@V-^m;4Wg<&0T*E43hX&L&p$$qDprKhvt+--jT7}7np#A3 zem<@ulZcFPQ@L2!n>{z**++&mCkOWA81W14cNZlEfg7;MkzE(HCqgga^y>{tEnwC%0;vJ&^%eQ zLs35+`xjp>T0*@jCwb{&M4LT3tG9~faA*kNo$vQZy8|Nvkp=D-|{ zR$B%v1IrAo>t>7Y|0kEf|CgO?%Q9dYSSbcrW9SYC7?QbLE5XrSE3h15AtAd`qjEuI ju47f9tN1n+B{(Lig6J45HDV8n{Si4Map zWff7FIZ_CrgaH)MWFeXj|04r*cAh!2A>{Dj{E9qEb%fsQC{L53+}^$wwUzp-)it>$ z*X3z&p(jBZmgAxuj=t0ANb5A3_rvI0GMMxmZx3`-BV93dFlEr$`)#EHK zOm1Ku(vx1l@qRjO?|yEooiFWKQ%zf~PN%6pb=tF;C*OVexc9aHGaY97(()Q1^O{010(4kRPsI!{122OCx*rrOq3^J=Xs!-yObLKkN3#*^ zQ!PO`%AxPDvWOlOVN(%ps&FNSu<2-5F3xvYS+waOT=5~?%EDDB!mN(-D-#aFx5zCs zzzjTQV8sp_^!^_{UH?B$;t?~z47?}?M7Vlx}MerLcGXA%Dd0 zaVAMcse%U)DKjwhlF3ZMye!EE0EE|XRRPKX;Gh!bN@%_i8Yi8Qob?b2Jx3e5(1ZiD zY9g8)|B(UOy9MwdfE&1jkNek$4({b4oWe02g@d#c#wx_Wb~Rpj!8nc1RK zbmp8Z|4t43UeN2OUeLXu-l<{J?&^f7)y9y^^C)Slfv5UO zE7iHa8E{HYsaakb4r@DG6}hop8&%}6TCLYBae}u}^FDq^)RW;?;HRi% z-r^XZvGIdaAN_6;tK=4=$1{y*U}OfE0cK!I7_i5iGdCsma2d=1Gw^#1(D@)y30;es zL49Y`}hCnBjsnZfrIZ0M^PW9cehM%99TNex8TVrCFMDEuR!X<)+){3!!(Oqo(L diff --git a/inst/rmarkdown/templates/Bacher_Group_HTML/.DS_Store b/inst/rmarkdown/templates/Bacher_Group_HTML/.DS_Store deleted file mode 100644 index a5921d1d7abb2c00cd10612e4ff5facd20e49378..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeHK%}T>S5T32oZYe?!3VK`cTF_S1ikDF93mDOZN^NM+(3mYvY7V84yS|Vw;`2DO zyA`Agov zHq`!6Yt~THX0zRHsDpNEHml0bo!yi3-Y|Yl^s{AC;D={q-QgTw&{){mlRr*ko!nz= zUN_HYWCoZ4W?+dJa7Uo5EzzENtIPm1@G}PJd{C%_p2NbTIXZBl-$#m<2}#hVy9A+f z=s7GbVgyClQbb!S+!aIEa`Y>g=Q%7a+Hw$PWqgiXS-2aDFsq|qnRF1IMIM;}X5cde zD|T9^{(tuU{r__j&zJ#b;9oHy%3Z(P!7aJoy0AIwwF>nPm4xyNiytLum{yFr)QY!I aji6tVf#^9bETRX6e*_E-JTL>l%D_9kP)&3I diff --git a/scLANE.Rproj b/scLANE.Rproj deleted file mode 100644 index 497f8bf..0000000 --- a/scLANE.Rproj +++ /dev/null @@ -1,20 +0,0 @@ -Version: 1.0 - -RestoreWorkspace: Default -SaveWorkspace: Default -AlwaysSaveHistory: Default - -EnableCodeIndexing: Yes -UseSpacesForTab: Yes -NumSpacesForTab: 2 -Encoding: UTF-8 - -RnwWeave: Sweave -LaTeX: pdfLaTeX - -AutoAppendNewline: Yes -StripTrailingWhitespace: Yes - -BuildType: Package -PackageUseDevtools: Yes -PackageInstallArgs: --no-multiarch --with-keep.source From 22fac1975c9224c4a2c60be86d859a312fe0b7dc Mon Sep 17 00:00:00 2001 From: Jack Leary Date: Wed, 13 Dec 2023 14:20:43 -0500 Subject: [PATCH 3/3] added geneProgramDrivers function -- closes #166 --- .gitignore | 5 ++- NAMESPACE | 2 + R/geneProgramDrivers.R | 71 ++++++++++++++++++++++++++++++++++++ R/geneProgramScoring.R | 3 +- man/geneProgramDrivers.Rd | 56 ++++++++++++++++++++++++++++ man/geneProgramScoring.Rd | 5 ++- tests/testthat/test_scLANE.R | 14 +++++++ 7 files changed, 152 insertions(+), 4 deletions(-) create mode 100644 R/geneProgramDrivers.R create mode 100644 man/geneProgramDrivers.Rd diff --git a/.gitignore b/.gitignore index be6819e..0e6226c 100644 --- a/.gitignore +++ b/.gitignore @@ -14,5 +14,6 @@ codecov.yml /Meta/ R/.DS_Store .Rproj -src/*.o -src/*.so +src/RcppExports.o +src/eigenMapMatMult.o +src/scLANE.so diff --git a/NAMESPACE b/NAMESPACE index 9bf61ff..b08da02 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -6,6 +6,7 @@ export(embedGenes) export(enrichDynamicGenes) export(extractBreakpoints) export(fitGLMM) +export(geneProgramDrivers) export(geneProgramScoring) export(getFittedValues) export(getKnotDist) @@ -114,6 +115,7 @@ importFrom(stats,as.dist) importFrom(stats,as.formula) importFrom(stats,coef) importFrom(stats,convolve) +importFrom(stats,cor.test) importFrom(stats,cutree) importFrom(stats,deviance) importFrom(stats,fitted) diff --git a/R/geneProgramDrivers.R b/R/geneProgramDrivers.R new file mode 100644 index 0000000..736ee50 --- /dev/null +++ b/R/geneProgramDrivers.R @@ -0,0 +1,71 @@ +#' Identify driver genes for a given gene program. +#' +#' @name geneProgramDrivers +#' @author Jack Leary +#' @importFrom Matrix Matrix +#' @importFrom purrr map reduce +#' @importFrom stats cor.test p.adjust +#' @importFrom dplyr arrange desc mutate filter +#' @description This function computes the correlation +#' @param expr.mat Either a \code{SingleCellExperiment} or \code{Seurat} object from which counts can be extracted, or a matrix of normalized counts with genes as rows & cells as columns. Defaults to NULL. +#' @param genes A character vector of genes to test. Defaults to NULL. +#' @param gene.program A vector of program scores as returned by \code{\link{geneProgramScoring}}. Defaults to NULL. +#' @param cor.method (Optional) The correlation method to be used. Defaults to "spearman". +#' @param fdr.cutoff (Optional) The FDR threshold for determining statistical significance. Defaults to 0.01. +#' @return Either a \code{Seurat} or \code{SingleCellExperiment} object if \code{expr.mat} is in either form, or a data.frame containing per-cell program scores if \code{expr.mat} is a matrix. +#' @seealso \code{\link{geneProgramScoring}} +#' @seealso \code{\link[stats]{cor.test}} +#' @export +#' @examples +#' data(sim_counts) +#' data(scLANE_models) +#' data(sim_pseudotime) +#' smoothed_dynamics <- smoothedCountsMatrix(scLANE_models, +#' pt = sim_pseudotime, +#' n.cores = 1L) +#' gene_embed <- embedGenes(smoothed_dynamics$Lineage_A, n.cores = 1L) +#' sim_counts <- geneProgramScoring(sim_counts, +#' genes = gene_embed$gene, +#' gene.clusters = gene_embed$leiden, +#' n.cores = 1L) +#' program_drivers <- geneProgramDrivers(sim_counts, +#' genes = gene_embed$gene, +#' gene.program = sim_counts$cluster_0, +#' fdr.cutoff = 0.05) + +geneProgramDrivers <- function(expr.mat = NULL, + genes = NULL, + gene.program = NULL, + cor.method = "spearman", + fdr.cutoff = 0.01) { + # check inputs + if (is.null(expr.mat) || is.null(genes) || is.null(gene.program)) { stop("Arguments to geneProgramDrivers() are missing.") } + # set up counts matrix + if (inherits(expr.mat, "SingleCellExperiment")) { + counts_matrix <- SingleCellExperiment::logcounts(expr.mat) + } else if (inherits(expr.mat, "Seurat")) { + counts_matrix <- Seurat::GetAssayData(expr.mat, + slot = "data", + assay = Seurat::DefaultAssay(expr.mat)) + } else if (inherits(expr.mat, "dgCMatrix")) { + counts_matrix <- Matrix::Matrix(expr.mat, sparse = FALSE) + } + # iteratively compute correlations + cor_tests <- purrr::map(genes, \(g) { + cor_res <- stats::cor.test(counts_matrix[g, ], + gene.program, + method = "spearman", + exact = FALSE) + cor_df <- data.frame(gene = g, + corr = unname(cor_res$estimate), + pvalue = cor_res$p.value) + return(cor_df) + }) + cor_tests <- purrr::reduce(cor_tests, rbind) + cor_tests <- dplyr::arrange(cor_tests, + pvalue, + dplyr::desc(abs(corr))) %>% + dplyr::mutate(pvalue_adj = stats::p.adjust(pvalue, method = "holm")) %>% + dplyr::filter(pvalue_adj < fdr.cutoff) + return(cor_tests) +} diff --git a/R/geneProgramScoring.R b/R/geneProgramScoring.R index 5bd091b..c09f8f5 100644 --- a/R/geneProgramScoring.R +++ b/R/geneProgramScoring.R @@ -3,13 +3,14 @@ #' @name geneProgramScoring #' @author Jack Leary #' @importFrom Matrix Matrix -#' @description This function uses \code{\link[UCell]{ScoreSignatures_UCell}} to create a per-cell module score for each of the provided gene clusters. If the +#' @description This function uses \code{\link[UCell]{ScoreSignatures_UCell}} to create a per-cell module score for each of the provided gene clusters. If the input matrix is a \code{Seurat} or \code{SingleCellExperiment} object, then the resulting scores will be added to the \code{meta.data} or the \code{colData} slot, respectively. Otherwise, a data.frame of the per-program scores is returned. #' @param expr.mat Either a \code{SingleCellExperiment} or \code{Seurat} object from which counts can be extracted, or a matrix of integer-valued counts with genes as rows & cells as columns. Defaults to NULL. #' @param genes A character vector of gene IDs. Defaults to NULL. #' @param gene.clusters A factor containing the cluster assignment of each gene in \code{genes}. Defaults to NULL. #' @param program.labels (Optional) A character vector specifying a label for each gene cluster. Defaults to NULL. #' @param n.cores (Optional) The number of cores used under the hood in \code{\link[UCell]{ScoreSignatures_UCell}}. Defaults to 2. #' @return Either a \code{Seurat} or \code{SingleCellExperiment} object if \code{expr.mat} is in either form, or a data.frame containing per-cell program scores if \code{expr.mat} is a matrix. +#' @seealso \code{\link{geneProgramDrivers}} #' @export #' @examples #' data(sim_counts) diff --git a/man/geneProgramDrivers.Rd b/man/geneProgramDrivers.Rd new file mode 100644 index 0000000..6f18360 --- /dev/null +++ b/man/geneProgramDrivers.Rd @@ -0,0 +1,56 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/geneProgramDrivers.R +\name{geneProgramDrivers} +\alias{geneProgramDrivers} +\title{Identify driver genes for a given gene program.} +\usage{ +geneProgramDrivers( + expr.mat = NULL, + genes = NULL, + gene.program = NULL, + cor.method = "spearman", + fdr.cutoff = 0.01 +) +} +\arguments{ +\item{expr.mat}{Either a \code{SingleCellExperiment} or \code{Seurat} object from which counts can be extracted, or a matrix of normalized counts with genes as rows & cells as columns. Defaults to NULL.} + +\item{genes}{A character vector of genes to test. Defaults to NULL.} + +\item{gene.program}{A vector of program scores as returned by \code{\link{geneProgramScoring}}. Defaults to NULL.} + +\item{cor.method}{(Optional) The correlation method to be used. Defaults to "spearman".} + +\item{fdr.cutoff}{(Optional) The FDR threshold for determining statistical significance. Defaults to 0.01.} +} +\value{ +Either a \code{Seurat} or \code{SingleCellExperiment} object if \code{expr.mat} is in either form, or a data.frame containing per-cell program scores if \code{expr.mat} is a matrix. +} +\description{ +This function computes the correlation +} +\examples{ +data(sim_counts) +data(scLANE_models) +data(sim_pseudotime) +smoothed_dynamics <- smoothedCountsMatrix(scLANE_models, + pt = sim_pseudotime, + n.cores = 1L) +gene_embed <- embedGenes(smoothed_dynamics$Lineage_A, n.cores = 1L) +sim_counts <- geneProgramScoring(sim_counts, + genes = gene_embed$gene, + gene.clusters = gene_embed$leiden, + n.cores = 1L) +program_drivers <- geneProgramDrivers(sim_counts, + genes = gene_embed$gene, + gene.program = sim_counts$cluster_0, + fdr.cutoff = 0.05) +} +\seealso{ +\code{\link{geneProgramScoring}} + +\code{\link[stats]{cor.test}} +} +\author{ +Jack Leary +} diff --git a/man/geneProgramScoring.Rd b/man/geneProgramScoring.Rd index a807b12..e78a11f 100644 --- a/man/geneProgramScoring.Rd +++ b/man/geneProgramScoring.Rd @@ -27,7 +27,7 @@ geneProgramScoring( Either a \code{Seurat} or \code{SingleCellExperiment} object if \code{expr.mat} is in either form, or a data.frame containing per-cell program scores if \code{expr.mat} is a matrix. } \description{ -This function uses \code{\link[UCell]{ScoreSignatures_UCell}} to create a per-cell module score for each of the provided gene clusters. If the +This function uses \code{\link[UCell]{ScoreSignatures_UCell}} to create a per-cell module score for each of the provided gene clusters. If the input matrix is a \code{Seurat} or \code{SingleCellExperiment} object, then the resulting scores will be added to the \code{meta.data} or the \code{colData} slot, respectively. Otherwise, a data.frame of the per-program scores is returned. } \examples{ data(sim_counts) @@ -42,6 +42,9 @@ sim_counts <- geneProgramScoring(sim_counts, gene.clusters = gene_embed$leiden, n.cores = 1L) } +\seealso{ +\code{\link{geneProgramDrivers}} +} \author{ Jack Leary } diff --git a/tests/testthat/test_scLANE.R b/tests/testthat/test_scLANE.R index b97b424..39e65fc 100644 --- a/tests/testthat/test_scLANE.R +++ b/tests/testthat/test_scLANE.R @@ -226,6 +226,13 @@ withr::with_output_sink(tempfile(), { sim_data_seu <- geneProgramScoring(sim_data_seu, genes = gene_embedding$gene, gene.clusters = gene_embedding$leiden) + # gene program drivers + program_drivers <- geneProgramDrivers(sim_data, + genes = gene_embedding$gene, + gene.program = sim_data$cluster_0) + program_drivers_seu <- geneProgramDrivers(sim_data_seu, + genes = gene_embedding$gene, + gene.program = sim_data_seu$cluster_0) # enrichment analysis gsea_res <- enrichDynamicGenes(glm_test_results, species = "hsapiens") # coefficients @@ -399,6 +406,13 @@ test_that("geneProgramScoring() output", { expect_equal(colnames(sim_data_seu@meta.data)[11], "cluster_1") }) +test_that("geneProgramDrivers() output", { + expect_s3_class(program_drivers, "data.frame") + expect_s3_class(program_drivers_seu, "data.frame") + expect_equal(ncol(program_drivers), 4) + expect_equal(ncol(program_drivers_seu), 4) +}) + test_that("sortGenesHeatmap() output", { expect_type(sorted_genes, "character") expect_length(sorted_genes, ncol(smoothed_counts$Lineage_A))