Skip to content

Commit

Permalink
Merge pull request #97 from jr-leary7/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
jr-leary7 authored Jun 19, 2023
2 parents c611b46 + f8e0893 commit c73f4f6
Show file tree
Hide file tree
Showing 40 changed files with 1,301 additions and 829 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,6 @@
.Rhistory
.RData
.Ruserdata
.Rbuildignore
inst/doc
codecov.yml
79 changes: 47 additions & 32 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,62 +1,77 @@
Package: scLANE
Type: Package
Title: Model gene expression over pseudotime with spline-based NB GLMs, GEEs, & GLMMs
Version: 0.6.2
Title: Model gene expression dynamics with spline-based NB GLMs, GEEs, & GLMMs
Version: 0.7.0
Authors@R: c(person(given = "Jack", family = "Leary", email = "[email protected]", role = c("aut", "cre")),
person(given = "Rhonda", family = "Bacher", email = "[email protected]", role = c("ctb", "fnd")))
Description: This package uses truncated power basis spline models to build flexible, interpretable models of single cell gene expression over pseudotime / latent time. Currently supports negative binomial GLMs, GEEs, & GLMMs.
Description: This package uses truncated power basis spline models to build flexible, interpretable models of single cell gene expression over pseudotime or latent time.
The modeling architectures currently supported are negative binomial GLMs, GEEs, & GLMMs.
Downstream analysis functionalities include model comparison, dynamic gene clustering, smoothed counts generation, gene set enrichment testing, & visualization.
License: MIT + file LICENSE
Encoding: UTF-8
LazyData: true
RoxygenNote: 7.2.1
Depends:
magrittr,
glm2
glm2,
magrittr
Imports:
dplyr,
foreach,
doParallel,
parallel,
geeM,
MASS,
ggplot2,
dplyr,
stats,
gamlss,
scales,
splines,
bigstatsr,
broom,
utils,
withr,
purrr,
tidyr,
geeM,
glmmTMB,
broom.mixed,
tidyselect,
furrr,
gamlss,
scales,
future,
ggplot2,
splines,
foreach,
glmmTMB,
parallel,
bigstatsr,
RcppEigen,
doParallel,
tidyselect,
broom.mixed,
Rcpp (>= 1.0.7)
URL: https://github.com/jr-leary7/scLANE
BugReports: https://github.com/jr-leary7/scLANE/issues
Suggests:
rmarkdown,
knitr,
testthat (>= 3.0.0),
covr,
ggh4x,
knitr,
irlba,
rlang,
igraph,
Seurat,
bluster,
cluster,
clusterProfiler,
igraph,
irlba,
slingshot,
msigdbr,
rmarkdown,
slingshot,
BiocGenerics,
BiocNeighbors,
rlang,
Seurat,
clusterProfiler,
testthat (>= 3.0.0),
SingleCellExperiment,
SummarizedExperiment,
BiocGenerics
SummarizedExperiment
VignetteBuilder: knitr
Config/testthat/edition: 3
LinkingTo:
RcppEigen,
Rcpp
Rcpp,
RcppEigen
biocViews:
RNASeq,
Software,
TimeCourse,
Sequencing,
Regression,
SingleCell,
Visualization,
GeneExpression,
Transcriptomics,
DifferentialExpression
7 changes: 6 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Generated by roxygen2: do not edit by hand

export(clusterGenes)
export(createCellOffset)
export(enrichDynamicGenes)
export(extractBreakpoints)
export(fitGLMM)
Expand All @@ -25,7 +26,6 @@ importFrom(MASS,negative.binomial)
importFrom(MASS,theta.mm)
importFrom(Rcpp,sourceCpp)
importFrom(bigstatsr,as_FBM)
importFrom(broom,tidy)
importFrom(broom.mixed,tidy)
importFrom(doParallel,registerDoParallel)
importFrom(dplyr,across)
Expand Down Expand Up @@ -87,6 +87,8 @@ importFrom(parallel,stopCluster)
importFrom(purrr,discard)
importFrom(purrr,map)
importFrom(purrr,map2)
importFrom(purrr,map_chr)
importFrom(purrr,map_dbl)
importFrom(purrr,map_dfr)
importFrom(purrr,pmap_dfc)
importFrom(purrr,reduce)
Expand All @@ -104,6 +106,7 @@ importFrom(stats,hclust)
importFrom(stats,kmeans)
importFrom(stats,lm.fit)
importFrom(stats,logLik)
importFrom(stats,offset)
importFrom(stats,p.adjust)
importFrom(stats,p.adjust.methods)
importFrom(stats,pchisq)
Expand All @@ -113,4 +116,6 @@ importFrom(stats,quantile)
importFrom(stats,setNames)
importFrom(tidyr,pivot_longer)
importFrom(tidyselect,everything)
importFrom(utils,tail)
importFrom(withr,with_output_sink)
useDynLib(scLANE, .registration = TRUE)
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# scLANE 0.6.3

* Added a `NEWS.md` file to track changes to the package.
31 changes: 24 additions & 7 deletions R/clusterGenes.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,32 +7,44 @@
#' @importFrom purrr map discard map2 reduce
#' @importFrom stats setNames hclust cutree kmeans dist
#' @param test.dyn.res The list returned by \code{\link{testDynamic}} - no extra processing required. Defaults to NULL.
#' @param pt A data.frame containing the pseudotime or latent time estimates for each cell. Defaults to NULL.
#' @param size.factor.offset (Optional) An offset to be used to rescale the fitted values. Can be generated easily with \code{\link{createCellOffset}}. No need to provide if the GEE backend was used. Defaults to NULL.
#' @param clust.algo The clustering method to use. Can be one of "hclust", "kmeans", "leiden". Defaults to "leiden".
#' @param use.pca Should PCA be performed prior to clustering? Defaults to FALSE.
#' @param n.PC The number of principal components to use when performing dimension reduction prior to clustering. Defaults to 15.
#' @param lineages Should one or more lineages be isolated? If so, specify which one(s). Otherwise, all lineages will be clustered independently. Defaults to NULL.
#' @details
#' \itemize{
#' \item Due to some peculiarities of how the fitted values (on the link scale) are generated for \code{geeM} models, it's not necessary to multiply them by the offset as this is done internally. For GLM & GEE models, the opposite is true, and \code{size.factor.offset} must be provided in order to rescale the fitted values correctly.
#' }
#' @return A data.frame of with three columns: \code{Gene}, \code{Lineage}, and \code{Cluster}.
#' @seealso \code{\link{testDynamic}}
#' @seealso \code{\link{plotClusteredGenes}}
#' @export
#' @examples
#' \dontrun{
#' clusterGenes(gene_stats, clust.algo = "leiden")
#' clusterGenes(gene_stats, pt = pt_df)
#' clusterGenes(gene_stats,
#' pt = pt_df,
#' size.factor.offset = createCellOffset(sce_obj),
#' clust.algo = "kmeans",
#' use.pca = TRUE,
#' n.PC = 10,
#' lineages = "B")
#' clusterGenes(gene_stats, lineages = c("A", "C"))
#' clusterGenes(gene_stats,
#' pt = pt_df,
#' lineages = c("A", "C"))
#' }

clusterGenes <- function(test.dyn.res = NULL,
pt = NULL,
size.factor.offset = NULL,
clust.algo = "leiden",
use.pca = FALSE,
n.PC = 15,
lineages = NULL) {
# check inputs
if (is.null(test.dyn.res)) { stop("test.dyn.res must be supplied to clusterGenes().") }
if (is.null(test.dyn.res) || is.null(pt)) { stop("test.dyn.res & pt must be supplied to clusterGenes().") }
clust.algo <- tolower(clust.algo)
if (!clust.algo %in% c("hclust", "kmeans", "leiden")) { stop("clust.algo must be one of 'hclust', 'kmeans', or 'leiden'.") }
if ((use.pca & is.null(n.PC)) || (use.pca & n.PC <= 0)) { stop("n.PC must be a non-zero integer when clustering on principal components.") }
Expand All @@ -48,8 +60,13 @@ clusterGenes <- function(test.dyn.res = NULL,
purrr::discard(rlang::is_na) %>%
purrr::discard(\(p) rlang::inherits_only(p, "try-error")) %>%
purrr::map2(.y = names(.), function(x, y) {
t(as.data.frame(exp(x$marge_link_fit))) %>%
magrittr::set_rownames(y)
if (is.null(size.factor.offset)) {
t(as.data.frame(exp(x$marge_link_fit))) %>%
magrittr::set_rownames(y)
} else {
t(as.data.frame(exp(x$marge_link_fit)) * unname(size.factor.offset)[!is.na(pt[, l])]) %>%
magrittr::set_rownames(y)
}
}) %>%
purrr::reduce(rbind)
if (use.pca) {
Expand Down Expand Up @@ -105,12 +122,12 @@ clusterGenes <- function(test.dyn.res = NULL,
if (use.pca) {
clust_res <- stats::kmeans(fitted_vals_pca$x,
centers = k_to_use,
nstart = 5,
nstart = 10,
algorithm = "Hartigan-Wong")
} else {
clust_res <- stats::kmeans(fitted_vals_mat,
centers = k_to_use,
nstart = 5,
nstart = 10,
algorithm = "Hartigan-Wong")
}
gene_clusters <- data.frame(Gene = rownames(fitted_vals_mat),
Expand Down
36 changes: 36 additions & 0 deletions R/createCellOffset.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#' Create an offset vector before modeling
#'
#' @name createCellOffset
#' @author Jack Leary
#' @description Creates a vector of per-cell size factors to be used as input to \code{\link{testDynamic}} as a model offset given a variety of inputs.
#' @param expr.mat Either a gene-by-cell (cells as columns) matrix of raw integer counts prior to any cell filtering, a \code{Seurat} object, or a \code{SingleCellExperiment} object. Defaults to NULL.
#' @param scale.factor The scaling factor use to multiply the sequencing depth factor for each cell. The default value is 1e4, which returns counts-per-10k.
#' @return A named numeric vector containing the computed size factor for each cell.
#' @seealso \code{\link{testDynamic}}
#' @seealso \code{\link{marge2}}
#' @seealso \code{\link[Seurat]{LogNormalize}}
#' @seealso \code{\link[scuttle]{computeLibraryFactors}}
#' @export
#' @examples
#' \dontrun{
#' createCellOffset(expr.mat = raw_counts)
#' createCellOffset(expr.mat = raw_counts, scale.factor = 1e5)
#' }

createCellOffset <- function(expr.mat = NULL, scale.factor = 1e4) {
# check inputs
if (is.null(expr.mat)) { stop("Please provide expr.mat to createCellOffset().") }
if (inherits(expr.mat, "SingleCellExperiment")) {
expr.mat <- as.matrix(BiocGenerics::counts(expr.mat))
} else if (inherits(expr.mat, "Seurat")) {
expr.mat <- as.matrix(Seurat::GetAssayData(expr.mat,
slot = "counts",
assay = Seurat::DefaultAssay(expr.mat)))
}
# compute per-cell size factors
cell_names <- colnames(expr.mat)
seq_depths <- colSums(expr.mat)
lib_size_factors <- scale.factor / seq_depths
names(lib_size_factors) <- cell_names
return(lib_size_factors)
}
5 changes: 3 additions & 2 deletions R/createSlopeTestData.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#' @name createSlopeTestData
#' @author Jack Leary
#' @description Creates a data.frame of \code{marge} model breakpoints, \emph{p}-values, and other info.
#' @importFrom purrr map_dbl
#' @param marge.model A \code{marge} model object, like those returned from \code{\link{marge2}}. Defaults to NULL.
#' @param pt A data.frame containing pseudotime or latent time values. Defaults to NULL.
#' @param is.gee Was the GEE framework used? Defaults to FALSE.
Expand Down Expand Up @@ -33,7 +34,7 @@ createSlopeTestData <- function(marge.model = NULL,
p_vals <- NA_real_
mod_notes <- "MARGE model error"
} else {
if (!is.glmm && length(coef(marge.model$final_mod)) == 1) {
if (!is.glmm && length(marge.model$marge_coef_names) == 1) {
rounded_brkpts <- NA_real_
brkpts <- NA_real_
brkpt_dirs <- NA_character_
Expand All @@ -44,7 +45,7 @@ createSlopeTestData <- function(marge.model = NULL,
model_breakpoints_rounded <- extractBreakpoints(marge.model, directions = TRUE)
rounded_brkpts <- model_breakpoints_rounded$Breakpoint
brkpt_dirs <- model_breakpoints_rounded$Direction
brkpts <- sapply(rounded_brkpts, \(x) pt[, 1][which.min(abs(pt[, 1] - x))])
brkpts <- purrr::map_dbl(rounded_brkpts, \(x) pt[, 1][which.min(abs(pt[, 1] - x))])
# extract p-values for coefficients other than intercept
if (is.gee) {
p_vals <- summary(marge.model$final_mod)$p[-1]
Expand Down
11 changes: 5 additions & 6 deletions R/extractBreakpoints.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,14 @@ extractBreakpoints <- function(model = NULL, directions = TRUE) {
if (is.null(model)) { stop("Model input is missing from extractBreakpoints().") }
if (!inherits(model, "marge")) { stop("Model must be of class marge.") }
# find changepoints -> need to parse them out of coefficient names generated by marge2()
coef_names <- model$marge_coef_names
coef_names <- gsub("B_final", "", coef_names)
coef_names <- model$coef_names
coef_names <- coef_names[-which(coef_names == "Intercept")]
coef_names <- gsub("\\)", "", gsub("\\(", "", coef_names))
coef_nums <- gsub("[A-z]|[a-z]", "", coef_names)
brkpts_char <- gsub("-", "", gsub("_", "", coef_nums))
brkpt_df <- data.frame(Breakpoint = as.numeric(brkpts_char))
brkpt_df <- data.frame(Breakpoint = as.numeric(coef_nums))
if (directions) {
brkpt_dirs <- ifelse(grepl("_[A-Z]-.*", coef_names), "Right", "Left")
brkpt_dirs <- ifelse(grepl("h_[a-z].*_[0-9]", coef_names, ignore.case = TRUE),
"Right",
"Left")
brkpt_df$Direction <- brkpt_dirs
}
return(brkpt_df)
Expand Down
Loading

0 comments on commit c73f4f6

Please sign in to comment.