diff --git a/DESCRIPTION b/DESCRIPTION index aec8dd7..49e516f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: scLANE Type: Package Title: Model gene expression dynamics with spline-based NB GLMs, GEEs, & GLMMs -Version: 0.7.5 +Version: 0.7.6 Authors@R: c(person(given = "Jack", family = "Leary", email = "j.leary@ufl.edu", role = c("aut", "cre"), comment = c(ORCID = "0009-0004-8821-3269")), person(given = "Rhonda", family = "Bacher", email = "rbacher@ufl.edu", role = c("ctb", "fnd"), comment = c(ORCID = "0000-0001-5787-476X"))) Description: This package uses truncated power basis spline models to build flexible, interpretable models of single cell gene expression over pseudotime or latent time. @@ -9,7 +9,7 @@ Description: This package uses truncated power basis spline models to build flex Downstream analysis functionalities include model comparison, dynamic gene clustering, smoothed counts generation, gene set enrichment testing, & visualization. License: MIT + file LICENSE Encoding: UTF-8 -LazyData: false +LazyData: true RoxygenNote: 7.2.1 Depends: glm2, diff --git a/NAMESPACE b/NAMESPACE index 894e167..63d2bc8 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -30,6 +30,7 @@ importFrom(MASS,glm.nb) importFrom(MASS,negative.binomial) importFrom(MASS,theta.mm) importFrom(Matrix,colSums) +importFrom(Matrix,t) importFrom(Rcpp,sourceCpp) importFrom(bigstatsr,as_FBM) importFrom(broom.mixed,tidy) diff --git a/NEWS.md b/NEWS.md index 8bc93eb..afdf733 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,8 @@ +# `scLANE` v0.7.6 + +* Added Zenodo tracking. +* Added simulated datasets (`sim_counts` & `sim_pseudotime`) to `data/`. + # `scLANE` v0.7.5 * Preparing for BioConductor submission i.e., reformatting code, adding documentation, etc. diff --git a/R/data.R b/R/data.R new file mode 100644 index 0000000..5ef1677 --- /dev/null +++ b/R/data.R @@ -0,0 +1,19 @@ +#' A \code{\link[SingleCellExperiment]{SingleCellExperiment}} object containing simulated counts. 
+#' +#' Data simulated using the \code{scaffold} R package for 50 dynamic and 50 static genes across 1200 cells from 3 subjects. +#' +#' @format An object of class \code{\link[SingleCellExperiment]{SingleCellExperiment}}. +#' @source https://www.rhondabacher.com/scaffold-vignette.pdf +#' @usage data(sim_counts) +"sim_counts" + +#' A data.frame containing ground-truth pseudotime. +#' +#' The true ordering of the 1200 cells contained in \code{sim_counts}. +#' +#' @format An object of class \code{data.frame} with 1200 rows and one variable: +#' \itemize{ +#' \item PT: the true pseudotime (0.0025--1) +#' } +#' @usage data(sim_pseudotime) +"sim_pseudotime" diff --git a/R/testDynamic.R b/R/testDynamic.R index 7256815..1a1b8cc 100644 --- a/R/testDynamic.R +++ b/R/testDynamic.R @@ -5,6 +5,7 @@ #' @description This function tests whether a NB \code{marge} model is better than a null (intercept-only) NB GLM using the Likelihood Ratio Test. In effect, the test tells us whether a gene's expression changes (in any way) over pseudotime. 
#' @import glm2 #' @import magrittr +#' @importFrom Matrix t #' @importFrom bigstatsr as_FBM #' @importFrom foreach foreach %dopar% registerDoSEQ #' @importFrom doParallel registerDoParallel @@ -45,34 +46,29 @@ #' @seealso \code{\link[glmmTMB]{glmmTMB}} #' @export #' @examples -#' \dontrun{ -#' testDynamic(expr.mat = raw_counts, -#' pt = pseudotime_df, -#' parallel.exec = TRUE) -#' testDynamic(expr.mat = sce_obj, -#' pt = slingshot_obj, -#' size.factor.offset = sizeFactors(sce_obj), -#' genes = rownames(sce_obj)[1:100]) -#' testDynamic(expr.mat = raw_counts, -#' pt = pseudotime_df, -#' parallel.exec = TRUE, -#' n.cores = 8, -#' n.potential.basis.fns = 7) -#' testDynamic(expr.mat = counts(sce_obj), -#' pt = pseudotime_df, +#' \donttest{ +#' data(sim_counts) +#' data(sim_pseudotime) +#' cell_offset <- createCellOffset(sim_counts) +#' testDynamic(sim_counts, +#' pt = sim_pseudotime, +#' size.factor.offset = cell_offset, +#' genes = sample(rownames(sim_counts), 20)) +#' testDynamic(sim_counts, +#' pt = sim_pseudotime, +#' size.factor.offset = cell_offset, #' is.gee = TRUE, -#' id.vec = colData(sce_obj)$subject_id, +#' id.vec = sim_counts$subject, #' cor.structure = "ar1", -#' parallel.exec = TRUE, -#' n.cores = 8, -#' n.potential.basis.fns = 7) -#' testDynamic(expr.mat = seu_obj, -#' pt = pseudotime_df, -#' parallel.exec = TRUE, -#' n.cores = 8, +#' genes = sample(rownames(sim_counts), 20)) +#' testDynamic(sim_counts, +#' pt = sim_pseudotime, +#' size.factor.offset = cell_offset, #' is.glmm = TRUE, -#' id.vec = seu_obj$subject_id) -#' } +#' glmm.adaptive = TRUE, +#' id.vec = sim_counts$subject, +#' genes = sample(rownames(sim_counts), 20)) +#'} testDynamic <- function(expr.mat = NULL, pt = NULL, @@ -98,19 +94,18 @@ testDynamic <- function(expr.mat = NULL, } if (inherits(expr.mat, "SingleCellExperiment")) { expr.mat <- BiocGenerics::counts(expr.mat)[genes, ] - expr.mat <- as.matrix(expr.mat) } else if (inherits(expr.mat, "Seurat")) { expr.mat <- 
Seurat::GetAssayData(expr.mat, slot = "counts", assay = Seurat::DefaultAssay(expr.mat)) - expr.mat <- as.matrix(expr.mat[genes, ]) + expr.mat <- expr.mat[genes, ] } else if (inherits(expr.mat, "dgCMatrix")) { - expr.mat <- as.matrix(expr.mat[genes, ]) + expr.mat <- expr.mat[genes, ] } else { expr.mat <- expr.mat[genes, ] } + expr.mat <- as.matrix(Matrix::t(expr.mat)) # transpose to dense cell x gene matrix if (!(inherits(expr.mat, "matrix") || inherits(expr.mat, "array"))) { stop("Input expr.mat must be coerceable to a matrix of integer counts.") } - expr.mat <- t(expr.mat) # transpose to cell x gene matrix # extract pseudotime dataframe if input is results from Slingshot if (inherits(pt, "SlingshotDataSet")) { diff --git a/data/sim_counts.rda b/data/sim_counts.rda new file mode 100644 index 0000000..9a28668 Binary files /dev/null and b/data/sim_counts.rda differ diff --git a/data/sim_pseudotime.rda b/data/sim_pseudotime.rda new file mode 100644 index 0000000..7e9f9c4 Binary files /dev/null and b/data/sim_pseudotime.rda differ diff --git a/man/sim_counts.Rd b/man/sim_counts.Rd new file mode 100644 index 0000000..8d11f2d --- /dev/null +++ b/man/sim_counts.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{sim_counts} +\alias{sim_counts} +\title{A \code{\link[SingleCellExperiment]{SingleCellExperiment}} object containing simulated counts.} +\format{ +An object of class \code{\link[SingleCellExperiment]{SingleCellExperiment}}. +} +\source{ +https://www.rhondabacher.com/scaffold-vignette.pdf +} +\usage{ +data(sim_counts) +} +\description{ +Data simulated using the \code{scaffold} R package for 50 dynamic and 50 static genes across 1200 cells from 3 subjects. 
+} +\keyword{datasets} diff --git a/man/sim_pseudotime.Rd b/man/sim_pseudotime.Rd new file mode 100644 index 0000000..c75d4ec --- /dev/null +++ b/man/sim_pseudotime.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{sim_pseudotime} +\alias{sim_pseudotime} +\title{A data.frame containing ground-truth pseudotime.} +\format{ +An object of class \code{data.frame} with 1200 rows and one variable: +\itemize{ + \item PT: the true pseudotime (0.0025--1) +} +} +\usage{ +data(sim_pseudotime) +} +\description{ +The true ordering of the 1200 cells contained in \code{sim_counts}. +} +\keyword{datasets} diff --git a/man/testDynamic.Rd b/man/testDynamic.Rd index e4d043e..37e034c 100644 --- a/man/testDynamic.Rd +++ b/man/testDynamic.Rd @@ -66,33 +66,28 @@ This function tests whether a NB \code{marge} model is better than a null (inter } } \examples{ -\dontrun{ -testDynamic(expr.mat = raw_counts, - pt = pseudotime_df, - parallel.exec = TRUE) -testDynamic(expr.mat = sce_obj, - pt = slingshot_obj, - size.factor.offset = sizeFactors(sce_obj), - genes = rownames(sce_obj)[1:100]) -testDynamic(expr.mat = raw_counts, - pt = pseudotime_df, - parallel.exec = TRUE, - n.cores = 8, - n.potential.basis.fns = 7) -testDynamic(expr.mat = counts(sce_obj), - pt = pseudotime_df, +\donttest{ +data(sim_counts) +data(sim_pseudotime) +cell_offset <- createCellOffset(sim_counts) +testDynamic(sim_counts, + pt = sim_pseudotime, + size.factor.offset = cell_offset, + genes = sample(rownames(sim_counts), 20)) +testDynamic(sim_counts, + pt = sim_pseudotime, + size.factor.offset = cell_offset, is.gee = TRUE, - id.vec = colData(sce_obj)$subject_id, + id.vec = sim_counts$subject, cor.structure = "ar1", - parallel.exec = TRUE, - n.cores = 8, - n.potential.basis.fns = 7) -testDynamic(expr.mat = seu_obj, - pt = pseudotime_df, - parallel.exec = TRUE, - n.cores = 8, + genes = sample(rownames(sim_counts), 20)) +testDynamic(sim_counts, + pt = 
sim_pseudotime, + size.factor.offset = cell_offset, is.glmm = TRUE, - id.vec = seu_obj$subject_id) + glmm.adaptive = TRUE, + id.vec = sim_counts$subject, + genes = sample(rownames(sim_counts), 20)) } } \seealso{