Skip to content

Commit

Permalink
prepping for BioConductor submission -- addresses #96
Browse files Browse the repository at this point in the history
  • Loading branch information
jr-leary7 committed Oct 19, 2023
1 parent 3889195 commit deed8d9
Show file tree
Hide file tree
Showing 22 changed files with 328 additions and 59 deletions.
2 changes: 2 additions & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
^.*\.Rproj$
^\.Rproj\.user$
^\.github$
^doc$
^Meta$
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,5 @@
.Rbuildignore
inst/doc
codecov.yml
/doc/
/Meta/
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: scLANE
Type: Package
Title: Model gene expression dynamics with spline-based NB GLMs, GEEs, & GLMMs
Version: 0.7.4
Version: 0.7.5
Authors@R: c(person(given = "Jack", family = "Leary", email = "[email protected]", role = c("aut", "cre"), comment = c(ORCID = "0009-0004-8821-3269")),
person(given = "Rhonda", family = "Bacher", email = "[email protected]", role = c("ctb", "fnd"), comment = c(ORCID = "0000-0001-5787-476X")))
Description: This package uses truncated power basis spline models to build flexible, interpretable models of single cell gene expression over pseudotime or latent time.
Expand Down Expand Up @@ -55,6 +55,7 @@ Suggests:
bluster,
cluster,
rmarkdown,
BiocStyle,
slingshot,
gprofiler2,
BiocGenerics,
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ importFrom(stats,predict)
importFrom(stats,qnorm)
importFrom(stats,quantile)
importFrom(stats,setNames)
importFrom(stats,vcov)
importFrom(tidyr,pivot_longer)
importFrom(tidyselect,everything)
importFrom(utils,tail)
Expand Down
18 changes: 14 additions & 4 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,22 +1,32 @@
# `scLANE` 0.7.3
# `scLANE` v0.7.5

* Preparing for BioConductor submission i.e., reformatting code, adding documentation, etc.
* Added convolution function `npConvolve()` to be used for e.g., heatmap smoothing.

# `scLANE` v0.7.4

* Added the `getKnotDist()` function to pull the set of empirically-identified knots for a user-provided gene set.
* Minor enhancements & documentation improvements.

# `scLANE` v0.7.3

* Added a function named `embedGenes()` that takes a smoothed counts matrix as input & returns PCA & UMAP embeddings along with a graph-based clustering.
* Updated the `clusterGenes()` function to be much more efficient as well as changing the distance metric used to be cosine dissimilarity.
* Added `theme_scLANE()` for output plots.
* Enhanced documentation.
* Increased test coverage.

# `scLANE` 0.7.2
# `scLANE` v0.7.2

* Added a function named `sortGenesHeatmap()` that aids in the creation of expression cascade heatmaps by sorting genes according to where in pseudotime their peak expression is.
* Changed the parameter `approx.knot` in the `testDynamic()` function to use (stochasticity-controlled) subsampling instead of `seq()` to reduce candidate knot space.
* Added `summarizeModels()` to sum up slopes across pseudotime intervals.

# `scLANE` 0.7.1
# `scLANE` v0.7.1

* Changed input format of all functions to allow counts matrices formatted as `SingleCellExperiment` or `Seurat` objects, sparse matrices, or dense matrices.
* Updated visualization functions to reflect changes made in `ggplot2` v3.4 (mostly changing the `size` parameter in line-based geoms to be `linewidth` instead).

# `scLANE` 0.6.3
# `scLANE` v0.6.3

* Added a `NEWS.md` file to track changes to the package.
2 changes: 1 addition & 1 deletion R/GetResultsDE.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ getResultsDE <- function(test.dyn.res = NULL,
function(x) {
purrr::map_dfr(x,
function(y) {
as.data.frame(rbind(y[1:12])) %>%
as.data.frame(rbind(y[seq(12)])) %>%
dplyr::mutate(dplyr::across(tidyselect::everything(), \(z) unname(unlist(z))))
})
}) %>%
Expand Down
7 changes: 4 additions & 3 deletions R/embedGenes.R
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@ embedGenes <- function(smoothed.counts = NULL,
genes <- colnames(smoothed.counts)
smoothed.counts <- t(smoothed.counts)
# embeddings
set.seed(random.seed)
smoothed_counts_pca <- irlba::prcomp_irlba(smoothed.counts,
n = pc.embed,
center = TRUE,
Expand All @@ -52,14 +51,16 @@ embedGenes <- function(smoothed.counts = NULL,
metric = "cosine",
n_neighbors = k.param,
init = "spectral",
nn_method = "annoy")
nn_method = "annoy",
seed = random.seed)
} else {
smoothed_counts_umap <- uwot::umap(smoothed.counts,
n_components = 2,
metric = "cosine",
n_neighbors = k.param,
init = "spectral",
nn_method = "annoy")
nn_method = "annoy",
seed = random.seed)
}
# clustering w/ silhouette score parameter tuning
if (cluster.genes) {
Expand Down
10 changes: 5 additions & 5 deletions R/getFittedValues.R
Original file line number Diff line number Diff line change
Expand Up @@ -116,16 +116,16 @@ getFittedValues <- function(test.dyn.res = NULL,
}) %>%
purrr::reduce(rbind)
if (!is.null(id.vec)) {
mod_df$id <- id.vec[!is.na(x)]
mod_df <- dplyr::mutate(mod_df, subj_id = id.vec[!is.na(x)])
} else {
mod_df$id <- NA_character_
mod_df <- dplyr::mutate(mod_df, subj_id = NA_character_)
}
return(mod_df)
})
final_df <- purrr::reduce(mod_df_list, rbind) %>%
dplyr::relocate(cell, id, lineage)
if (all(is.na(final_df$id))) {
final_df <- dplyr::select(final_df, -id)
dplyr::relocate(cell, subj_id, lineage)
if (is.null(id.vec)) {
final_df <- dplyr::select(final_df, -subj_id)
}
if (!is.null(filter.lineage)) {
final_df <- dplyr::filter(final_df, !lineage %in% filter.lineage)
Expand Down
8 changes: 4 additions & 4 deletions R/marge2.R
Original file line number Diff line number Diff line change
Expand Up @@ -411,7 +411,7 @@ marge2 <- function(X_pred = NULL,
if (utils::tail(max(score_knot_one_int_mat, na.rm = TRUE), n = 1) > utils::tail(max(score_knot_one_add_mat, na.rm = TRUE), n = 1)) {
int <- TRUE
score_knot <- score_knot_one_int_mat
temp <- utils::tail(which(utils::tail(max(round(score_knot, 6), na.rm = T), n = 1) == round(score_knot, 6), arr.ind = TRUE), n = 1)
temp <- utils::tail(which(utils::tail(max(round(score_knot, 6), na.rm = TRUE), n = 1) == round(score_knot, 6), arr.ind = TRUE), n = 1)
min_knot1 <- temp[1]
best.var <- temp[2]
} else {
Expand Down Expand Up @@ -784,7 +784,7 @@ marge2 <- function(X_pred = NULL,
wic_mat_2[1, (ncol_B + 1)] <- full.wic
wic1_2 <- backward_sel_WIC(Y = Y, B_new = B_new)
wic_mat_2[2, 2:(length(wic1_2) + 1)] <- wic1_2
WIC_2 <- sum(apply(wic_mat_2[1:2, ], 1, min, na.rm = TRUE)) + 2 * ncol(B_new)
WIC_2 <- sum(apply(wic_mat_2[seq(2), ], 1, min, na.rm = TRUE)) + 2 * ncol(B_new)
WIC_vec_2 <- c(WIC_vec_2, WIC_2)

variable.lowest_2 <- as.numeric(which(wic1_2 == min(wic1_2, na.rm = TRUE))[1])
Expand All @@ -795,14 +795,14 @@ marge2 <- function(X_pred = NULL,
wic1_2 <- backward_sel_WIC(Y = Y, B_new = B_new_2)
if (i != (ncol_B - 1)) {
wic_mat_2[(i + 1), colnames(B_new_2)[-1]] <- wic1_2
WIC_2 <- sum(apply(wic_mat_2[1:(i + 1), ], 1, min, na.rm = TRUE)) + 2 * ncol(B_new_2)
WIC_2 <- sum(apply(wic_mat_2[seq(i + 1), ], 1, min, na.rm = TRUE)) + 2 * ncol(B_new_2)
WIC_vec_2 <- c(WIC_vec_2, WIC_2)
variable.lowest_2 <- as.numeric(which(wic1_2 == min(wic1_2, na.rm = TRUE))[1])
var.low.vec_2 <- c(var.low.vec_2, colnames(B_new_2)[variable.lowest_2 + 1])
B_new_2 <- as.matrix(B_new_2[, -(variable.lowest_2 + 1)])
} else {
wic_mat_2[(i + 1), colnames(B_new_2)[-1]] <- wic1_2
WIC_2 <- sum(apply(wic_mat_2[1:(ncol_B), ], 1, min, na.rm = TRUE)) + 2 * ncol(B_new_2)
WIC_2 <- sum(apply(wic_mat_2[seq(ncol_B), ], 1, min, na.rm = TRUE)) + 2 * ncol(B_new_2)
WIC_vec_2 <- c(WIC_vec_2, WIC_2)
B_new_2 <- as.matrix(B_new_2[, -(variable.lowest_2)])
colnames(B_new_2) <- "Intercept"
Expand Down
2 changes: 1 addition & 1 deletion R/max_span.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ max_span <- function(X_red = NULL,
N <- length(unique(X_red))
x <- sort(unique(X_red))
maxspan <- round((3 - log2(alpha / q)))
x_new <- x[-c(1:maxspan, floor(N - maxspan + 1):N)]
x_new <- x[-c(seq(maxspan), floor(N - maxspan + 1):N)]
if (length(x_new) == 0) {
x_new <- x
}
Expand Down
6 changes: 3 additions & 3 deletions R/plotModels.R
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ plotModels <- function(test.dyn.res = NULL,
se = TRUE,
REML = FALSE)
}
glm_preds <- data.frame(predict(glm_mod, type = "link", se.fit = TRUE)[1:2])
glm_preds <- data.frame(predict(glm_mod, type = "link", se.fit = TRUE)[seq(2)])
} else {
glm_formula <- "COUNT ~ PT"
if (!is.null(size.factor.offset)) {
Expand All @@ -168,7 +168,7 @@ plotModels <- function(test.dyn.res = NULL,
method = "glm.fit2",
link = log,
init.theta = 1)
glm_preds <- data.frame(stats::predict(glm_mod, type = "link", se.fit = TRUE)[1:2])
glm_preds <- data.frame(stats::predict(glm_mod, type = "link", se.fit = TRUE)[seq(2)])
}
x <- dplyr::mutate(x,
RESP_GLM = glm_preds$fit,
Expand Down Expand Up @@ -202,7 +202,7 @@ plotModels <- function(test.dyn.res = NULL,
pt = x$PT)
}
}
gam_preds <- data.frame(predict(gam_mod, type = "link", se.fit = TRUE)[1:2])
gam_preds <- data.frame(predict(gam_mod, type = "link", se.fit = TRUE)[seq(2)])
x <- dplyr::mutate(x,
RESP_GAM = gam_preds$fit,
SE_GAM = gam_preds$se.fit)
Expand Down
8 changes: 4 additions & 4 deletions R/pull_sumy.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,15 +31,15 @@ pull.null.sumy <- function(mod.obj, is.gee, is.glmm) {
`p.value` = null_glmm_summary$p.value[1])
}, silent = TRUE)
null_pred_df <- try({
data.frame(predict(mod.obj, type = "link", se.fit = TRUE)[1:2]) %>%
data.frame(predict(mod.obj, type = "link", se.fit = TRUE)[seq(2)]) %>%
dplyr::rename(null_link_fit = fit, null_link_se = se.fit)
}, silent = TRUE)
} else {
null_sumy_df <- try({
as.data.frame(broom.mixed::tidy(mod.obj)) # saves a few bytes by converting from tibble
}, silent = TRUE)
null_pred_df <- try({
data.frame(stats::predict(mod.obj, type = "link", se.fit = TRUE)[1:2]) %>%
data.frame(stats::predict(mod.obj, type = "link", se.fit = TRUE)[seq(2)]) %>%
dplyr::rename(null_link_fit = fit, null_link_se = se.fit)
}, silent = TRUE)
}
Expand Down Expand Up @@ -87,7 +87,7 @@ pull.marge.sumy <- function(mod.obj,

} else if (is.glmm) {
marge_pred_df <- try({
data.frame(predict(mod.obj$final_mod, type = "link", se.fit = TRUE)[1:2]) %>%
data.frame(predict(mod.obj$final_mod, type = "link", se.fit = TRUE)[seq(2)]) %>%
dplyr::rename(marge_link_fit = fit, marge_link_se = se.fit)
}, silent = TRUE)
marge_sumy_df <- try({
Expand All @@ -101,7 +101,7 @@ pull.marge.sumy <- function(mod.obj,

} else {
marge_pred_df <- try({
data.frame(stats::predict(mod.obj$final_mod, type = "link", se.fit = TRUE)[1:2]) %>%
data.frame(stats::predict(mod.obj$final_mod, type = "link", se.fit = TRUE)[seq(2)]) %>%
dplyr::rename(marge_link_fit = fit, marge_link_se = se.fit)
}, silent = TRUE)
marge_sumy_df <- try({
Expand Down
6 changes: 3 additions & 3 deletions R/score_fun_gee.R
Original file line number Diff line number Diff line change
Expand Up @@ -54,13 +54,13 @@ score_fun_gee <- function(Y = NULL,
J21 <- Sigma21 <- matrix(0, nrow = p, ncol = p1)

for (i in seq(N)) {
k <- sum(n_vec[1:i])
k <- sum(n_vec[seq(i)])
VS.est_i <- VS.est_list[[i]]
AWA.est_i <- AWA.est_list[[i]]
J2_i <- J2_list[[i]]
Sigma2_i <- Sigma2_list[[i]]
D.est_i <- eigenMapMatMult(A = diag((mu.est[(sum(n_vec1[1:i]) + 1):k]), nrow = n_vec[i], ncol = n_vec[i]),
B = XA[(sum(n_vec1[1:i]) + 1):k, ],
D.est_i <- eigenMapMatMult(A = diag((mu.est[(sum(n_vec1[seq(i)]) + 1):k]), nrow = n_vec[i], ncol = n_vec[i]),
B = XA[(sum(n_vec1[seq(i)]) + 1):k, ],
n_cores = 1)
D_est_i_transpose <- t(D.est_i)
J21 <- J21 + eigenMapMatMult(A = D_est_i_transpose,
Expand Down
2 changes: 1 addition & 1 deletion R/smoothedCountsMatrix.R
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ smoothedCountsMatrix <- function(test.dyn.res = NULL,
} else {
genes <- names(test.dyn.res)
}
lineages <- LETTERS[1:length(test.dyn.res[[1]])]
lineages <- LETTERS[seq(length(test.dyn.res[[1]]))]
colnames(pt) <- paste0("Lineage_", lineages)
lineage_mat_list <- purrr::map(lineages, \(x) {
lineage_name <- paste0("Lineage_", x)
Expand Down
6 changes: 3 additions & 3 deletions R/sortGenesHeatmap.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,18 +31,18 @@ sortGenesHeatmap <- function(heatmap.mat = NULL, pt.vec = NULL) {
# identify point at which peak expression occurs for each gene across pseudotime
gene_peak_order <- purrr::map(seq_len(ncol(heatmap.mat)), \(x) {
data.frame(gene = colnames(heatmap.mat)[x],
pt = pt.vec,
pseudotime = pt.vec,
mRNA = heatmap.mat[, x]) %>%
dplyr::filter(mRNA == max(mRNA)) %>%
dplyr::distinct() %>%
dplyr::select(gene,
pt,
pseudotime,
mRNA)
}) %>%
purrr::reduce(rbind) %>%
dplyr::with_groups(gene,
dplyr::summarise,
mu = mean(pt)) %>%
mu = mean(pseudotime)) %>%
dplyr::arrange(mu) %>%
dplyr::pull(gene)
return(gene_peak_order)
Expand Down
6 changes: 3 additions & 3 deletions R/stat_out_score_gee_null.R
Original file line number Diff line number Diff line change
Expand Up @@ -46,19 +46,19 @@ stat_out_score_gee_null <- function(Y = NULL,
J11 <- Sigma11 <- matrix(0, ncol(B_null), ncol(B_null))

for (i in seq(N)) {
k <- sum(n_vec[1:i])
k <- sum(n_vec[seq(i)])
# set up working correlation matrix structure
if (cor.structure == "independence") {
R_alpha <- diag(1, n_vec[i], n_vec[i])
} else if (cor.structure == "exchangeable") {
R_alpha <- matrix(alpha_est, n_vec[i], n_vec[i]) + diag(c(1 - alpha_est), n_vec[i], n_vec[i])
} else if (cor.structure == "ar1") {
R_alpha <- alpha_est^outer(1:n_vec[i], 1:n_vec[i], \(x, y) abs(x - y))
R_alpha <- alpha_est^outer(seq(n_vec[i]), seq(n_vec[i]), \(x, y) abs(x - y))
} else {
stop("Currently unsupported correlation structure.")
}
# compute iteration values for each statistic
temp_seq_n <- (sum(n_vec1[1:i]) + 1):k
temp_seq_n <- (sum(n_vec1[seq(i)]) + 1):k
mu_est_sub <- mu_est[temp_seq_n]
diag_sqrt_V_est <- diag(sqrt(V_est[temp_seq_n]),
nrow = n_vec[i],
Expand Down
3 changes: 2 additions & 1 deletion R/summarizeModel.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#' @author Jack Leary
#' @author Rhonda Bacher
#' @import magrittr
#' @importFrom stats vcov
#' @description This function summarizes the model for each gene and allows for quantiative interpretation of fitted gene dynamics.
#' @param marge.model The fitted model output from \code{\link{marge2}} (this function is internally called by \code{\link{testDynamic}}). Defaults to NULL.
#' @param pt The predictor matrix of pseudotime. Defaults to NULL.
Expand All @@ -27,7 +28,7 @@ summarizeModel <- function(marge.model = NULL, pt = NULL) {
if (marge.model$model_type == "GEE") {
coef_vcov <- as.matrix(marge.model$final_mod$var)
} else {
coef_vcov <- vcov(marge.model$final_mod)
coef_vcov <- stats::vcov(marge.model$final_mod)
}
coef_df <- data.frame(coef_name = names(coef(marge.model$final_mod)),
coef_value = unname(coef(marge.model$final_mod)),
Expand Down
16 changes: 8 additions & 8 deletions R/testDynamic.R
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
#' @param n.cores (Optional) If running in parallel, how many cores should be used? Defaults to 2.
#' @param approx.knot (Optional) Should the knot space be reduced in order to improve computation time? Defaults to TRUE.
#' @param glmm.adaptive (Optional) Should the basis functions for the GLMM be chosen adaptively? If not, uses 4 evenly spaced knots. Defaults to FALSE.
#' @param track.time (Optional) A boolean indicating whether the amount of time the function takes to run should be tracked and printed to the console. Useful for debugging. Defaults to FALSE.
#' @param track.time (Optional) A boolean indicating whether the amount of time the function takes to run should be tracked and printed to the console. Defaults to TRUE.
#' @param random.seed (Optional) The random seed used to initialize RNG streams in parallel. Defaults to 312.
#' @details
#' \itemize{
Expand Down Expand Up @@ -87,7 +87,7 @@ testDynamic <- function(expr.mat = NULL,
parallel.exec = TRUE,
n.cores = 2,
approx.knot = TRUE,
track.time = FALSE,
track.time = TRUE,
random.seed = 312) {
# check inputs
if (is.null(expr.mat) || is.null(pt)) { stop("You forgot some inputs to testDynamic().") }
Expand Down Expand Up @@ -123,7 +123,7 @@ testDynamic <- function(expr.mat = NULL,

# set pseudotime lineage column names automatically to prevent user error (uses e.g., "Lineage_A", "Lineage_B", etc.)
n_lineages <- ncol(pt)
colnames(pt) <- paste0("Lineage_", LETTERS[1:n_lineages])
colnames(pt) <- paste0("Lineage_", LETTERS[seq(n_lineages)])

# ensure subject ID vector meets criteria for GEE / GLMM fitting
if ((is.gee || is.glmm) && is.null(id.vec)) { stop("You must provide a vector of IDs if you're using GEE / GLMM backends.") }
Expand Down Expand Up @@ -333,11 +333,11 @@ testDynamic <- function(expr.mat = NULL,
is.glmm = is.glmm)
}
# add test stats to result list
lineage_list[[j]]$Test_Stat = ifelse(is.gee, test_res$Wald_Stat, test_res$LRT_Stat)
lineage_list[[j]]$Test_Stat_Note = test_res$Notes
lineage_list[[j]]$P_Val = test_res$P_Val
lineage_list[[j]]$Test_Stat <- ifelse(is.gee, test_res$Wald_Stat, test_res$LRT_Stat)
lineage_list[[j]]$Test_Stat_Note <- test_res$Notes
lineage_list[[j]]$P_Val <- test_res$P_Val
}
names(lineage_list) <- paste0("Lineage_", LETTERS[1:n_lineages])
names(lineage_list) <- paste0("Lineage_", LETTERS[seq(n_lineages)])
lineage_list
}

Expand Down Expand Up @@ -374,7 +374,7 @@ testDynamic <- function(expr.mat = NULL,
MARGE_Slope_Data = NULL,
Gene_Dynamics = NULL)
})
names(reformatted_results) <- paste0("Lineage_", LETTERS[1:n_lineages])
names(reformatted_results) <- paste0("Lineage_", LETTERS[seq(n_lineages)])
return(reformatted_results)
} else {
return(x)
Expand Down
Loading

0 comments on commit deed8d9

Please sign in to comment.