AllenInstitute · leechangkyu · May 22, 2020 · May 22, 2020 · May 31, 2020 · Jun 3, 2020
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,42 +1,46 @@
 Package: scrattch.hicat
 Title: Hierarchical Iterative Clustering Analysis for Transcriptomic data
-Version: 1.0.0
+Version: 1.0.6
 Authors@R: c(person("Zizhen", "Yao", email = "[email protected]", role = c("aut")),
   person("Lucas", "Graybuck", email = "[email protected]", role = c("aut", "cre")),
   person("Trygve", "Bakken", email = "[email protected]", role = c("aut")),
   person("Cindy", "van Velthoven", email = "[email protected]", role = c("aut")),
   person("Jeremy", "Miller", email = "[email protected]", role = c("aut")),	
   person("Adriana","Sedeno-Cortes", email ="[email protected]", role = c("aut")),
   person("Changkyu","Lee", email="[email protected]", role = c("aut")),
-  person("Lawrence","Huang", email="[email protected]", role = c("aut")),
   person("Saroja","Somasundaram", email="[email protected]", role = c("aut")))
 Description: Iteractive clustering of single cell RNASeq dataset. 
 BugReports: https://github.com/AllenInstitute/scrattch.hicat/issues
-Depends: R (>= 3.3.0)
+Depends: R (>= 4.0.0)
 License: GPL-3
 Encoding: UTF-8
 LazyData: TRUE
 VignetteBuilder: knitr
 RoxygenNote: 7.0.2
-Requires: Matrix
+Requires: Matrix,Rphenograph,
 Imports: 
+  Rcpp,
+  BiocNeighbors,	
+  RcppAnnoy,
+  RcppEigen,
+  RcppProgress,
+  parallel,
+  doMC,
+  foreach,	
   data.table,
-  dendextend,
-  devtools,
-  doParallel,
   dplyr,
-  foreach,
   ggplot2,
   grid,
-  limma,
+  dendextend,
+  Matrix,
   matrixStats,
   pvclust,
-  qlcMatrix,
   Rtsne,
-  WGCNA
-Suggests: 
+  RcppParallel
+LinkingTo: Rcpp (>= 0.11.0), RcppEigen, RcppProgress, beachmat, RcppParallel
+Suggests:
+    WGCNA, 
     knitr,
     rmarkdown,
-    testthat,
-    Rphenograph,
+    testthat,     
     covr
diff --git a/NAMESPACE b/NAMESPACE
@@ -1 +1,6 @@
+useDynLib(scrattch.hicat)
+importFrom(Rcpp, sourceCpp)
+importFrom(Rcpp, evalCpp)
+importFrom(RcppParallel, RcppParallelLibs)
 exportPattern("^[[:alpha:]]+")
+
diff --git a/R/KNN.graph.R b/R/KNN.graph.R
@@ -2,7 +2,6 @@
 #'
 #' @param rd.dat 
 #' @param cl 
-#' @param cl.df 
 #' @param k 
 #' @param knn.outlier.th 
 #' @param outlier.frac.th 
@@ -11,46 +10,51 @@
 #' @export
 #'
 #' @examples
-get_knn_graph <- function(rd.dat, cl,cl.df, k=15, knn.outlier.th=2, outlier.frac.th=0.5)
+get_knn_graph <- function(rd.dat, cl, k=15, knn.outlier.th=2, outlier.frac.th=0.5,clean.cells=row.names(rd.dat), knn.result=NULL)
 {
-  knn.result = RANN::nn2(rd.dat,k=k)
-  row.names(knn.result[[1]]) = row.names(knn.result[[2]])=row.names(rd.dat)
+  if(is.null(knn.result)){
+    knn.result = RANN::nn2(rd.dat,k=k)
+    row.names(knn.result[[1]]) = row.names(knn.result[[2]])=row.names(rd.dat)
+  }
   knn  = knn.result[[1]]
   knn.dist = knn.result[[2]]
-  cl.knn.dist.mean = tapply(names(cl),cl, function(x) mean(knn.dist[x,-1]))
-  cl.knn.dist.sd = tapply(names(cl),cl, function(x) sd(knn.dist[x,-1]))
-  cl.knn.dist.th = (cl.knn.dist.mean + knn.outlier.th * cl.knn.dist.sd)
+  knn.cl = matrix(cl[row.names(knn)[knn]],ncol=ncol(knn))
+  row.names(knn.cl)= row.names(knn)  
+  cl.knn.dist.med = tapply(clean.cells,cl[clean.cells], function(x) median(knn.dist[x,-1]))
+  cl.knn.dist.mad = tapply(clean.cells, cl[clean.cells], function(x) mad(knn.dist[x,-1]))
+  cl.knn.dist.th = cl.knn.dist.med + knn.outlier.th * cl.knn.dist.mad
+  cl.knn.dist.th = pmax(cl.knn.dist.th, median(cl.knn.dist.th))
 
   knn.dist.th=cl.knn.dist.th[as.character(cl[row.names(knn)])]
   outlier = apply(knn.dist, 2, function(x) x>  knn.dist.th)
+  outlier[!row.names(knn)[knn] %in% clean.cells]=TRUE
   row.names(outlier)  = row.names(knn.dist)
+  outlier.frac= rowMeans(outlier)
+
   knn[outlier] = NA
-  select.cells = row.names(outlier)[rowMeans(outlier) < outlier.frac.th]  
-  pred.result = predict_knn(knn[select.cells,], row.names(rd.dat), cl)
-  pred.prob = pred.result$pred.prob
-  knn.cell.cl.counts = round(pred.prob * ncol(knn))
-  knn.cl.cl.counts = do.call("rbind",tapply(row.names(pred.prob), cl[row.names(pred.prob)], function(x)colSums(knn.cell.cl.counts[x,])))
-  knn.cl.df = as.data.frame(as.table(knn.cl.cl.counts))
-  colnames(knn.cl.df)[1:2] = c("cl.from","cl.to")
-  from.size = rowSums(knn.cl.cl.counts)
-  to.size = colSums(knn.cl.cl.counts)
-  total = sum(knn.cl.cl.counts)
+  knn.cl[outlier] = NA
+
+  select.cells = intersect(clean.cells, names(outlier.frac)[outlier.frac < outlier.frac.th])
+  tmp.df = data.frame(sample_name =rep(select.cells, ncol(knn)), cl.to=as.vector(knn.cl[select.cells,]))
+  tmp.df$cl.from = cl[as.character(tmp.df$sample_name)]
+  knn.cl.df = tmp.df %>% filter(!is.na(cl.to)) %>% group_by(cl.from, cl.to) %>% summarise(Freq=n())
+    
+  from.size = tapply(knn.cl.df$Freq, knn.cl.df$cl.from, sum)
+  to.size = tapply(knn.cl.df$Freq, knn.cl.df$cl.to, sum)
+  total = sum(knn.cl.df$Freq)
   knn.cl.df$cl.from.total= from.size[as.character(knn.cl.df$cl.from)]
   knn.cl.df$cl.to.total = to.size[as.character(knn.cl.df$cl.to)]
-  knn.cl.df = knn.cl.df[knn.cl.df$Freq > 0,]
   knn.cl.df$pval.log = knn.cl.df$odds  = 0
   for(i in 1:nrow(knn.cl.df)){
     q = knn.cl.df$Freq[i] - 1
     k = knn.cl.df$cl.from.total[i]
     m = knn.cl.df$cl.to.total[i]
     n = total - m
     knn.cl.df$pval.log[i]=phyper(q, m=m, n=n, k=k, lower.tail = FALSE, log.p=TRUE)
-    knn.cl.df$odds[i] = (q + 1) / (k * m /total)
+    knn.cl.df$odds[i] = (q + 1) / (k /total * m)
   }
   knn.cl.df$frac = knn.cl.df$Freq/knn.cl.df$cl.from.total
-  knn.cl.df$cl.from.label = cl.df[as.character(knn.cl.df$cl.from),"cluster_label"]
-  knn.cl.df$cl.to.label = cl.df[as.character(knn.cl.df$cl.to),"cluster_label"]
-  return(list(knn.result=knn.result, pred.result=pred.result, knn.cl.df=knn.cl.df))
+  return(list(knn.result=knn.result, knn.cl.df=knn.cl.df,outlier=outlier))
 }
 
 
@@ -82,7 +86,7 @@ get_knn_graph <- function(rd.dat, cl,cl.df, k=15, knn.outlier.th=2, outlier.frac
 #' @usage plotting.MGE.constellation <- plot_constellation(knn.cl.df = knn.cl.df, cl.center.df = cl.center.df, out.dir = "data/Constellation_example/plot", node.dodge=TRUE, plot.hull=c(1,2)) 
 
 
-plot_constellation <- function(knn.cl.df, cl.center.df, out.dir, node.label="cluster_id", exxageration=2, curved = TRUE, plot.parts=FALSE, plot.hull = NULL, plot.height=25, plot.width=25, node.dodge=FALSE, label.size=2, max_size=10) { 
+plot_constellation <- function(knn.cl.df, cl.center.df, out.dir, node.label="cluster_id", exxageration=2, curved = TRUE, plot.parts=FALSE, plot.hull = NULL, plot.height=25, plot.width=25, node.dodge=FALSE, label.size=2, max_size=10,   size.breaks = c(100,1000,10000,100000)) { 
 
   library(gridExtra)
   library(sna)
@@ -119,7 +123,7 @@ plot_constellation <- function(knn.cl.df, cl.center.df, out.dir, node.label="clu
                                  color=alpha(cluster_color, 0.8))) +
                   scale_size_area(trans="sqrt",
                                   max_size=max_size,
-                                  breaks = c(100,1000,10000,100000)) +
+                                  breaks = size.breaks) + 
                   scale_color_identity() +  
                   geom_text(data=cl.center.df,
                             aes(x=x, 
@@ -568,8 +572,10 @@ if (exxageration !=1) {
 
   g2 <- gridExtra::arrangeGrob(grobs=list(dot.size.legend,edge.width.legend,cl.center.legend), layout_matrix=layout_legend)
 
-
-  ggsave(file.path(out.dir,paste0(st,"constellation.pdf")),marrangeGrob(list(plot.all,g2),nrow = 1, ncol=1),width = plot.width, height = plot.height, units="cm",useDingbats=FALSE)
+
+  fout = file.path(out.dir,paste0(st,"constellation.pdf"))
+  cat("Save ", fout, "\n")
+  ggsave(fout, marrangeGrob(list(plot.all,g2),nrow = 1, ncol=1),width = plot.width, height = plot.height, units="cm",useDingbats=FALSE)
 
 
 }
@@ -719,3 +725,19 @@ angle <- function(x, y) {
     atan2(y[2] - y[1], x[2] - x[1])
         }
 
+
+
+plot_umap_constellation <- function(umap.2d, cl, cl.df, select.knn.cl.df, dest.d=".", prefix="",...)
+  {
+
+    cl.center.df = as.data.frame(get_RD_cl_center(umap.2d,cl))
+    cl.center.df$cl = row.names(cl.center.df)
+    cl.center.df$cluster_id <- cl.df$cluster_id[match(cl.center.df$cl, cl.df$cl)]
+    cl.center.df$cluster_color <- cl.df$cluster_color[match(cl.center.df$cl, cl.df$cl)]
+    cl.center.df$cluster_label <- cl.df$cluster_label[match(cl.center.df$cl, cl.df$cl)] 
+    cl.center.df$cluster_size <- cl.df$cluster_size[match(cl.center.df$cl, cl.df$cl)]
+    tmp.cl = row.names(cl.center.df)
+    tmp.knn.cl.df = select.knn.cl.df  %>% filter(cl.from %in% tmp.cl & cl.to %in% tmp.cl)
+    p=plot_constellation(tmp.knn.cl.df, cl.center.df, node.label="cluster_id", out.dir=file.path(dest.d,prefix),...)    
+  }
+
diff --git a/R/RcppExports.R b/R/RcppExports.R
@@ -0,0 +1,119 @@
+# Generated by using Rcpp::compileAttributes() -> do not edit by hand
+# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
+
+rcpp_get_cl_means <- function(mat, clAll) {
+    .Call('_scrattch_hicat_rcpp_get_cl_means', PACKAGE = 'scrattch.hicat', mat, clAll)
+}
+
+rcpp_get_cl_means_RcppParallel <- function(mat, clAll) {
+    .Call('_scrattch_hicat_rcpp_get_cl_means_RcppParallel', PACKAGE = 'scrattch.hicat', mat, clAll)
+}
+
+rcpp_get_cl_present <- function(mat, clAll, lowth) {
+    .Call('_scrattch_hicat_rcpp_get_cl_present', PACKAGE = 'scrattch.hicat', mat, clAll, lowth)
+}
+
+rcpp_get_cl_present_RcppParallel <- function(mat, clAll, lowth) {
+    .Call('_scrattch_hicat_rcpp_get_cl_present_RcppParallel', PACKAGE = 'scrattch.hicat', mat, clAll, lowth)
+}
+
+rcpp_get_cl_sqr_means <- function(mat, clAll) {
+    .Call('_scrattch_hicat_rcpp_get_cl_sqr_means', PACKAGE = 'scrattch.hicat', mat, clAll)
+}
+
+rcpp_get_cl_sqr_means_RcppParallel <- function(mat, clAll) {
+    .Call('_scrattch_hicat_rcpp_get_cl_sqr_means_RcppParallel', PACKAGE = 'scrattch.hicat', mat, clAll)
+}
+
+rcpp_get_cl_medians <- function(mat, clAll) {
+    .Call('_scrattch_hicat_rcpp_get_cl_medians', PACKAGE = 'scrattch.hicat', mat, clAll)
+}
+
+rcpp_get_cl_medians_RcppParallel <- function(mat, clAll) {
+    .Call('_scrattch_hicat_rcpp_get_cl_medians_RcppParallel', PACKAGE = 'scrattch.hicat', mat, clAll)
+}
+
+RowMergeMatrices <- function(mat1, mat2, mat1_rownames, mat2_rownames, all_rownames) {
+    .Call('_scrattch_hicat_RowMergeMatrices', PACKAGE = 'scrattch.hicat', mat1, mat2, mat1_rownames, mat2_rownames, all_rownames)
+}
+
+RowMergeMatricesList <- function(mat_list, mat_rownames, all_rownames) {
+    .Call('_scrattch_hicat_RowMergeMatricesList', PACKAGE = 'scrattch.hicat', mat_list, mat_rownames, all_rownames)
+}
+
+LogNorm <- function(data, scale_factor, display_progress = TRUE) {
+    .Call('_scrattch_hicat_LogNorm', PACKAGE = 'scrattch.hicat', data, scale_factor, display_progress)
+}
+
+Standardize <- function(mat, display_progress = TRUE) {
+    .Call('_scrattch_hicat_Standardize', PACKAGE = 'scrattch.hicat', mat, display_progress)
+}
+
+FastSparseRowScale <- function(mat, scale = TRUE, center = TRUE, scale_max = 10, display_progress = TRUE) {
+    .Call('_scrattch_hicat_FastSparseRowScale', PACKAGE = 'scrattch.hicat', mat, scale, center, scale_max, display_progress)
+}
+
+FastSparseRowScaleWithKnownStats <- function(mat, mu, sigma, scale = TRUE, center = TRUE, scale_max = 10, display_progress = TRUE) {
+    .Call('_scrattch_hicat_FastSparseRowScaleWithKnownStats', PACKAGE = 'scrattch.hicat', mat, mu, sigma, scale, center, scale_max, display_progress)
+}
+
+FastCov <- function(mat, center = TRUE) {
+    .Call('_scrattch_hicat_FastCov', PACKAGE = 'scrattch.hicat', mat, center)
+}
+
+FastCovMats <- function(mat1, mat2, center = TRUE) {
+    .Call('_scrattch_hicat_FastCovMats', PACKAGE = 'scrattch.hicat', mat1, mat2, center)
+}
+
+FastRBind <- function(mat1, mat2) {
+    .Call('_scrattch_hicat_FastRBind', PACKAGE = 'scrattch.hicat', mat1, mat2)
+}
+
+SparseRowMean <- function(mat, display_progress) {
+    .Call('_scrattch_hicat_SparseRowMean', PACKAGE = 'scrattch.hicat', mat, display_progress)
+}
+
+SparseRowVar2 <- function(mat, mu, display_progress) {
+    .Call('_scrattch_hicat_SparseRowVar2', PACKAGE = 'scrattch.hicat', mat, mu, display_progress)
+}
+
+SparseRowVarStd <- function(mat, mu, sd, vmax, display_progress) {
+    .Call('_scrattch_hicat_SparseRowVarStd', PACKAGE = 'scrattch.hicat', mat, mu, sd, vmax, display_progress)
+}
+
+FastLogVMR <- function(mat, display_progress) {
+    .Call('_scrattch_hicat_FastLogVMR', PACKAGE = 'scrattch.hicat', mat, display_progress)
+}
+
+RowVar <- function(x) {
+    .Call('_scrattch_hicat_RowVar', PACKAGE = 'scrattch.hicat', x)
+}
+
+SparseRowVar <- function(mat, display_progress) {
+    .Call('_scrattch_hicat_SparseRowVar', PACKAGE = 'scrattch.hicat', mat, display_progress)
+}
+
+ReplaceColsC <- function(mat, col_idx, replacement) {
+    .Call('_scrattch_hicat_ReplaceColsC', PACKAGE = 'scrattch.hicat', mat, col_idx, replacement)
+}
+
+GraphToNeighborHelper <- function(mat) {
+    .Call('_scrattch_hicat_GraphToNeighborHelper', PACKAGE = 'scrattch.hicat', mat)
+}
+
+ImputeKnn <- function(knn_idx, ref_idx, cell_idx, gene_idx, dat, impute_dat, w_mat_, transpose_input, transpose_output) {
+    invisible(.Call('_scrattch_hicat_ImputeKnn', PACKAGE = 'scrattch.hicat', knn_idx, ref_idx, cell_idx, gene_idx, dat, impute_dat, w_mat_, transpose_input, transpose_output))
+}
+
+ComputeSNN <- function(nn_ranked, prune) {
+    .Call('_scrattch_hicat_ComputeSNN', PACKAGE = 'scrattch.hicat', nn_ranked, prune)
+}
+
+WriteEdgeFile <- function(snn, filename, display_progress) {
+    invisible(.Call('_scrattch_hicat_WriteEdgeFile', PACKAGE = 'scrattch.hicat', snn, filename, display_progress))
+}
+
+DirectSNNToFile <- function(nn_ranked, prune, display_progress, filename) {
+    .Call('_scrattch_hicat_DirectSNNToFile', PACKAGE = 'scrattch.hicat', nn_ranked, prune, display_progress, filename)
+}
+