Skip to content

Commit 6abe3ba

Browse files
committed
initial commit
0 parents  commit 6abe3ba

25 files changed

+682
-0
lines changed

.Rbuildignore

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
^.*\.Rproj$
2+
^\.Rproj\.user$
3+
^\.travis
4+
^\.git$
5+
^README\.Rmd$
6+
cran-comments.md

.gitignore

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
.Rproj.user
2+
.Rhistory
3+
.RData
4+
.Ruserdata
5+
.DS_Store

.travis.yml

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
language: r
2+
cache: packages
3+
matrix:
4+
include:
5+
- r: oldrel
6+
os: linux
7+
- r: release
8+
os: linux
9+
- r: devel
10+
os: linux
11+
- r: release
12+
os: osx
13+
r_packages:
14+
- covr
15+
- devtools
16+
after_success:
17+
- R -e 'covr::codecov()'

DESCRIPTION

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
Package: lmds
2+
Type: Package
3+
Title: Landmark Multi-Dimensional Scaling
4+
Version: 0.1.0
5+
Authors@R: c(
6+
person(
7+
"Robrecht", "Cannoodt",
8+
email = "[email protected]",
9+
role = c("aut", "cre"),
10+
comment = c(ORCID = "0000-0003-3641-729X", github = "rcannood")
11+
),
12+
person(
13+
"Wouter", "Saelens",
14+
email = "[email protected]",
15+
role = c("aut"),
16+
comment = c(ORCID = "0000-0002-7114-6248", github = "zouter")
17+
)
18+
)
19+
Description:
20+
Landmark Multi-Dimensional Scaling (LMDS) is an extension of classical 'Torgerson' MDS.
21+
LMDS scales to significantly larger data sets since it only computes the distances between
22+
a set of landmarks and all samples.
23+
Has built-in support for sparse matrices.
24+
License: GPL-3
25+
Encoding: UTF-8
26+
LazyData: true
27+
Imports:
28+
assertthat,
29+
dynutils (>= 1.0.3),
30+
irlba,
31+
Matrix
32+
Suggests:
33+
testthat
34+
RoxygenNote: 6.1.1
35+
Roxygen: list(markdown = TRUE)
36+
URL: http://github.com/dynverse/lmds
37+
BugReports: https://github.com/dynverse/lmds/issues
38+
Collate:
39+
'cmdscale_landmarks.R'
40+
'select_landmarks.R'
41+
'lmds.R'
42+
'package.R'

NAMESPACE

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# Generated by roxygen2: do not edit by hand
2+
3+
export(cmdscale_landmarks)
4+
export(lmds)
5+
export(select_landmarks)
6+
importFrom(assertthat,assert_that)
7+
importFrom(dynutils,calculate_distance)
8+
importFrom(dynutils,is_sparse)
9+
importFrom(dynutils,scale_uniform)
10+
importFrom(irlba,partial_eigen)

NEWS.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# lmds 0.1.0
2+
3+
Initial release of lmds.
4+
* Landmark Multi-Dimensional Scaling (LMDS) is an extension of classical 'Torgerson' MDS.
5+
LMDS scales to significantly larger data sets since it only computes the distances between
6+
a set of landmarks and all samples. Has built-in support for sparse matrices.

R/cmdscale_landmarks.R

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
#' Perform MDS on landmarks and project other samples to the same space
2+
#'
3+
#' @param dist_2lm Distance matrix between the landmarks and all the samples in original dataset
4+
#' @param ndim The number of dimensions
5+
#' @param rescale Whether or not to rescale the final dimensionality reduction (recommended)
6+
#' @param ... Extra params to pass to [irlba::irlba()]
7+
#'
8+
#' @importFrom irlba partial_eigen
9+
#' @importFrom dynutils scale_uniform
10+
#'
11+
#' @export
12+
#'
13+
#' @examples
14+
#' library(Matrix)
15+
#' x <- Matrix::rsparsematrix(10000, 1000, .01)
16+
#' dist_2lm <- select_landmarks(x)
17+
#' cmdscale_landmarks(dist_2lm)
18+
cmdscale_landmarks <- function(dist_2lm, ndim = 3, rescale = TRUE, ...) {
19+
assert_that(
20+
!is.null(attr(dist_2lm, "landmark_ix")) || (!is.null(rownames(dist_2lm)) && !is.null(colnames(dist_2lm))),
21+
is.numeric(ndim), length(ndim) == 1, ndim >= 1,
22+
is.logical(rescale), length(rescale) == 1, !is.na(rescale)
23+
)
24+
25+
ix_lm <- attr(dist_2lm, "landmark_ix")
26+
if (is.null(ix_lm)) {
27+
ix_lm <- match(rownames(dist_2lm), colnames(dist_2lm))
28+
}
29+
30+
# short hand notations
31+
x <- dist_2lm[, ix_lm, drop = FALSE]^2
32+
n <- as.integer(nrow(x))
33+
N <- as.integer(ncol(dist_2lm))
34+
35+
# double center data
36+
mu_n <- rowMeans(x)
37+
mu <- mean(x)
38+
x_dc <-
39+
sweep(
40+
sweep(x, 1, mu_n, "-"),
41+
2, mu_n, "-"
42+
) + mu
43+
44+
# classical MDS on landmarks
45+
e <- irlba::partial_eigen(-x_dc / 2, symmetric = TRUE, n = ndim, ...)
46+
ev <- e$values
47+
evec <- e$vectors
48+
ndim1 <- sum(ev > 0)
49+
if (ndim1 < ndim) {
50+
warning(gettextf("only %d of the first %d eigenvalues are > 0", ndim1, ndim), domain = NA)
51+
evec <- evec[, ev > 0, drop = FALSE]
52+
ev <- ev[ev > 0]
53+
ndim <- ndim1
54+
}
55+
56+
# distance-based triangulation
57+
points_inv <- evec / rep(sqrt(ev), each = n)
58+
dimred <- (-t(dist_2lm - rep(mu_n, each = N)) / 2) %*% points_inv
59+
60+
if (rescale) {
61+
dimred <- dynutils::scale_uniform(dimred)
62+
}
63+
64+
dimred
65+
}

R/lmds.R

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
#' Landmark MDS
2+
#'
3+
#' Landmark Multi-Dimensional Scaling (LMDS) is an extension of classical 'Torgerson' MDS.
4+
#' LMDS scales to significantly larger data sets since it only computes the distances between
5+
#' a set of landmarks and all samples. Has built-in support for sparse matrices.
6+
#'
7+
#' @inheritParams select_landmarks
8+
#' @inheritParams cmdscale_landmarks
9+
#'
10+
#' @export
11+
#'
12+
#' @include cmdscale_landmarks.R select_landmarks.R
13+
#'
14+
#' @examples
15+
#' library(Matrix)
16+
#' x <- Matrix::rsparsematrix(10000, 1000, .01)
17+
#' lmds(x, ndim = 3)
18+
lmds <- dynutils::inherit_default_params(
19+
list(select_landmarks, cmdscale_landmarks),
20+
function(
21+
x,
22+
ndim,
23+
distance_method,
24+
landmark_method,
25+
num_landmarks
26+
) {
27+
# select the landmarks
28+
dist_2lm <- select_landmarks(
29+
x = x,
30+
distance_method = distance_method,
31+
landmark_method = landmark_method,
32+
num_landmarks = num_landmarks
33+
)
34+
35+
# reduce dimensionality for landmarks and project to non-landmarks
36+
dimred <- cmdscale_landmarks(
37+
dist_2lm = dist_2lm,
38+
ndim = ndim,
39+
rescale = TRUE
40+
)
41+
42+
rownames(dimred) <- rownames(x)
43+
colnames(dimred) <- paste0("comp_", seq_len(ndim))
44+
45+
dimred
46+
}
47+
)
48+
49+
50+
51+
52+

R/package.R

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
#' Landmark Multi-Dimensional Scalng
2+
#'
3+
#' Landmark Multi-Dimensional Scaling (LMDS) is an extension of classical 'Torgerson' MDS.
4+
#' LMDS scales to significantly larger data sets since it only computes the distances between
5+
#' a set of landmarks and all samples.
6+
#' Has built-in support for sparse matrices.
7+
#'
8+
#' @importFrom assertthat assert_that
9+
#' @importFrom dynutils calculate_distance is_sparse
10+
#'
11+
#' @docType package
12+
#' @name lmds
13+
NULL

R/select_landmarks.R

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
#' Select landmarks from dataset
2+
#'
3+
#' In addition, the distances between the landmarks and all samples are calculated.
4+
#'
5+
#' @param x A matrix, optionally sparse.
6+
#' @param distance_method The distance metric to use. Options are "euclidean" (default), "pearson", "spearman", "cosine", "manhattan".
7+
#' @param landmark_method The landmark selection method to use. Options are "sample" (default).
8+
#' @param num_landmarks The number of landmarks to use,
9+
#'
10+
#' @return The distance matrix between the landmarks and all samples. In addition, an attribute `"landmark_ix"`
11+
#' denotes the indices of landmarks that were sampled.
12+
#'
13+
#' @export
14+
#'
15+
#' @examples
16+
#' library(Matrix)
17+
#' x <- Matrix::rsparsematrix(10000, 1000, .01)
18+
#' select_landmarks(x)
19+
select_landmarks <- function(
20+
x,
21+
distance_method = c("euclidean", "pearson", "spearman", "cosine", "manhattan"),
22+
landmark_method = c("sample"),
23+
num_landmarks = 500
24+
) {
25+
distance_method <- match.arg(distance_method)
26+
landmark_method <- match.arg(landmark_method)
27+
assert_that(
28+
is.matrix(x) || is_sparse(x),
29+
is.numeric(num_landmarks), length(num_landmarks) == 1, num_landmarks > 2
30+
)
31+
32+
# parameter check on num_landmarks
33+
if (num_landmarks > nrow(x)) {
34+
num_landmarks <- nrow(x)
35+
}
36+
37+
# naive -> just subsample the cell ids
38+
if (landmark_method == "sample") {
39+
ix_lm <- sample.int(nrow(x), num_landmarks)
40+
dist_2lm <- as.matrix(calculate_distance(x[ix_lm, , drop = FALSE], x, method = distance_method))
41+
}
42+
43+
dist_2lm <- zapsmall(dist_2lm)
44+
45+
attr(dist_2lm, "landmark_ix") <- ix_lm
46+
47+
dist_2lm
48+
}

0 commit comments

Comments
 (0)