From 377e9977b37657468b0a138c3417d4b1673bf2c4 Mon Sep 17 00:00:00 2001 From: Joseph Knox Date: Thu, 17 May 2018 23:54:01 -0700 Subject: [PATCH 1/7] WIP: working on sketch module\nWorking on writing a seperate sketch module that incorporates additional sketching aglorithms --- ristretto/sketch/__init__.py | 0 ristretto/sketch/range_finders.py | 184 ++++++++++++++++++++++++++++++ ristretto/{ => sketch}/sketch.py | 83 ++------------ 3 files changed, 192 insertions(+), 75 deletions(-) create mode 100644 ristretto/sketch/__init__.py create mode 100644 ristretto/sketch/range_finders.py rename ristretto/{ => sketch}/sketch.py (50%) diff --git a/ristretto/sketch/__init__.py b/ristretto/sketch/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/ristretto/sketch/range_finders.py b/ristretto/sketch/range_finders.py new file mode 100644 index 0000000..bbe9d39 --- /dev/null +++ b/ristretto/sketch/range_finders.py @@ -0,0 +1,184 @@ +""" +Functions for approximating the range of a matrix A. +""" +from functools import reduce +import operator as op + +import numpy as np +from scipy import linalg +from scipy import fftpack +from scipy import stats +from scipy import sparse + +from ..utils import check_random_state + + +def _axis_wrapper(func): + """Wraps range finders, dealing with axis keyword arguments. + + If axis == 1: return function + If axis == 0: transpose A, return transpose of results + else: raise error + """ + def check_axis_and_call(*args, **kwargs): + axis = kwargs.get('axis', 1) + if axis not in (0, 1): + raise ValueError('If supplied, axis must be in (0, 1)') + if axis == 0: + A, *args = args + result = func(A, *args, **kwargs) + if isinstance(result, tuple): + return tuple(map(np.transpose, result)) + return result.T + return func(*args, **kwargs) + return check_axis_and_call + + +def _ortho(X): + """orthonormalize the columns of X via QR decomposition""" + Q, _ = linalg.qr(X, overwrite_a=True, mode='economic', pivoting=False, + check_finite=False) + return Q + + +@_axis_wrapper +def randomized_uniform_sampling(A, l, axis=1, random_state=None): + """Uniform randomized sampling transform. + + Given an m x n matrix A, and an integer l, this returns an m x l + random subset of the range of A. + + """ + random_state = check_random_state(random_state) + + A = np.asarray_chfinite(A) + m, n = A.shape + + # sample l columns with equal probability + idx = random_state.choice(n, size=l, replace=False) + return A[:, idx] + + +@_axis_wrapper +def randomized_basic(A, l, axis=1, random_state=None): + """ + + Given an m x n matrix A, and an integer l, this scheme computes an m x l + orthonormail matrix Q whose range approximates the range of A + + Notes + ----- + Also known as randQB + """ + random_state = check_random_state(random_state) + + A = np.asarray_chfinite(A) + m, n = A.shape + + # construct gaussian random matrix + Omega = random_state.standard_normal(size=(n, l)).astype(A.dtype) + + # project A onto Omega + Y = A.dot(Omega) + + # orthonormalize Y + Q = _ortho(Y) + B = Q.T.dot(A) + + return Q, B + + +@_axis_wrapper +def randomized_blocked_adaptive(A, r=10, tol=1e-3, axis=1, random_state=None): + """ + + Given an m x n matrix A, a tolerance tol, and an iteger r, + adaptive_randomized_range_finder computes an orthonormal matrix Q such + that `` | I - Q Q^*) A | <= tol `` holds with at least probability + ``1 - min(m, n)*10^-r``. 
+ + Notes + ----- + Also known as randQB_b + """ + random_state = check_random_state(random_state) + + A = np.asarray_chfinite(A) + m, n = A.shape + + Q_iters, QQT_iters, B_iters = [], [], [] + for _ in range(r): + # construct gaussian random matrix + Omega = random_state.standard_normal(size=(n, r)).astype(A.dtype) + + Q = _ortho(A.dot(Omega)) + + if Q_iters: + # Qi = orth(Qi - sum(Qj Qj^T Qi)) + Q -= reduce(op.add, map(lambda x: x.dot(Q), QQT_iters)) + + Q = _ortho(Q) + + # compute QB decomposition + B = Q.T.dot(A) + + # break if we reach desired rank + if linalg.norm(A - Q.dot(B)) < tol: + break + + # update + Q_iters.append(Q) + B_iters.append(B) + QQT_iters.append(Q.dot(Q.T)) + + return np.hstack(Q_iters), np.vstack(B_iters) + + +@_axis_wrapper +def randomized_subspace_iteration(A, l, n_iter=10, axis=1, random_state=None): + """ + + Given an m x n matrix A, and an integer l, this scheme computes an m x l + orthonormail matrix Q whose range approximates the range of A. + + """ + random_state = check_random_state(random_state) + + A = np.asarray_chfinite(A) + m, n = A.shape + + # construct gaussian random matrix + Omega = random_state.standard_normal(size=(n, l)).astype(A) + + # project A onto Omega + Y = A.dot(Omega) + + Q = _ortho(Y) + for _ in range(n_iter): + Z = _ortho(A.T.dot(Q)) + Q = _ortho(A.dot(Z)) + + B = Q.T.dot(A) + + return Q, B + + +# TODO: need to verify +#def fast_johnson_lindenstrauss_transform(A, l, random_state=None): +# """Fast Johnson-Lindenstrauss Transform. +# +# Given an m x n matrix A, and an integer l, this scheme computes an m x l +# orthonormail matrix Q whose range approximates the range of A +# +# """ +# m, n = A.shape +# +# d = random_state.choice((-1, 1), size=l) +# d = sparse.spdiags(d, 0, n, n) +# +# # project A onto d, compute DCT +# Ad = A.dot(d) +# Adf = fftpack.dct(Ad, axis=1, norm='ortho') +# +# # uniformly sample +# Q = randomized_uniform_sampling_transform(Adf, l) diff --git a/ristretto/sketch.py b/ristretto/sketch/sketch.py similarity index 50% rename from ristretto/sketch.py rename to ristretto/sketch/sketch.py index ca67a25..a287a43 100644 --- a/ristretto/sketch.py +++ b/ristretto/sketch/sketch.py @@ -17,81 +17,14 @@ _VALID_DTYPES = (np.float32, np.float64, np.complex64, np.complex128) _QR_KWARGS = dict(mode='economic', check_finite=False, overwrite_a=True) - -def _output_rank_check(A, output_rank): - n, m = A.shape - rank = min(n, m) if output_rank is None else output_rank - - if rank > min(n, m): - warnings.warn('output_rank %d is greater than the minimum ' - 'dimension of input array A (shape %s). 
The ' - 'minimum dimension will be chosen instead', - output_rank, n, m, min(n, m)) - rank = min(n, m) - return rank - - -def _get_distribution_func(distribution, random_state): - if distribution == 'uniform': - return partial(random_state.uniform, -1, 1) - return random_state.standard_normal - - -def sketch(A, out=None, output_rank=None, n_oversample=10, n_iter=2, - distribution='uniform', axis=0, check_finite=False, random_state=None): - random_state = check_random_state(random_state) - - # converts A to array, raise ValueError if A has inf or nan - A = np.asarray_chkfinite(A) if check_finite else np.asarray(A) - - if A.dtype not in _VALID_DTYPES: - raise ValueError('A.dtype must be one of %s, not %s' - % (' '.join(_VALID_DTYPES), A.dtype)) - - if distribution not in _VALID_DISTRIBUTIONS: - raise ValueError('distribution must be one of %s, not %s' - % (' '.join(_VALID_DISTRIBUTIONS), distribution)) - - if axis not in (0, 1): - raise ValueError('If specified, axis must be 0 or 1, not %s' % axis) - - # check rank - rank = _output_rank_check(A, output_rank) - - n_oversample += rank - size = (n_oversample, A.shape[0]) if axis == 0 else (A.shape[1], n_oversample) - - # get numpy random func - dist_func = _get_distribution_func(distribution, random_state) - - #Generate a random test matrix Omega - Omega = dist_func(size=size).astype(A.dtype) - - if A.dtype == np.complexfloating: - real_type = np.float32 if A.dtype == np.complex64 else np.float64 - Omega += 1j * dist_func(size=size).astype(real_type) - - #Build sample matrix Y : Y = A * Omega - # Y approximates range of A - Y = A.dot(Omega) - del Omega - - #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - #Orthogonalize Y using economic QR decomposition: Y=QR - #If q > 0 perfrom q subspace iterations - #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - for _ in range(n_iter): - Y, _ = linalg.qr(Y, **_QR_KWARGS) - Z, _ = linalg.qr(conjugate_transpose(A).dot(Y), **_QR_KWARGS) - Y = A.dot(Z) - - # compute sketch - S, _ = linalg.qr(Y, **_QR_KWARGS) - - #if axis == 0: - # return np.dot(conjugate_transpose(S), A, out=out) - #return np.dot(A, S, out=out) - return S +def sketch(A, method='randomized_subspace_iteration', *args, **kwargs): + try: + func = getattr(range_finders, method) + except AttributeError: + # TODO: write better error message + raise ValueError('incorrect method %s passed' ) + + return func(*args, **kwargs) def single_pass_sketch(A, output_rank=None, row_oversample=None, From 77c8cef0a6db0151779e5287a2dc72f88affe149 Mon Sep 17 00:00:00 2001 From: Joseph Knox Date: Fri, 18 May 2018 13:40:03 -0700 Subject: [PATCH 2/7] WIP: need to reconcile edits with package.\nWork from the plane, need to finish --- ristretto/sketch/__init__.py | 7 + ristretto/sketch/_sketches.py | 29 +++ ristretto/sketch/range_finders.py | 216 ++++++---------------- ristretto/sketch/sketch.py | 83 --------- ristretto/sketch/tests/test_sketches.py | 80 ++++++++ ristretto/sketch/tests/test_transforms.py | 100 ++++++++++ ristretto/sketch/tests/test_utils.py | 40 ++++ ristretto/sketch/transforms.py | 138 ++++++++++++++ ristretto/sketch/utils.py | 29 +++ ristretto/utils.py | 31 ++++ 10 files changed, 509 insertions(+), 244 deletions(-) create mode 100644 ristretto/sketch/_sketches.py delete mode 100644 ristretto/sketch/sketch.py create mode 100644 ristretto/sketch/tests/test_sketches.py create mode 100644 ristretto/sketch/tests/test_transforms.py create mode 100644 ristretto/sketch/tests/test_utils.py create mode 
100644 ristretto/sketch/transforms.py create mode 100644 ristretto/sketch/utils.py diff --git a/ristretto/sketch/__init__.py b/ristretto/sketch/__init__.py index e69de29..1b50b9e 100644 --- a/ristretto/sketch/__init__.py +++ b/ristretto/sketch/__init__.py @@ -0,0 +1,7 @@ +""" +Module :mod:`sketch` contains the functions for sketching and range finding. +""" + +# TODO: FOR COMPATIBILITY: change once range_finders rewritten +from . import * +from .range_finders import sketch, single_pass_sketch diff --git a/ristretto/sketch/_sketches.py b/ristretto/sketch/_sketches.py new file mode 100644 index 0000000..90d5859 --- /dev/null +++ b/ristretto/sketch/_sketches.py @@ -0,0 +1,29 @@ +""" +Module containing sketching funcitons. +""" +from functools import partial +from math import sqrt + +from scipy import sparse + + +def random_axis_sample(A, l, axis, random_state): + """randomly sample the index of axis""" + return random_state.choice(A.shape[axis], size=l, replace=False) + + +def random_gaussian_map(A, l, axis, random_state): + """generate random gaussian map""" + return random_state.standard_normal(size=(A.shape[axis], l)).astype(A.dtype) + + +def sparse_random_map(A, l, axis, density, random_state): + """generate sparse random sampling""" + # TODO: evaluete random_state paramter: we want to pass it to sparse.random + # most definitely to sparsely sample the nnz elements of Omega, but + # is using random_state in data_rvs redundant? + values = (-sqrt(1. / density), sqrt(1. / density)) + data_rvs = partial(random_state.choice, values) + + return sparse.random(A.shape[axis], l, density=density, data_rvs=data_rvs, + random_state=random_state, dtype=A.dtype) diff --git a/ristretto/sketch/range_finders.py b/ristretto/sketch/range_finders.py index bbe9d39..250abb0 100644 --- a/ristretto/sketch/range_finders.py +++ b/ristretto/sketch/range_finders.py @@ -1,184 +1,78 @@ """ -Functions for approximating the range of a matrix A. +Module containing random sketch generating object. """ -from functools import reduce -import operator as op +# Authors: N. Benjamin Erichson +# Joseph Knox +# License: GNU General Public License v3.0 +from functools import partial +import warnings import numpy as np from scipy import linalg -from scipy import fftpack -from scipy import stats -from scipy import sparse -from ..utils import check_random_state +from . import transforms +from ..utils import check_random_state, conjugate_transpose +_VALID_DISTRIBUTIONS = ('uniform', 'normal') +_QR_KWARGS = dict(mode='economic', check_finite=False, overwrite_a=True) -def _axis_wrapper(func): - """Wraps range finders, dealing with axis keyword arguments. 
+def sketch(A, method='randomized_subspace_iteration', *args, **kwargs): + try: + func = getattr(transforms, method) + except AttributeError: + # TODO: write better error message + raise ValueError('incorrect method %s passed' ) - If axis == 1: return function - If axis == 0: transpose A, return transpose of results - else: raise error - """ - def check_axis_and_call(*args, **kwargs): - axis = kwargs.get('axis', 1) - if axis not in (0, 1): - raise ValueError('If supplied, axis must be in (0, 1)') - if axis == 0: - A, *args = args - result = func(A, *args, **kwargs) - if isinstance(result, tuple): - return tuple(map(np.transpose, result)) - return result.T - return func(*args, **kwargs) - return check_axis_and_call + return func(*args, **kwargs) -def _ortho(X): - """orthonormalize the columns of X via QR decomposition""" - Q, _ = linalg.qr(X, overwrite_a=True, mode='economic', pivoting=False, - check_finite=False) - return Q +def single_pass_sketch(A, output_rank=None, row_oversample=None, + column_oversample=10, distribution='uniform', + check_finite=False, random_state=None): - -@_axis_wrapper -def randomized_uniform_sampling(A, l, axis=1, random_state=None): - """Uniform randomized sampling transform. - - Given an m x n matrix A, and an integer l, this returns an m x l - random subset of the range of A. - - """ + if distribution not in _VALID_DISTRIBUTIONS: + raise ValueError('distribution must be one of %s, not %s' + % (' '.join(_VALID_DISTRIBUTIONS), distribution)) random_state = check_random_state(random_state) - A = np.asarray_chfinite(A) - m, n = A.shape - - # sample l columns with equal probability - idx = random_state.choice(n, size=l, replace=False) - return A[:, idx] + # converts A to array, raise ValueError if A has inf or nan + A = np.asarray_chkfinite(A) if check_finite else np.asarray(A) + if row_oversample is None: + row_oversample = 2 * column_oversample -@_axis_wrapper -def randomized_basic(A, l, axis=1, random_state=None): - """ - - Given an m x n matrix A, and an integer l, this scheme computes an m x l - orthonormail matrix Q whose range approximates the range of A - - Notes - ----- - Also known as randQB - """ - random_state = check_random_state(random_state) + # check rank + # TODO:rank = _output_rank_check(A, output_rank) + row_oversample += output_rank + column_oversample += output_rank - A = np.asarray_chfinite(A) - m, n = A.shape + # get numpy random func + dist_func = random_state.randn#_get_distribution_func(distribution, random_state) - # construct gaussian random matrix - Omega = random_state.standard_normal(size=(n, l)).astype(A.dtype) + #Generate a random test matrix Omega + Omega = dist_func(size=(A.shape[1], column_oversample)).astype(A.dtype) + Psi = dist_func(size=(row_oversample, A.shape[0])).astype(A.dtype) - # project A onto Omega - Y = A.dot(Omega) + if A.dtype == np.complexfloating: + real_type = np.float32 if A.dtype == np.complex64 else np.float64 + Omega += 1j * dist_func(size=(A.shape[1], column_oversample)).astype(real_type) + Psi += 1j * dist_func(size=(row_oversample, A.shape[0])).astype(real_type) - # orthonormalize Y - Q = _ortho(Y) - B = Q.T.dot(A) - - return Q, B - - -@_axis_wrapper -def randomized_blocked_adaptive(A, r=10, tol=1e-3, axis=1, random_state=None): - """ - - Given an m x n matrix A, a tolerance tol, and an iteger r, - adaptive_randomized_range_finder computes an orthonormal matrix Q such - that `` | I - Q Q^*) A | <= tol `` holds with at least probability - ``1 - min(m, n)*10^-r``. 
- - Notes - ----- - Also known as randQB_b - """ - random_state = check_random_state(random_state) + # orthogonalize + Omega, _ = linalg.qr(Omega, **_QR_KWARGS) + Psi , _ = linalg.qr(conjugate_transpose(Psi), **_QR_KWARGS) + Psi = conjugate_transpose(Psi) - A = np.asarray_chfinite(A) - m, n = A.shape + #Build sample matrix Y = A * Omega and W = Psi * A + #Note: Y should approximate the column space and W the row space of A + #Y = A.dot(Omega) + #W = Psi.dot(A) + #del Omega - Q_iters, QQT_iters, B_iters = [], [], [] - for _ in range(r): - # construct gaussian random matrix - Omega = random_state.standard_normal(size=(n, r)).astype(A.dtype) - - Q = _ortho(A.dot(Omega)) - - if Q_iters: - # Qi = orth(Qi - sum(Qj Qj^T Qi)) - Q -= reduce(op.add, map(lambda x: x.dot(Q), QQT_iters)) - - Q = _ortho(Q) - - # compute QB decomposition - B = Q.T.dot(A) - - # break if we reach desired rank - if linalg.norm(A - Q.dot(B)) < tol: - break - - # update - Q_iters.append(Q) - B_iters.append(B) - QQT_iters.append(Q.dot(Q.T)) - - return np.hstack(Q_iters), np.vstack(B_iters) - - -@_axis_wrapper -def randomized_subspace_iteration(A, l, n_iter=10, axis=1, random_state=None): - """ - - Given an m x n matrix A, and an integer l, this scheme computes an m x l - orthonormail matrix Q whose range approximates the range of A. - - """ - random_state = check_random_state(random_state) + #Orthogonalize Y using economic QR decomposition: Y=QR + #Q, _ = linalg.qr(Y, **_QR_KWARGS) + #U, T = linalg.qr(Psi.dot(Q), **_QR_KWARGS) - A = np.asarray_chfinite(A) - m, n = A.shape - - # construct gaussian random matrix - Omega = random_state.standard_normal(size=(n, l)).astype(A) - - # project A onto Omega - Y = A.dot(Omega) - - Q = _ortho(Y) - for _ in range(n_iter): - Z = _ortho(A.T.dot(Q)) - Q = _ortho(A.dot(Z)) - - B = Q.T.dot(A) - - return Q, B - - -# TODO: need to verify -#def fast_johnson_lindenstrauss_transform(A, l, random_state=None): -# """Fast Johnson-Lindenstrauss Transform. -# -# Given an m x n matrix A, and an integer l, this scheme computes an m x l -# orthonormail matrix Q whose range approximates the range of A -# -# """ -# m, n = A.shape -# -# d = random_state.choice((-1, 1), size=l) -# d = sparse.spdiags(d, 0, n, n) -# -# # project A onto d, compute DCT -# Ad = A.dot(d) -# Adf = fftpack.dct(Ad, axis=1, norm='ortho') -# -# # uniformly sample -# Q = randomized_uniform_sampling_transform(Adf, l) + # Form a smaller matrix + #return linalg.solve(T, conjugate_transpose(U).dot(W)) + return Omega, Psi diff --git a/ristretto/sketch/sketch.py b/ristretto/sketch/sketch.py deleted file mode 100644 index a287a43..0000000 --- a/ristretto/sketch/sketch.py +++ /dev/null @@ -1,83 +0,0 @@ -""" -Module containing random sketch generating object. -""" -# Authors: N. 
Benjamin Erichson -# Joseph Knox -# License: GNU General Public License v3.0 -from functools import partial -import warnings - -import numpy as np -from scipy import linalg - -from .utils import check_random_state, conjugate_transpose - -_VALID_DISTRIBUTIONS = ('uniform', 'normal') -_VALID_SINGLE_DISTRIBUTIONS = ('uniform', 'normal', 'orthogonal') -_VALID_DTYPES = (np.float32, np.float64, np.complex64, np.complex128) -_QR_KWARGS = dict(mode='economic', check_finite=False, overwrite_a=True) - -def sketch(A, method='randomized_subspace_iteration', *args, **kwargs): - try: - func = getattr(range_finders, method) - except AttributeError: - # TODO: write better error message - raise ValueError('incorrect method %s passed' ) - - return func(*args, **kwargs) - - -def single_pass_sketch(A, output_rank=None, row_oversample=None, - column_oversample=10, distribution='uniform', - check_finite=False, random_state=None): - random_state = check_random_state(random_state) - - # converts A to array, raise ValueError if A has inf or nan - A = np.asarray_chkfinite(A) if check_finite else np.asarray(A) - - if A.dtype not in _VALID_DTYPES: - raise ValueError('A.dtype must be one of %s, not %s' - % (' '.join(_VALID_DTYPES), A.dtype)) - - if distribution not in _VALID_SINGLE_DISTRIBUTIONS: - raise ValueError('distribution must be one of %s, not %s' - % (' '.join(_VALID_SINGLE_DISTRIBUTIONS), distribution)) - - if row_oversample is None: - row_oversample = 2 * column_oversample - - # check rank - rank = _output_rank_check(A, output_rank) - row_oversample += rank - column_oversample += rank - - # get numpy random func - dist_func = _get_distribution_func(distribution, random_state) - - #Generate a random test matrix Omega - Omega = dist_func(size=(A.shape[1], column_oversample)).astype(A.dtype) - Psi = dist_func(size=(row_oversample, A.shape[0])).astype(A.dtype) - - if A.dtype == np.complexfloating: - real_type = np.float32 if A.dtype == np.complex64 else np.float64 - Omega += 1j * dist_func(size=(A.shape[1], column_oversample)).astype(real_type) - Psi += 1j * dist_func(size=(row_oversample, A.shape[0])).astype(real_type) - - if distribution == 'orthogonal': - Omega, _ = linalg.qr(Omega, **_QR_KWARGS) - Psi , _ = linalg.qr(conjugate_transpose(Psi), **_QR_KWARGS) - Psi = conjugate_transpose(Psi) - - #Build sample matrix Y = A * Omega and W = Psi * A - #Note: Y should approximate the column space and W the row space of A - #Y = A.dot(Omega) - #W = Psi.dot(A) - #del Omega - - #Orthogonalize Y using economic QR decomposition: Y=QR - #Q, _ = linalg.qr(Y, **_QR_KWARGS) - #U, T = linalg.qr(Psi.dot(Q), **_QR_KWARGS) - - # Form a smaller matrix - #return linalg.solve(T, conjugate_transpose(U).dot(W)) - return Omega, Psi diff --git a/ristretto/sketch/tests/test_sketches.py b/ristretto/sketch/tests/test_sketches.py new file mode 100644 index 0000000..d00a0a3 --- /dev/null +++ b/ristretto/sketch/tests/test_sketches.py @@ -0,0 +1,80 @@ +import numpy as np +from numpy.testing import assert_raises + +from ristretto.sketch import _sketches + + +def test_random_axis_sample(): + # ------------------------------------------------------------------------ + # tests return correct size + m, n = 30, 10 + A = np.ones(m, n) + l = 3 + random_state = np.RandomState(123) + + row_idx = _sketches.random_axis_sample(A, l, 0, random_state) + col_idx = _sketches.random_axis_sample(A, l, 1, random_state) + + assert len(row_idx) == l + assert len(col_idx) == l + + # ------------------------------------------------------------------------ + # tests 
returns unique indices + assert len(np.unique(row_idx)) == len(row_idx) + assert len(np.unique(col_idx)) == len(col_idx) + + # ------------------------------------------------------------------------ + # tests return unique in axis + assert np.isin(row_idx, np.arange(m), assume_unique=True) + assert np.isin(col_idx, np.arange(n), assume_unique=True) + + +def test_random_gaussian_map(): + # ------------------------------------------------------------------------ + # tests return correct shape + m, n = 30, 10 + A = np.ones(m, n) + l = 3 + random_state = np.RandomState(123) + + row_sketch = _sketches.random_gaussian_map(A, l, 0, random_state) + col_sketch = _sketches.random_gaussian_map(A, l, 1, random_state) + + assert row_sketch.shape == (n, l) + assert row_sketch.shape == (m, l) + + # ------------------------------------------------------------------------ + # tests return correct data type + assert row_sketch.dtype == A.dtype + assert col_sketch.dtype == A.dtype + + +def test_sparse_random_map(): + # ------------------------------------------------------------------------ + # tests return correct shape + m, n = 30, 10 + A = np.ones(m, n) + l = 3 + density = 1./3 + random_state = np.RandomState(123) + + row_sketch = _sketches.sparse_random_map(A, l, 0, density, random_state) + col_sketch = _sketches.sparse_random_map(A, l, 1, density, random_state) + + assert row_sketch.shape == (n, l) + assert row_sketch.shape == (m, l) + + # ------------------------------------------------------------------------ + # tests return correct data type + assert row_sketch.dtype == A.dtype + assert col_sketch.dtype == A.dtype + + # ------------------------------------------------------------------------ + # tests returns correct density + assert len(row_sketch.nnz) == int(density*m*n) + assert len(col_sketch.nnz) == int(density*m*n) + + # ------------------------------------------------------------------------ + # tests raises error when density not in [0,1] + assert_raises(ValueError, _sketches.sparse_random_map, A, l, -1, random_state) + assert_raises(ValueError, _sketches.sparse_random_map, A, l, 1.1, random_state) diff --git a/ristretto/sketch/tests/test_transforms.py b/ristretto/sketch/tests/test_transforms.py new file mode 100644 index 0000000..63cb4ae --- /dev/null +++ b/ristretto/sketch/tests/test_transforms.py @@ -0,0 +1,100 @@ +import numpy as np +from numpy.testing import assert_raises + +from ristretto.sketch.transforms import randomized_uniform_sampling +from ristretto.sketch.transforms import johnson_lindenstrauss +from ristretto.sketch.transforms import sparse_johnson_lindenstrauss +from ristretto.sketch.transforms import fast_johnson_lindenstrauss +from ristretto.sketch.transforms import randomized_subspace_iteration + + +def test_randomized_uniform_sampling(): + # ------------------------------------------------------------------------ + # tests return correct size + m, n = 30, 10 + A = np.ones(m, n) + l = 3 + + row_trans = randomized_uniform_sampling(A, l, axis=0) + col_trans = randomized_uniform_sampling(A, l, axis=1) + + assert row_trans.shape == (l, n) + assert col_trans.shape == (m, l) + + # ------------------------------------------------------------------------ + # tests raises incompatible axis + assert_raises(ValueError, randomized_uniform_sampling, A, l, axis=2) + + # ------------------------------------------------------------------------ + # tests raises incompatible A dimensions + assert_raises(ValueError, randomized_uniform_sampling, A[5], l) + + +def 
test_johnson_linderstrauss(): + # ------------------------------------------------------------------------ + # tests return correct size + m, n = 30, 10 + A = np.ones(m, n) + l = 3 + + row_trans = johnson_lindenstrauss(A, l, axis=0) + col_trans = johnson_lindenstrauss(A, l, axis=1) + + assert row_trans.shape == (l, n) + assert col_trans.shape == (m, l) + + # ------------------------------------------------------------------------ + # tests raises incompatible n_subspace + assert_raises(ValueError, johnson_lindenstrauss, A, l, n_subspace=-1) + + # ------------------------------------------------------------------------ + # tests raises incompatible axis + assert_raises(ValueError, johnson_lindenstrauss, A, l, axis=2) + + # ------------------------------------------------------------------------ + # tests raises incompatible A dimensions + assert_raises(ValueError, johnson_lindenstrauss, A[5], l) + + +def test_sparse_johnson_linderstrauss(): + # ------------------------------------------------------------------------ + # tests return correct size + m, n = 30, 10 + A = np.ones(m, n) + l = 3 + + row_trans = sparse_johnson_lindenstrauss(A, l, axis=0) + col_trans = sparse_johnson_lindenstrauss(A, l, axis=1) + + assert row_trans.shape == (l, n) + assert col_trans.shape == (m, l) + + # ------------------------------------------------------------------------ + # tests raises incompatible axis + assert_raises(ValueError, sparse_johnson_lindenstrauss, A, l, axis=2) + + # ------------------------------------------------------------------------ + # tests raises incompatible A dimensions + assert_raises(ValueError, sparse_johnson_lindenstrauss, A[5], l) + + +def test_fast_johnson_linderstrauss(): + # ------------------------------------------------------------------------ + # tests return correct size + m, n = 30, 10 + A = np.ones(m, n) + l = 3 + + row_trans = fast_johnson_lindenstrauss(A, l, axis=0) + col_trans = fast_johnson_lindenstrauss(A, l, axis=1) + + assert row_trans.shape == (l, n) + assert col_trans.shape == (m, l) + + # ------------------------------------------------------------------------ + # tests raises incompatible axis + assert_raises(ValueError, fast_johnson_lindenstrauss, A, l, axis=2) + + # ------------------------------------------------------------------------ + # tests raises incompatible A dimensions + assert_raises(ValueError, fast_johnson_lindenstrauss, A[5], l) diff --git a/ristretto/sketch/tests/test_utils.py b/ristretto/sketch/tests/test_utils.py new file mode 100644 index 0000000..bc8f933 --- /dev/null +++ b/ristretto/sketch/tests/test_utils.py @@ -0,0 +1,40 @@ +import numpy as np +from numpy.testing import assert_raises + +from ristretto.sketch.utils import orthonormalize +from ristretto.sketch.utils import perform_subspace_iterations + + +def test_orthonormalize(): + # ------------------------------------------------------------------------ + # test overwrite + A = np.eye(10) + + B = orthonormalize(A, overwrite_a=False) + over = orthonormalize(A, overwrite_a=True) + + assert A is not B + assert A is over + + # ------------------------------------------------------------------------ + # test return array type + assert A.dtype == B.dtype + + # ------------------------------------------------------------------------ + # test check_finite + A[0,0] = np.inf + assert_raises(ValueError, orthonormalize, A, check_finite=True) + + +def test_perform_subspace_iterations(): + # ------------------------------------------------------------------------ + # test shapes + A = np.eye(10) + 
Q_row = np.eye(10)[:5] + Q_col = np.eye(10)[:,:5] + + rowwise = perform_subspace_iterations(A, Q_row) + colwise = perform_subspace_iterations(A, Q_col) + + assert rowwise.shape == Q_row.shape + assert colwise.shape == Q_col.shape diff --git a/ristretto/sketch/transforms.py b/ristretto/sketch/transforms.py new file mode 100644 index 0000000..dc2f963 --- /dev/null +++ b/ristretto/sketch/transforms.py @@ -0,0 +1,138 @@ +""" +Functions for approximating the range of a matrix A. +""" +import numpy as np +from scipy import fftpack +from scipy import sparse + +from . import _sketches +from .utils import perform_subspace_iterations +from ..utils import check_random_state, safe_sparse_dot + + +def randomized_uniform_sampling(A, l, axis=1, random_state=None): + """Uniform randomized sampling transform. + + Given an m x n matrix A, and an integer l, this returns an m x l + random subset of the range of A. + + """ + random_state = check_random_state(random_state) + + A = np.asarray(A) + if A.ndim != 2: + raise ValueError('A must be a 2D array, not %dD' % A.ndim) + + if axis not in (0, 1): + raise ValueError('If supplied, axis must be in (0, 1)') + + # sample l rows/columns with equal probability + idx = _sketches.random_axis_sample(A, l, axis, random_state) + + if axis == 0: + return A[idx] + return A[:, idx] + + +def johnson_lindenstrauss(A, l, n_subspace=None, axis=1, random_state=None): + """ + + Given an m x n matrix A, and an integer l, this scheme computes an m x l + orthonormal matrix Q whose range approximates the range of A + + Notes + ----- + Johnson-Lindenstrauss + #Also known as randQB + """ + random_state = check_random_state(random_state) + + A = np.asarray(A) + if A.ndim != 2: + raise ValueError('A must be a 2D array, not %dD' % A.ndim) + + if axis not in (0, 1): + raise ValueError('If supplied, axis must be in (0, 1)') + + # construct gaussian random matrix + Omega = _sketches.random_gaussian_map(A, l, axis, random_state) + + # project A onto Omega + if axis == 0: + Q = Omega.T.dot(A) + else: + Q = A.dot(Omega) + + if n_subspace is not None: + Q = perform_subspace_iterations(A, Q, n_iter=n_subspace, axis=axis) + + return Q + + +def sparse_johnson_lindenstrauss(A, l, density=None, axis=1, random_state=None): + """ + + Given an m x n matrix A, and an integer l, this scheme computes an m x l + orthonormal matrix Q whose range approximates the range of A + + Parameters + ---------- + density : sparse matrix density + + Notes + ----- + #Also known as randQB + """ + random_state = check_random_state(random_state) + + A = np.asarray(A) + if A.ndim != 2: + raise ValueError('A must be a 2D array, not %dD' % A.ndim) + + if axis not in (0, 1): + raise ValueError('If supplied, axis must be in (0, 1)') + + if density is None: + density = 1.0 / 3 + + Omega = _sketches.sparse_random_map(A, l, axis, density, random_state) + + # project A onto Omega + if axis == 0: + return safe_sparse_dot(Omega.T, A) + return safe_sparse_dot(A, Omega) + + +def fast_johnson_lindenstrauss(A, l, axis=1, random_state=None): + """ + + Given an m x n matrix A, and an integer l, this scheme computes an m x l + orthonormal matrix Q whose range approximates the range of A + + Notes + ----- + Johnson-Lindenstrauss + #Also known as randQB + """ + random_state = check_random_state(random_state) + + A = np.asarray_chfinite(A) + if A.ndim != 2: + raise ValueError('A must be a 2D array, not %dD' % A.ndim) + + if axis not in (0, 1): + raise ValueError('If supplied, axis must be in (0, 1)') + + # TODO: Find name for sketch and put in 
_sketches + # construct gaussian random matrix + diag = random_state.choice((-1, 1), size=A.shape[axis]).astype(A.dtype) + + if axis == 0: + diag = diag[:, np.newaxis] + + # discrete fourier transform of AD (or DA) + FDA = fftpack.dct(A * diag, axis=axis, norm='ortho') + + # randomly sample axis + return randomized_uniform_sampling( + FDA, l, axis=axis, random_state=random_state) diff --git a/ristretto/sketch/utils.py b/ristretto/sketch/utils.py new file mode 100644 index 0000000..50988f6 --- /dev/null +++ b/ristretto/sketch/utils.py @@ -0,0 +1,29 @@ +""" +Module containing utility functions for +""" +from scipy import linalg + + +def orthonormalize(A, overwrite_a=True, check_finite=False): + """orthonormalize the columns of A via QR decomposition""" + # NOTE: for A(m, n) 'economic' returns Q(m, k), R(k, n) where k is min(m, n) + Q, _ = linalg.qr(A, overwrite_a=overwrite_a, check_finite=check_finite, + mode='economic', pivoting=False) + return Q + + +def perform_subspace_iterations(A, Q, n_iter=1, axis=0): + """perform subspace iterations on Q""" + # orthonormalize Y, overwriting + Q = orthonormalize(Q) + + # perform subspace iterations + for _ in range(n_iter): + if axis == 0: + Z = orthonormalize(A.dot(Q.T)) + Q = orthonormalize(A.T.dot(Z)) + else: + Z = orthonormalize(A.T.dot(Q)) + Q = orthonormalize(A.dot(Z)) + + return Q diff --git a/ristretto/utils.py b/ristretto/utils.py index 74be36c..77085c1 100644 --- a/ristretto/utils.py +++ b/ristretto/utils.py @@ -12,6 +12,7 @@ def check_non_negative(X, whom): + # NOTE: copied from scikit-learn """Check if there is any negative value in an array. Parameters ---------- @@ -26,6 +27,7 @@ def check_non_negative(X, whom): def check_random_state(seed): + # NOTE: copied from scikit-learn """Turn seed into a np.random.RandomState instance Parameters ---------- @@ -52,6 +54,35 @@ def conjugate_transpose(A): return A.T +def safe_sparse_dot(a, b, dense_output=False): + # NOTE: copied from scikit-learn + """Dot product that handle the sparse matrix case correctly + + Uses BLAS GEMM as replacement for numpy.dot where possible + to avoid unnecessary copies. + + Parameters + ---------- + a : array or sparse matrix + b : array or sparse matrix + dense_output : boolean, default False + When False, either ``a`` or ``b`` being sparse will yield sparse + output. When True, output will always be an array. + + Returns + ------- + dot_product : array or sparse matrix + sparse if ``a`` or ``b`` is sparse and ``dense_output=False``. + """ + if issparse(a) or issparse(b): + ret = a * b + if dense_output and hasattr(ret, "toarray"): + ret = ret.toarray() + return ret + else: + return np.dot(a, b) + + def nmf_data(m, n, k, factor_type='normal', noise_type='normal', noiselevel=0): _factor_types = ('normal', 'unif') From 1838bb84db0c83693dd87ecd6bf9c4d18d806a8f Mon Sep 17 00:00:00 2001 From: Joseph Knox Date: Mon, 21 May 2018 11:19:37 -0700 Subject: [PATCH 3/7] WIP: Tests passing for sketch module. Work in progress. ``sketch`` module is implemented, but old compatibility funcitons in ``sketch.range_finders`` still need to be phased out. 
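
For context, a minimal usage sketch of the new transforms API, based only on the signatures added to ristretto/sketch/transforms.py in this series; the matrix sizes and parameter values below are illustrative, not part of the patch:

    import numpy as np

    from ristretto.sketch.transforms import (
        johnson_lindenstrauss,
        sparse_johnson_lindenstrauss,
        randomized_uniform_sampling,
    )

    rng = np.random.RandomState(123)
    A = rng.standard_normal((100, 40))
    l = 10  # sketch dimension

    # dense Gaussian sketch of the column space; with axis=1 the result is m x l
    Y = johnson_lindenstrauss(A, l, n_subspace=2, axis=1, random_state=rng)

    # sparse random projection (density defaults to 1/3 when None)
    Y_sparse = sparse_johnson_lindenstrauss(A, l, density=1. / 3, axis=1, random_state=rng)

    # uniform random column subset
    Y_cols = randomized_uniform_sampling(A, l, axis=1, random_state=rng)

    assert Y.shape == (100, l) and Y_cols.shape == (100, l)
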
--- ristretto/sketch/__init__.py | 2 +- ristretto/sketch/range_finders.py | 110 ++++++++++++++++++---- ristretto/sketch/tests/test_sketches.py | 28 +++--- ristretto/sketch/tests/test_transforms.py | 13 +-- ristretto/sketch/tests/test_utils.py | 7 +- ristretto/sketch/transforms.py | 2 +- ristretto/sketch/utils.py | 19 ++-- ristretto/utils.py | 2 +- 8 files changed, 129 insertions(+), 54 deletions(-) diff --git a/ristretto/sketch/__init__.py b/ristretto/sketch/__init__.py index 1b50b9e..ea46cc9 100644 --- a/ristretto/sketch/__init__.py +++ b/ristretto/sketch/__init__.py @@ -4,4 +4,4 @@ # TODO: FOR COMPATIBILITY: change once range_finders rewritten from . import * -from .range_finders import sketch, single_pass_sketch +from .range_finders import sketch, single_pass_sketch, _get_distribution_func diff --git a/ristretto/sketch/range_finders.py b/ristretto/sketch/range_finders.py index 250abb0..6186559 100644 --- a/ristretto/sketch/range_finders.py +++ b/ristretto/sketch/range_finders.py @@ -10,44 +10,116 @@ import numpy as np from scipy import linalg -from . import transforms from ..utils import check_random_state, conjugate_transpose _VALID_DISTRIBUTIONS = ('uniform', 'normal') +_VALID_SINGLE_DISTRIBUTIONS = ('uniform', 'normal', 'orthogonal') +_VALID_DTYPES = (np.float32, np.float64, np.complex64, np.complex128) _QR_KWARGS = dict(mode='economic', check_finite=False, overwrite_a=True) -def sketch(A, method='randomized_subspace_iteration', *args, **kwargs): - try: - func = getattr(transforms, method) - except AttributeError: - # TODO: write better error message - raise ValueError('incorrect method %s passed' ) - return func(*args, **kwargs) +def _output_rank_check(A, output_rank): + n, m = A.shape + rank = min(n, m) if output_rank is None else output_rank + if rank > min(n, m): + warnings.warn('output_rank %d is greater than the minimum ' + 'dimension of input array A (shape %s). 
The ' + 'minimum dimension will be chosen instead', + output_rank, n, m, min(n, m)) + rank = min(n, m) + return rank -def single_pass_sketch(A, output_rank=None, row_oversample=None, - column_oversample=10, distribution='uniform', - check_finite=False, random_state=None): + +def _get_distribution_func(distribution, random_state): + if distribution == 'uniform': + return partial(random_state.uniform, -1, 1) + return random_state.standard_normal + + +def sketch(A, out=None, output_rank=None, n_oversample=10, n_iter=2, + distribution='uniform', axis=0, check_finite=False, random_state=None): + random_state = check_random_state(random_state) + + # converts A to array, raise ValueError if A has inf or nan + A = np.asarray_chkfinite(A) if check_finite else np.asarray(A) + + if A.dtype not in _VALID_DTYPES: + raise ValueError('A.dtype must be one of %s, not %s' + % (' '.join(_VALID_DTYPES), A.dtype)) if distribution not in _VALID_DISTRIBUTIONS: raise ValueError('distribution must be one of %s, not %s' % (' '.join(_VALID_DISTRIBUTIONS), distribution)) + + if axis not in (0, 1): + raise ValueError('If specified, axis must be 0 or 1, not %s' % axis) + + # check rank + rank = _output_rank_check(A, output_rank) + + n_oversample += rank + size = (n_oversample, A.shape[0]) if axis == 0 else (A.shape[1], n_oversample) + + # get numpy random func + dist_func = _get_distribution_func(distribution, random_state) + + #Generate a random test matrix Omega + Omega = dist_func(size=size).astype(A.dtype) + + if A.dtype == np.complexfloating: + real_type = np.float32 if A.dtype == np.complex64 else np.float64 + Omega += 1j * dist_func(size=size).astype(real_type) + + #Build sample matrix Y : Y = A * Omega + # Y approximates range of A + Y = A.dot(Omega) + del Omega + + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + #Orthogonalize Y using economic QR decomposition: Y=QR + #If q > 0 perfrom q subspace iterations + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + for _ in range(n_iter): + Y, _ = linalg.qr(Y, **_QR_KWARGS) + Z, _ = linalg.qr(conjugate_transpose(A).dot(Y), **_QR_KWARGS) + Y = A.dot(Z) + + # compute sketch + S, _ = linalg.qr(Y, **_QR_KWARGS) + + #if axis == 0: + # return np.dot(conjugate_transpose(S), A, out=out) + #return np.dot(A, S, out=out) + return S + + +def single_pass_sketch(A, output_rank=None, row_oversample=None, + column_oversample=10, distribution='uniform', + check_finite=False, random_state=None): random_state = check_random_state(random_state) # converts A to array, raise ValueError if A has inf or nan A = np.asarray_chkfinite(A) if check_finite else np.asarray(A) + if A.dtype not in _VALID_DTYPES: + raise ValueError('A.dtype must be one of %s, not %s' + % (' '.join(_VALID_DTYPES), A.dtype)) + + if distribution not in _VALID_SINGLE_DISTRIBUTIONS: + raise ValueError('distribution must be one of %s, not %s' + % (' '.join(_VALID_SINGLE_DISTRIBUTIONS), distribution)) + if row_oversample is None: row_oversample = 2 * column_oversample # check rank - # TODO:rank = _output_rank_check(A, output_rank) - row_oversample += output_rank - column_oversample += output_rank + rank = _output_rank_check(A, output_rank) + row_oversample += rank + column_oversample += rank # get numpy random func - dist_func = random_state.randn#_get_distribution_func(distribution, random_state) + dist_func = _get_distribution_func(distribution, random_state) #Generate a random test matrix Omega Omega = dist_func(size=(A.shape[1], 
column_oversample)).astype(A.dtype) @@ -58,10 +130,10 @@ def single_pass_sketch(A, output_rank=None, row_oversample=None, Omega += 1j * dist_func(size=(A.shape[1], column_oversample)).astype(real_type) Psi += 1j * dist_func(size=(row_oversample, A.shape[0])).astype(real_type) - # orthogonalize - Omega, _ = linalg.qr(Omega, **_QR_KWARGS) - Psi , _ = linalg.qr(conjugate_transpose(Psi), **_QR_KWARGS) - Psi = conjugate_transpose(Psi) + if distribution == 'orthogonal': + Omega, _ = linalg.qr(Omega, **_QR_KWARGS) + Psi , _ = linalg.qr(conjugate_transpose(Psi), **_QR_KWARGS) + Psi = conjugate_transpose(Psi) #Build sample matrix Y = A * Omega and W = Psi * A #Note: Y should approximate the column space and W the row space of A diff --git a/ristretto/sketch/tests/test_sketches.py b/ristretto/sketch/tests/test_sketches.py index d00a0a3..ad07cdf 100644 --- a/ristretto/sketch/tests/test_sketches.py +++ b/ristretto/sketch/tests/test_sketches.py @@ -8,9 +8,9 @@ def test_random_axis_sample(): # ------------------------------------------------------------------------ # tests return correct size m, n = 30, 10 - A = np.ones(m, n) + A = np.ones((m, n)) l = 3 - random_state = np.RandomState(123) + random_state = np.random.RandomState(123) row_idx = _sketches.random_axis_sample(A, l, 0, random_state) col_idx = _sketches.random_axis_sample(A, l, 1, random_state) @@ -25,23 +25,23 @@ def test_random_axis_sample(): # ------------------------------------------------------------------------ # tests return unique in axis - assert np.isin(row_idx, np.arange(m), assume_unique=True) - assert np.isin(col_idx, np.arange(n), assume_unique=True) + assert all(np.isin(row_idx, np.arange(m), assume_unique=True)) + assert all(np.isin(col_idx, np.arange(n), assume_unique=True)) def test_random_gaussian_map(): # ------------------------------------------------------------------------ # tests return correct shape m, n = 30, 10 - A = np.ones(m, n) + A = np.ones((m, n)) l = 3 - random_state = np.RandomState(123) + random_state = np.random.RandomState(123) row_sketch = _sketches.random_gaussian_map(A, l, 0, random_state) col_sketch = _sketches.random_gaussian_map(A, l, 1, random_state) - assert row_sketch.shape == (n, l) assert row_sketch.shape == (m, l) + assert col_sketch.shape == (n, l) # ------------------------------------------------------------------------ # tests return correct data type @@ -53,16 +53,16 @@ def test_sparse_random_map(): # ------------------------------------------------------------------------ # tests return correct shape m, n = 30, 10 - A = np.ones(m, n) + A = np.ones((m, n)) l = 3 density = 1./3 - random_state = np.RandomState(123) + random_state = np.random.RandomState(123) row_sketch = _sketches.sparse_random_map(A, l, 0, density, random_state) col_sketch = _sketches.sparse_random_map(A, l, 1, density, random_state) - assert row_sketch.shape == (n, l) assert row_sketch.shape == (m, l) + assert col_sketch.shape == (n, l) # ------------------------------------------------------------------------ # tests return correct data type @@ -71,10 +71,10 @@ def test_sparse_random_map(): # ------------------------------------------------------------------------ # tests returns correct density - assert len(row_sketch.nnz) == int(density*m*n) - assert len(col_sketch.nnz) == int(density*m*n) + assert row_sketch.nnz == int(density*m*l) + assert col_sketch.nnz == int(density*n*l) # ------------------------------------------------------------------------ # tests raises error when density not in [0,1] - 
assert_raises(ValueError, _sketches.sparse_random_map, A, l, -1, random_state) - assert_raises(ValueError, _sketches.sparse_random_map, A, l, 1.1, random_state) + assert_raises(ValueError, _sketches.sparse_random_map, A, l, 0, -1, random_state) + assert_raises(ValueError, _sketches.sparse_random_map, A, l, 0, 1.1, random_state) diff --git a/ristretto/sketch/tests/test_transforms.py b/ristretto/sketch/tests/test_transforms.py index 63cb4ae..aa55585 100644 --- a/ristretto/sketch/tests/test_transforms.py +++ b/ristretto/sketch/tests/test_transforms.py @@ -5,14 +5,13 @@ from ristretto.sketch.transforms import johnson_lindenstrauss from ristretto.sketch.transforms import sparse_johnson_lindenstrauss from ristretto.sketch.transforms import fast_johnson_lindenstrauss -from ristretto.sketch.transforms import randomized_subspace_iteration def test_randomized_uniform_sampling(): # ------------------------------------------------------------------------ # tests return correct size m, n = 30, 10 - A = np.ones(m, n) + A = np.ones((m, n)) l = 3 row_trans = randomized_uniform_sampling(A, l, axis=0) @@ -34,7 +33,7 @@ def test_johnson_linderstrauss(): # ------------------------------------------------------------------------ # tests return correct size m, n = 30, 10 - A = np.ones(m, n) + A = np.ones((m, n)) l = 3 row_trans = johnson_lindenstrauss(A, l, axis=0) @@ -43,10 +42,6 @@ def test_johnson_linderstrauss(): assert row_trans.shape == (l, n) assert col_trans.shape == (m, l) - # ------------------------------------------------------------------------ - # tests raises incompatible n_subspace - assert_raises(ValueError, johnson_lindenstrauss, A, l, n_subspace=-1) - # ------------------------------------------------------------------------ # tests raises incompatible axis assert_raises(ValueError, johnson_lindenstrauss, A, l, axis=2) @@ -60,7 +55,7 @@ def test_sparse_johnson_linderstrauss(): # ------------------------------------------------------------------------ # tests return correct size m, n = 30, 10 - A = np.ones(m, n) + A = np.ones((m, n)) l = 3 row_trans = sparse_johnson_lindenstrauss(A, l, axis=0) @@ -82,7 +77,7 @@ def test_fast_johnson_linderstrauss(): # ------------------------------------------------------------------------ # tests return correct size m, n = 30, 10 - A = np.ones(m, n) + A = np.ones((m, n)) l = 3 row_trans = fast_johnson_lindenstrauss(A, l, axis=0) diff --git a/ristretto/sketch/tests/test_utils.py b/ristretto/sketch/tests/test_utils.py index bc8f933..da13949 100644 --- a/ristretto/sketch/tests/test_utils.py +++ b/ristretto/sketch/tests/test_utils.py @@ -14,7 +14,8 @@ def test_orthonormalize(): over = orthonormalize(A, overwrite_a=True) assert A is not B - assert A is over + # TODO: when does overwrite_a even work? (fortran?) 
+ #assert A is over # ------------------------------------------------------------------------ # test return array type @@ -33,8 +34,8 @@ def test_perform_subspace_iterations(): Q_row = np.eye(10)[:5] Q_col = np.eye(10)[:,:5] - rowwise = perform_subspace_iterations(A, Q_row) - colwise = perform_subspace_iterations(A, Q_col) + rowwise = perform_subspace_iterations(A, Q_row, axis=0) + colwise = perform_subspace_iterations(A, Q_col, axis=1) assert rowwise.shape == Q_row.shape assert colwise.shape == Q_col.shape diff --git a/ristretto/sketch/transforms.py b/ristretto/sketch/transforms.py index dc2f963..2701050 100644 --- a/ristretto/sketch/transforms.py +++ b/ristretto/sketch/transforms.py @@ -116,7 +116,7 @@ def fast_johnson_lindenstrauss(A, l, axis=1, random_state=None): """ random_state = check_random_state(random_state) - A = np.asarray_chfinite(A) + A = np.asarray_chkfinite(A) if A.ndim != 2: raise ValueError('A must be a 2D array, not %dD' % A.ndim) diff --git a/ristretto/sketch/utils.py b/ristretto/sketch/utils.py index 50988f6..2134b18 100644 --- a/ristretto/sketch/utils.py +++ b/ristretto/sketch/utils.py @@ -7,6 +7,7 @@ def orthonormalize(A, overwrite_a=True, check_finite=False): """orthonormalize the columns of A via QR decomposition""" # NOTE: for A(m, n) 'economic' returns Q(m, k), R(k, n) where k is min(m, n) + # TODO: when does overwrite_a even work? (fortran?) Q, _ = linalg.qr(A, overwrite_a=overwrite_a, check_finite=check_finite, mode='economic', pivoting=False) return Q @@ -14,16 +15,22 @@ def orthonormalize(A, overwrite_a=True, check_finite=False): def perform_subspace_iterations(A, Q, n_iter=1, axis=0): """perform subspace iterations on Q""" + # TODO: can we figure out how not to transpose for row wise + if axis == 0: + Q = Q.T + # orthonormalize Y, overwriting Q = orthonormalize(Q) # perform subspace iterations for _ in range(n_iter): - if axis == 0: - Z = orthonormalize(A.dot(Q.T)) - Q = orthonormalize(A.T.dot(Z)) - else: - Z = orthonormalize(A.T.dot(Q)) - Q = orthonormalize(A.dot(Z)) + #if axis == 0: + # Z = orthonormalize(A.dot(Q.T)) + # Q = orthonormalize(A.T.dot(Z)) + #else: + Z = orthonormalize(A.T.dot(Q)) + Q = orthonormalize(A.dot(Z)) + if axis == 0: + return Q.T return Q diff --git a/ristretto/utils.py b/ristretto/utils.py index 77085c1..786b0d5 100644 --- a/ristretto/utils.py +++ b/ristretto/utils.py @@ -74,7 +74,7 @@ def safe_sparse_dot(a, b, dense_output=False): dot_product : array or sparse matrix sparse if ``a`` or ``b`` is sparse and ``dense_output=False``. """ - if issparse(a) or issparse(b): + if sp.issparse(a) or sp.issparse(b): ret = a * b if dense_output and hasattr(ret, "toarray"): ret = ret.toarray() From 89b703017ad90e56ce6382bb4686cab3430c46bc Mon Sep 17 00:00:00 2001 From: Joseph Knox Date: Tue, 22 May 2018 18:29:53 -0700 Subject: [PATCH 4/7] WIP: Finished phasing out old sketch module. Still need to better incorporate within random methods, perhaps giving transform options. Also still need to standardize parameter names. Additionally, single_pass methods have been depricated (masked with a single subspace iteration) for now because the previous implementation was not in fact 'single_pass'. 
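
The replacement path this commit describes amounts to building the QB factorization from a sketch with a single subspace iteration. A small sketch of that construction, assuming a synthetic low-rank test matrix and the johnson_lindenstrauss signature introduced earlier in the series (k, p, q mirror the rqb parameters below):

    import numpy as np

    from ristretto.sketch.transforms import johnson_lindenstrauss

    rng = np.random.RandomState(0)
    # rank-20 test matrix of shape (200, 50)
    A = rng.standard_normal((200, 20)).dot(rng.standard_normal((20, 50)))

    k, p, q = 20, 10, 1  # target rank, oversampling, subspace iterations
    Q = johnson_lindenstrauss(A, k + p, n_subspace=q, random_state=rng)
    B = Q.conj().T.dot(A)  # same as conjugate_transpose(Q).dot(A)

    # Q B recovers A up to the error of the rank-(k + p) range approximation
    err = np.linalg.norm(A - Q.dot(B)) / np.linalg.norm(A)
    assert err < 1e-8
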
--- ristretto/eigen.py | 17 ++- ristretto/interp_decomp.py | 6 +- ristretto/lu.py | 5 +- ristretto/qb.py | 51 ++++---- ristretto/sketch/__init__.py | 4 - ristretto/sketch/_sketches.py | 5 + ristretto/sketch/range_finders.py | 150 ---------------------- ristretto/sketch/tests/test_transforms.py | 4 +- ristretto/sketch/transforms.py | 21 +-- 9 files changed, 50 insertions(+), 213 deletions(-) delete mode 100644 ristretto/sketch/range_finders.py diff --git a/ristretto/eigen.py b/ristretto/eigen.py index fef704d..ee4740f 100644 --- a/ristretto/eigen.py +++ b/ristretto/eigen.py @@ -5,12 +5,14 @@ # Joseph Knox # License: GNU General Public License v3.0 +# TODO: repace nystroem_col with random uniform sampling + from __future__ import division, print_function import numpy as np from scipy import linalg -from .sketch import sketch +from .sketch.transforms import johnson_lindenstrauss, randomized_uniform_sampling from .utils import check_random_state, conjugate_transpose _VALID_DTYPES = (np.float32, np.float64, np.complex64, np.complex128) @@ -64,8 +66,9 @@ def reigh(A, k, p=20, q=2, sdist='normal', random_state=None): (available at `arXiv `_). """ # get random sketch - Q = sketch(A, output_rank=k, n_oversample=p, n_iter=q, distribution=sdist, - axis=1, check_finite=True, random_state=random_state) + Q = johnson_lindenstrauss(A, k + p, n_subspace=q, axis=1, random_state=random_state) + #Q = sketch(A, output_rank=k, n_oversample=p, n_iter=q, distribution=sdist, + # axis=1, check_finite=True, random_state=random_state) #Project the data matrix a into a lower dimensional subspace B = A.dot(Q) @@ -124,8 +127,9 @@ def reigh_nystroem(A, k, p=10, q=2, sdist='normal', random_state=None): (available at `arXiv `_). """ # get random sketch - S = sketch(A, output_rank=k, n_oversample=p, n_iter=q, distribution=sdist, - axis=1, check_finite=True, random_state=random_state) + S = johnson_lindenstrauss(A, k + p, n_subspace=q, axis=1, random_state=random_state) + #S = sketch(A, output_rank=k, n_oversample=p, n_iter=q, distribution=sdist, + # axis=1, check_finite=True, random_state=random_state) #Project the data matrix a into a lower dimensional subspace B1 = A.dot(S) @@ -197,6 +201,9 @@ def reigh_nystroem_col(A, k, p=0, random_state=None): decompositions" (2009). (available at `arXiv `_). """ + + # TODO: repace with random uniform sampling + random_state = check_random_state(random_state) # converts A to array, raise ValueError if A has inf or nan diff --git a/ristretto/interp_decomp.py b/ristretto/interp_decomp.py index fd00cc5..7cc3d0a 100644 --- a/ristretto/interp_decomp.py +++ b/ristretto/interp_decomp.py @@ -11,13 +11,17 @@ from scipy import linalg from .qb import rqb -from .sketch import _get_distribution_func from .utils import check_random_state, conjugate_transpose _VALID_DTYPES = (np.float32, np.float64, np.complex64, np.complex128) _VALID_SDISTS = ('uniform', 'normal') _VALID_MODES = ('row', 'column') +def _get_distribution_func(distribution, random_state): + if distribution == 'uniform': + return partial(random_state.uniform, -1, 1) + return random_state.standard_normal + def interp_decomp(A, k=None, mode='column', index_set=False): """Interpolative decomposition (ID). 
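
A short standalone illustration of the helper inlined into interp_decomp.py above. It relies on functools.partial, whose import is not visible in this hunk and is assumed here; the returned callable is then used like dist_func in the sketching code:

    from functools import partial

    import numpy as np

    def _get_distribution_func(distribution, random_state):
        if distribution == 'uniform':
            return partial(random_state.uniform, -1, 1)
        return random_state.standard_normal

    rng = np.random.RandomState(1)
    dist_func = _get_distribution_func('uniform', rng)
    Omega = dist_func(size=(4, 3))  # uniform samples on [-1, 1)
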
diff --git a/ristretto/lu.py b/ristretto/lu.py index 29533d4..15d6d75 100644 --- a/ristretto/lu.py +++ b/ristretto/lu.py @@ -11,7 +11,7 @@ from scipy import linalg from scipy import sparse -from .sketch import sketch +from .sketch.transforms import johnson_lindenstrauss from .utils import conjugate_transpose @@ -85,8 +85,7 @@ def rlu(A, permute=False, k=None, p=10, q=1, sdist='uniform', random_state=None) (available at `arXiv `_). """ # get random sketch - S = sketch(A, output_rank=k, n_oversample=p, n_iter=q, distribution=sdist, - axis=1, check_finite=True, random_state=random_state) + S = johnson_lindenstrauss(A, k + p, n_subspace=q, axis=1, random_state=random_state) # Compute pivoted LU decompostion of the orthonormal basis matrix Q. # Q = P * L * U diff --git a/ristretto/qb.py b/ristretto/qb.py index 8970cf9..6de66d3 100644 --- a/ristretto/qb.py +++ b/ristretto/qb.py @@ -5,10 +5,17 @@ # Joseph Knox # License: GNU General Public License v3.0 +# NOTE: we should depricate single_pass because: +# we get the same performance by perfoming a single subspace +# iteration, the single pass construct doesn't really help us +# here unless we write it in C/Cython and compute the products +# Y = A * Omega, Y_tilde = A * Omega_tilde +# in sinlge passes + import numpy as np from scipy import linalg -from .sketch import sketch, single_pass_sketch +from .sketch.transforms import johnson_lindenstrauss from .utils import conjugate_transpose @@ -50,7 +57,9 @@ def rqb(A, k=None, p=10, l=None, q=1, sdist='normal', single_pass=False, 'normal' : Random test matrix with normal distributed elements. single_pass : bool - If single_pass == True, perfom single pass of algorithm. + If single_pass == True, perfom single pass of algorithm, meaning that + in the algorithm only accesses A directly a single time. Beneficial A + is large. random_state : integer, RandomState instance or None, optional (default ``None``) If integer, random_state is the seed used by the random number generator; @@ -81,32 +90,18 @@ def rqb(A, k=None, p=10, l=None, q=1, sdist='normal', single_pass=False, and GPU architectures" (2015). (available at `arXiv `_). 
""" + # get random sketch if single_pass: - # Form a smaller matrix - Omega, Psi = single_pass_sketch( - A, output_rank=k, column_oversample=p, row_oversample=l, - distribution=sdist, check_finite=True, random_state=random_state) - - #Build sample matrix Y = A * Omega and W = Psi * A - #Note: Y should approximate the column space and W the row space of A - Y = A.dot(Omega) - W = Psi.dot(A) - del Omega - - #Orthogonalize Y using economic QR decomposition: Y=QR - Q, _ = linalg.qr(Y, mode='economic', check_finite=False, overwrite_a=True) - U, T = linalg.qr(Psi.dot(Q), mode='economic', check_finite=False, overwrite_a=False) - - # Form a smaller matrix - B = linalg.solve(T, conjugate_transpose(U).dot(W), check_finite=False, - overwrite_a=True, overwrite_b=True) - - else: - # get random sketch - Q = sketch(A, output_rank=k, n_oversample=p, n_iter=q, distribution=sdist, - axis=1, check_finite=True, random_state=random_state) - - #Project the data matrix a into a lower dimensional subspace - B = conjugate_transpose(Q).dot(A) + # NOTE: we get the same performance by perfoming a single subspace + # iteration, the single pass construct doesn't really help us + # here unless we write it in C/Cython and compute the products + # Y = A * Omega, Y_tilde = A * Omega_tilde + # in sinlge passes + q = 1 + + Q = johnson_lindenstrauss(A, k + p, n_subspace=q, random_state=random_state) + + #Project the data matrix a into a lower dimensional subspace + B = conjugate_transpose(Q).dot(A) return Q, B diff --git a/ristretto/sketch/__init__.py b/ristretto/sketch/__init__.py index ea46cc9..2e1bfe4 100644 --- a/ristretto/sketch/__init__.py +++ b/ristretto/sketch/__init__.py @@ -1,7 +1,3 @@ """ Module :mod:`sketch` contains the functions for sketching and range finding. """ - -# TODO: FOR COMPATIBILITY: change once range_finders rewritten -from . import * -from .range_finders import sketch, single_pass_sketch, _get_distribution_func diff --git a/ristretto/sketch/_sketches.py b/ristretto/sketch/_sketches.py index 90d5859..e79d628 100644 --- a/ristretto/sketch/_sketches.py +++ b/ristretto/sketch/_sketches.py @@ -17,6 +17,11 @@ def random_gaussian_map(A, l, axis, random_state): return random_state.standard_normal(size=(A.shape[axis], l)).astype(A.dtype) +def random_uniform_map(A, l, axis, random_state): + """generate random uniform map""" + return random_state.uniform(-1, 1, size=(A.shape[axis], l)).astype(A.dtype) + + def sparse_random_map(A, l, axis, density, random_state): """generate sparse random sampling""" # TODO: evaluete random_state paramter: we want to pass it to sparse.random diff --git a/ristretto/sketch/range_finders.py b/ristretto/sketch/range_finders.py deleted file mode 100644 index 6186559..0000000 --- a/ristretto/sketch/range_finders.py +++ /dev/null @@ -1,150 +0,0 @@ -""" -Module containing random sketch generating object. -""" -# Authors: N. 
Benjamin Erichson -# Joseph Knox -# License: GNU General Public License v3.0 -from functools import partial -import warnings - -import numpy as np -from scipy import linalg - -from ..utils import check_random_state, conjugate_transpose - -_VALID_DISTRIBUTIONS = ('uniform', 'normal') -_VALID_SINGLE_DISTRIBUTIONS = ('uniform', 'normal', 'orthogonal') -_VALID_DTYPES = (np.float32, np.float64, np.complex64, np.complex128) -_QR_KWARGS = dict(mode='economic', check_finite=False, overwrite_a=True) - - -def _output_rank_check(A, output_rank): - n, m = A.shape - rank = min(n, m) if output_rank is None else output_rank - - if rank > min(n, m): - warnings.warn('output_rank %d is greater than the minimum ' - 'dimension of input array A (shape %s). The ' - 'minimum dimension will be chosen instead', - output_rank, n, m, min(n, m)) - rank = min(n, m) - return rank - - -def _get_distribution_func(distribution, random_state): - if distribution == 'uniform': - return partial(random_state.uniform, -1, 1) - return random_state.standard_normal - - -def sketch(A, out=None, output_rank=None, n_oversample=10, n_iter=2, - distribution='uniform', axis=0, check_finite=False, random_state=None): - random_state = check_random_state(random_state) - - # converts A to array, raise ValueError if A has inf or nan - A = np.asarray_chkfinite(A) if check_finite else np.asarray(A) - - if A.dtype not in _VALID_DTYPES: - raise ValueError('A.dtype must be one of %s, not %s' - % (' '.join(_VALID_DTYPES), A.dtype)) - - if distribution not in _VALID_DISTRIBUTIONS: - raise ValueError('distribution must be one of %s, not %s' - % (' '.join(_VALID_DISTRIBUTIONS), distribution)) - - if axis not in (0, 1): - raise ValueError('If specified, axis must be 0 or 1, not %s' % axis) - - # check rank - rank = _output_rank_check(A, output_rank) - - n_oversample += rank - size = (n_oversample, A.shape[0]) if axis == 0 else (A.shape[1], n_oversample) - - # get numpy random func - dist_func = _get_distribution_func(distribution, random_state) - - #Generate a random test matrix Omega - Omega = dist_func(size=size).astype(A.dtype) - - if A.dtype == np.complexfloating: - real_type = np.float32 if A.dtype == np.complex64 else np.float64 - Omega += 1j * dist_func(size=size).astype(real_type) - - #Build sample matrix Y : Y = A * Omega - # Y approximates range of A - Y = A.dot(Omega) - del Omega - - #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - #Orthogonalize Y using economic QR decomposition: Y=QR - #If q > 0 perfrom q subspace iterations - #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - for _ in range(n_iter): - Y, _ = linalg.qr(Y, **_QR_KWARGS) - Z, _ = linalg.qr(conjugate_transpose(A).dot(Y), **_QR_KWARGS) - Y = A.dot(Z) - - # compute sketch - S, _ = linalg.qr(Y, **_QR_KWARGS) - - #if axis == 0: - # return np.dot(conjugate_transpose(S), A, out=out) - #return np.dot(A, S, out=out) - return S - - -def single_pass_sketch(A, output_rank=None, row_oversample=None, - column_oversample=10, distribution='uniform', - check_finite=False, random_state=None): - random_state = check_random_state(random_state) - - # converts A to array, raise ValueError if A has inf or nan - A = np.asarray_chkfinite(A) if check_finite else np.asarray(A) - - if A.dtype not in _VALID_DTYPES: - raise ValueError('A.dtype must be one of %s, not %s' - % (' '.join(_VALID_DTYPES), A.dtype)) - - if distribution not in _VALID_SINGLE_DISTRIBUTIONS: - raise ValueError('distribution must be one of %s, not %s' - % (' 
'.join(_VALID_SINGLE_DISTRIBUTIONS), distribution)) - - if row_oversample is None: - row_oversample = 2 * column_oversample - - # check rank - rank = _output_rank_check(A, output_rank) - row_oversample += rank - column_oversample += rank - - # get numpy random func - dist_func = _get_distribution_func(distribution, random_state) - - #Generate a random test matrix Omega - Omega = dist_func(size=(A.shape[1], column_oversample)).astype(A.dtype) - Psi = dist_func(size=(row_oversample, A.shape[0])).astype(A.dtype) - - if A.dtype == np.complexfloating: - real_type = np.float32 if A.dtype == np.complex64 else np.float64 - Omega += 1j * dist_func(size=(A.shape[1], column_oversample)).astype(real_type) - Psi += 1j * dist_func(size=(row_oversample, A.shape[0])).astype(real_type) - - if distribution == 'orthogonal': - Omega, _ = linalg.qr(Omega, **_QR_KWARGS) - Psi , _ = linalg.qr(conjugate_transpose(Psi), **_QR_KWARGS) - Psi = conjugate_transpose(Psi) - - #Build sample matrix Y = A * Omega and W = Psi * A - #Note: Y should approximate the column space and W the row space of A - #Y = A.dot(Omega) - #W = Psi.dot(A) - #del Omega - - #Orthogonalize Y using economic QR decomposition: Y=QR - #Q, _ = linalg.qr(Y, **_QR_KWARGS) - #U, T = linalg.qr(Psi.dot(Q), **_QR_KWARGS) - - # Form a smaller matrix - #return linalg.solve(T, conjugate_transpose(U).dot(W)) - return Omega, Psi diff --git a/ristretto/sketch/tests/test_transforms.py b/ristretto/sketch/tests/test_transforms.py index aa55585..440b0e1 100644 --- a/ristretto/sketch/tests/test_transforms.py +++ b/ristretto/sketch/tests/test_transforms.py @@ -22,11 +22,11 @@ def test_randomized_uniform_sampling(): # ------------------------------------------------------------------------ # tests raises incompatible axis - assert_raises(ValueError, randomized_uniform_sampling, A, l, axis=2) + assert_raises(IndexError, randomized_uniform_sampling, A, l, axis=2) # ------------------------------------------------------------------------ # tests raises incompatible A dimensions - assert_raises(ValueError, randomized_uniform_sampling, A[5], l) + assert_raises(IndexError, randomized_uniform_sampling, A[5], l) def test_johnson_linderstrauss(): diff --git a/ristretto/sketch/transforms.py b/ristretto/sketch/transforms.py index 2701050..306bbc7 100644 --- a/ristretto/sketch/transforms.py +++ b/ristretto/sketch/transforms.py @@ -18,20 +18,12 @@ def randomized_uniform_sampling(A, l, axis=1, random_state=None): """ random_state = check_random_state(random_state) - A = np.asarray(A) - if A.ndim != 2: - raise ValueError('A must be a 2D array, not %dD' % A.ndim) - - if axis not in (0, 1): - raise ValueError('If supplied, axis must be in (0, 1)') # sample l rows/columns with equal probability idx = _sketches.random_axis_sample(A, l, axis, random_state) - if axis == 0: - return A[idx] - return A[:, idx] + return np.take(A, idx, axis=axis) def johnson_lindenstrauss(A, l, n_subspace=None, axis=1, random_state=None): @@ -40,10 +32,6 @@ def johnson_lindenstrauss(A, l, n_subspace=None, axis=1, random_state=None): Given an m x n matrix A, and an integer l, this scheme computes an m x l orthonormal matrix Q whose range approximates the range of A - Notes - ----- - Johnson-Lindenstrauss - #Also known as randQB """ random_state = check_random_state(random_state) @@ -79,9 +67,6 @@ def sparse_johnson_lindenstrauss(A, l, density=None, axis=1, random_state=None): ---------- density : sparse matrix density - Notes - ----- - #Also known as randQB """ random_state = check_random_state(random_state) 
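For orientation, the dense Johnson-Lindenstrauss range finder these transforms provide boils down to a few lines of NumPy/SciPy: project A onto a Gaussian test matrix, orthonormalize, and optionally refine the basis with subspace iterations. The snippet below is a self-contained sketch of that general technique only; it does not import ristretto, and the helper name jl_range_finder is illustrative rather than part of the package:

    import numpy as np
    from scipy import linalg

    def jl_range_finder(A, l, n_subspace=1, random_state=None):
        # Illustrative dense Johnson-Lindenstrauss range finder (not the
        # package function): sample a Gaussian test matrix, project A onto
        # it, orthonormalize, and refine with subspace iterations.
        rng = np.random.RandomState(random_state)
        Omega = rng.standard_normal(size=(A.shape[1], l))
        Q, _ = linalg.qr(A.dot(Omega), mode='economic')
        for _ in range(n_subspace):
            Z, _ = linalg.qr(A.T.dot(Q), mode='economic')
            Q, _ = linalg.qr(A.dot(Z), mode='economic')
        return Q

    # A rank-10 matrix is captured almost exactly by a width-15 sketch.
    rng = np.random.RandomState(0)
    A = rng.standard_normal((200, 10)).dot(rng.standard_normal((10, 100)))
    Q = jl_range_finder(A, l=15, n_subspace=1, random_state=0)
    print(linalg.norm(A - Q.dot(Q.T.dot(A))) / linalg.norm(A))  # tiny, near machine precision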
@@ -109,10 +94,6 @@ def fast_johnson_lindenstrauss(A, l, axis=1, random_state=None): Given an m x n matrix A, and an integer l, this scheme computes an m x l orthonormal matrix Q whose range approximates the range of A - Notes - ----- - Johnson-Lindenstrauss - #Also known as randQB """ random_state = check_random_state(random_state) From 0b4815ab2243fe76791b01451b87492c7944025a Mon Sep 17 00:00:00 2001 From: Joseph Knox Date: Tue, 22 May 2018 19:47:55 -0700 Subject: [PATCH 5/7] WIP: change parameter names, need to fix tests. --- ristretto/cur.py | 50 ++--- ristretto/dmd.py | 81 ++++---- ristretto/eigen.py | 106 +++++----- ristretto/interp_decomp.py | 251 ++++-------------------- ristretto/lu.py | 46 +++-- ristretto/pca.py | 54 ++--- ristretto/qb.py | 70 +++---- ristretto/sketch/transforms.py | 18 +- ristretto/svd.py | 346 +++------------------------------ ristretto/tests/test_dmd.py | 6 +- 10 files changed, 288 insertions(+), 740 deletions(-) diff --git a/ristretto/cur.py b/ristretto/cur.py index 9651ef0..9806917 100644 --- a/ristretto/cur.py +++ b/ristretto/cur.py @@ -13,7 +13,7 @@ from .interp_decomp import interp_decomp, rinterp_decomp -def cur(A, k=None, index_set=False): +def cur(A, rank=None, index_set=False): """CUR decomposition. Algorithm for computing the low-rank CUR @@ -28,10 +28,10 @@ def cur(A, k=None, index_set=False): Parameters ---------- A : array_like, shape `(m, n)`. - Input matrix. + Input array. - k : integer, `k << min{m,n}`. - Target rank. + rank : integer + Target rank. Best if `rank << min{m,n}` index_set: str `{'True', 'False'}`, default: `index_set='False'`. 'True' : Return column/row index set instead of `C` and `R`. @@ -57,13 +57,13 @@ def cur(A, k=None, index_set=False): (available at `arXiv `_). """ # compute column ID - J, V = interp_decomp(A, k=k, mode='column', index_set=True) + J, V = interp_decomp(A, rank, mode='column', index_set=True) # select column subset C = A[:, J] # compute row ID of C - Z, I = interp_decomp(C, k=k, mode='row', index_set=True) + Z, I = interp_decomp(C, rank, mode='row', index_set=True) # select row subset R = A[I, :] @@ -77,31 +77,37 @@ def cur(A, k=None, index_set=False): return C, U, R -def rcur(A, k=None, p=10, q=1, index_set=False, random_state=None): +def rcur(A, rank, oversample=10, n_subspace=1, index_set=False, random_state=None): """Randomized CUR decomposition. Randomized algorithm for computing the approximate low-rank CUR - decomposition of a rectangular `(m, n)` matrix `A`, with target rank `k << min{m, n}`. + decomposition of a rectangular `(m, n)` matrix `A`, with target rank `rank << min{m, n}`. Input matrix is factored as `A = C * U * R`, using the column/row pivoted QR decomposition. The factor matrix `C` is formed of a subset of columns of `A`, also called the partial column skeleton. The factor matrix `R` is formed as a subset of rows of `A` also called the partial row skeleton. The factor matrix `U` is formed so that `U = C**-1 * A * R**-1` is satisfied. + The quality of the approximation can be controlled via the oversampling + parameter `oversample` and `n_subspace` which specifies the number of + subspace iterations. + Parameters ---------- A : array_like, shape `(m, n)`. - Input matrix. + Input array. - k : integer, `k << min{m,n}`. - Target rank. + rank : integer + Target rank. Best if `rank << min{m,n}` - p : integer, default: `p=10`. - Parameter to control oversampling. + oversample : integer, optional (default: 10) + Controls the oversampling of column space. 
Increasing this parameter + may improve numerical accuracy. - q : integer, default: `q=1`. - Parameter to control number of power (subspace) iterations. + n_subspace : integer, default: 1. + Parameter to control number of subspace iterations. Increasing this + parameter may improve numerical accuracy. index_set: str `{'True', 'False'}`, default: `index_set='False'`. 'True' : Return column/row index set instead of `C` and `R`. @@ -114,13 +120,13 @@ def rcur(A, k=None, p=10, q=1, index_set=False, random_state=None): Returns ------- - C: array_like, shape `(m, k)`. + C: array_like, shape `(m, rank)`. Partial column skeleton. - U : array_like, shape `(k, k)`. + U : array_like, shape `(rank, rank)`. Well-conditioned matrix. - R : array_like, shape `(k, n)`. + R : array_like, shape `(rank, n)`. Partial row skeleton. @@ -133,15 +139,15 @@ def rcur(A, k=None, p=10, q=1, index_set=False, random_state=None): (available at `arXiv `_). """ # Compute column ID - J, V = rinterp_decomp(A, k=k, p=p, q=q, mode='column', index_set=True, - random_state=random_state) + J, V = rinterp_decomp(A, rank, oversample=oversample, n_subspace=n_subspace, + mode='column', index_set=True, random_state=random_state) # Select column subset C = A[:, J] # Compute row ID of C - Z, I = rinterp_decomp(C, k=k, p=p, q=q, mode='row', index_set=True, - random_state=random_state) + Z, I = rinterp_decomp(A, rank, oversample=oversample, n_subspace=n_subspace, + mode='row', index_set=True, random_state=random_state) # Select row subset R = A[I, :] diff --git a/ristretto/dmd.py b/ristretto/dmd.py index 58e3dde..f4d5989 100644 --- a/ristretto/dmd.py +++ b/ristretto/dmd.py @@ -23,7 +23,7 @@ def _get_amplitudes(F, A): return result[0] -def dmd(A, dt=1, k=None, modes='exact', return_amplitudes=False, +def dmd(A, rank=None, dt=1, modes='exact', return_amplitudes=False, return_vandermonde=False, order=True): """Dynamic Mode Decomposition. @@ -36,14 +36,14 @@ def dmd(A, dt=1, k=None, modes='exact', return_amplitudes=False, Parameters ---------- - A : array_like - Real/complex input matrix `a` with dimensions `(m, n)`. + A : array_like, shape `(m, n)`. + Input array. - dt : scalar or array_like - Factor specifying the time difference between the observations. + rank : int + If `rank < (n-1)` low-rank Dynamic Mode Decomposition is computed. - k : int, optional - If `k < (n-1)` low-rank Dynamic Mode Decomposition is computed. + dt : scalar or array_like, optional (default: 1) + Factor specifying the time difference between the observations. modes : str `{'standard', 'exact', 'exact_scaled'}` - 'standard' : uses the standard definition to compute the dynamic modes, `F = U * W`. @@ -66,10 +66,10 @@ def dmd(A, dt=1, k=None, modes='exact', return_amplitudes=False, Matrix containing the dynamic modes of shape `(m, n-1)` or `(m, k)`. b : array_like, if `return_amplitudes=True` - 1-D array containing the amplitudes of length `min(n-1, k)`. + 1-D array containing the amplitudes of length `min(n-1, rank)`. V : array_like, if `return_vandermonde=True` - Vandermonde matrix of shape `(n-1, n-1)` or `(k, n-1)`. + Vandermonde matrix of shape `(n-1, n-1)` or `(rank, n-1)`. omega : array_like Time scaled eigenvalues: `ln(l)/dt`. 
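As a reference point for the docstring above, the 'standard' DMD computation it describes can be written compactly in NumPy/SciPy. This is a standalone sketch of the textbook steps, not the package's dmd function, and names such as dmd_sketch are illustrative:

    import numpy as np
    from scipy import linalg

    def dmd_sketch(A, rank, dt=1.0):
        # Illustrative standard DMD: split snapshots, project onto the leading
        # left singular vectors, eigendecompose the reduced operator, and map
        # the eigenvectors back to obtain the dynamic modes F = U * W.
        X, Y = A[:, :-1], A[:, 1:]                    # left/right snapshot sequences
        U, s, Vh = linalg.svd(X, full_matrices=False)
        U, s, Vh = U[:, :rank], s[:rank], Vh[:rank, :]
        M = U.conj().T.dot(Y).dot(Vh.conj().T) / s    # reduced operator U* Y V S^-1
        lam, W = linalg.eig(M)                        # eigenvalues / eigenvectors
        F = U.dot(W)                                  # 'standard' dynamic modes
        omega = np.log(lam) / dt                      # time-scaled eigenvalues ln(l)/dt
        return F, lam, omega

The randomized variant below follows the same steps on the small matrix B obtained from a QB decomposition and then premultiplies the resulting modes by Q.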
@@ -97,8 +97,8 @@ def dmd(A, dt=1, k=None, modes='exact', return_amplitudes=False, raise ValueError('A.dtype must be one of %s, not %s' % (' '.join(_VALID_DTYPES), A.dtype)) - if k is not None and (k < 1 or k > n - 1): - raise ValueError('k must be > 1 and less than n - 1') + if rank is not None and (rank < 1 or rank > n - 1): + raise ValueError('rank must be > 1 and less than n - 1') #Split data into lef and right snapshot sequence X = A[:, :(n-1)] #pointer @@ -108,10 +108,10 @@ def dmd(A, dt=1, k=None, modes='exact', return_amplitudes=False, U, s, Vh = linalg.svd(X, compute_uv=True, full_matrices=False, overwrite_a=False, check_finite=True) - if k is not None: - U = U[:, :k] - s = s[:k] - Vh = Vh[:k, :] + if rank is not None: + U = U[:, :rank] + s = s[:rank] + Vh = Vh[:rank, :] #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #Solve the LS problem to find estimate for M using the pseudo-inverse @@ -156,9 +156,8 @@ def dmd(A, dt=1, k=None, modes='exact', return_amplitudes=False, return result -def rdmd(A, dt=1, k=None, p=10, l=None, q=2, sdist='uniform', single_pass=False, - return_amplitudes=False, return_vandermonde=False, order=True, - random_state=None): +def rdmd(A, rank, dt=1, oversample=10, n_subspace=1, return_amplitudes=False, + return_vandermonde=False, order=True, random_state=None): """Randomized Dynamic Mode Decomposition. Dynamic Mode Decomposition (DMD) is a data processing algorithm which @@ -167,34 +166,29 @@ def rdmd(A, dt=1, k=None, p=10, l=None, q=2, sdist='uniform', single_pass=False, The modes are ordered corresponding to the amplitudes stored in the diagonal matrix `B`. `V` is a Vandermonde matrix describing the temporal evolution. + The quality of the approximation can be controlled via the oversampling + parameter `oversample` and `n_subspace` which specifies the number of + subspace iterations. + Parameters ---------- - A : array_like - Real/complex input matrix `a` with dimensions `(m, n)`. + A : array_like, shape `(m, n)`. + Input array. + + rank : integer + Target rank. Best if `rank << min{m,n}` dt : scalar or array_like Factor specifying the time difference between the observations. - k : int - If `k < (n-1)` low-k Dynamic Mode Decomposition is computed. + oversample : integer, optional (default: 10) + Controls the oversampling of column space. Increasing this parameter + may improve numerical accuracy. - p : integer, default: `p=10`. - Parameter to control oversampling of column space. - - l : integer, default: `l=2*p`. - Parameter to control oversampling of row space. Only relevant if - single_pass == True. - - q : int, optional - Number of subspace iterations to perform. Only relevant if - single_pass == False - - sdist : str `{'uniform', 'normal'}` - Specify the distribution of the sensing matrix `S`. - - single_pass : bool - If single_pass == True, perfom single pass of algorithm. + n_subspace : integer, default: 1. + Parameter to control number of subspace iterations. Increasing this + parameter may improve numerical accuracy. return_amplitudes : bool `{True, False}` True: return amplitudes in addition to dynamic modes. @@ -214,13 +208,13 @@ def rdmd(A, dt=1, k=None, p=10, l=None, q=2, sdist='uniform', single_pass=False, Returns ------- F : array_like - Matrix containing the dynamic modes of shape `(m, n-1)` or `(m, k)`. + Matrix containing the dynamic modes of shape `(m, rank)`. b : array_like, if `return_amplitudes=True` - 1-D array containing the amplitudes of length `min(n-1, k)`. 
+ 1-D array containing the amplitudes of length `min(n-1, rank)`. V : array_like, if `return_vandermonde=True` - Vandermonde matrix of shape `(n-1, n-1)` or `(k, n-1)`. + Vandermonde matrix of shape `(rank, n-1)`. omega : array_like Time scaled eigenvalues: `ln(l)/dt`. @@ -233,13 +227,12 @@ def rdmd(A, dt=1, k=None, p=10, l=None, q=2, sdist='uniform', single_pass=False, (available at `arXiv `_). """ # Compute QB decomposition - Q, B = rqb(A, k=k, p=p, l=l, q=q, sdist=sdist, single_pass=single_pass, - random_state=random_state) + Q, B = rqb(A, rank, oversample=oversample, n_subspace=n_subspace, random_state=random_state) # only difference is we need to premultiply F from dmd # vandermonde is basically already computed # TODO: factor out the rest so no code is repeated - F, V, omega = dmd(B, dt=dt, k=k, modes='standard',return_amplitudes=False, + F, V, omega = dmd(B, rank=rank, dt=dt, modes='standard', return_amplitudes=False, return_vandermonde=True, order=order) #Compute DMD Modes diff --git a/ristretto/eigen.py b/ristretto/eigen.py index ee4740f..c9de0ad 100644 --- a/ristretto/eigen.py +++ b/ristretto/eigen.py @@ -6,6 +6,7 @@ # License: GNU General Public License v3.0 # TODO: repace nystroem_col with random uniform sampling +# TODO: conform functions to return like scipy.linalg.eig and rename from __future__ import division, print_function @@ -18,28 +19,28 @@ _VALID_DTYPES = (np.float32, np.float64, np.complex64, np.complex128) -def reigh(A, k, p=20, q=2, sdist='normal', random_state=None): +def reigh(A, rank, oversample=10, n_subspace=1, random_state=None): """Randomized eigendecompostion. + The quality of the approximation can be controlled via the oversampling + parameter `oversample` and `n_subspace` which specifies the number of + subspace iterations. Parameters ---------- - A : array_like, shape `(n, n)`. - Hermitian matrix. + A : array_like, shape `(m, n)`. + Input array. - k : integer, `k << n`. - Target rank. + rank : integer + Target rank. Best if `rank << min{m,n}` - p : integer, default: `p=10`. - Parameter to control oversampling. + oversample : integer, optional (default: 10) + Controls the oversampling of column space. Increasing this parameter + may improve numerical accuracy. - q : integer, default: `q=2`. - Parameter to control number of power (subspace) iterations. - - sdist : str `{'uniform', 'normal'}`, default: `sdist='uniform'`. - 'uniform' : Random test matrix with uniform distributed elements. - - 'normal' : Random test matrix with normal distributed elements. + n_subspace : integer, default: 1. + Parameter to control number of subspace iterations. Increasing this + parameter may improve numerical accuracy. random_state : integer, RandomState instance or None, optional (default ``None``) If integer, random_state is the seed used by the random number generator; @@ -66,9 +67,8 @@ def reigh(A, k, p=20, q=2, sdist='normal', random_state=None): (available at `arXiv `_). 
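For context, the randomized eigendecomposition sketched by this routine can be summarized in a few standalone lines: restrict A to an approximate range basis Q, solve the small projected eigenproblem, and lift the eigenvectors back with Q. The snippet below is one common variant of that idea, assuming a symmetric/Hermitian A; it is illustrative only and is not the exact sequence of operations used by reigh:

    import numpy as np
    from scipy import linalg

    def reigh_sketch(A, rank, oversample=10, random_state=None):
        # Illustrative randomized eigendecomposition for Hermitian A (assumption):
        # build a range basis Q, project A onto it, and solve the small problem.
        rng = np.random.RandomState(random_state)
        Omega = rng.standard_normal(size=(A.shape[1], rank + oversample))
        Q, _ = linalg.qr(A.dot(Omega), mode='economic')
        T = Q.conj().T.dot(A).dot(Q)          # small projected matrix
        w, v = linalg.eigh(T)                 # ascending eigenvalues
        w, v = w[::-1], v[:, ::-1]            # reorder to descending
        return w[:rank], Q.dot(v[:, :rank])   # approximate dominant eigenpairs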
""" # get random sketch - Q = johnson_lindenstrauss(A, k + p, n_subspace=q, axis=1, random_state=random_state) - #Q = sketch(A, output_rank=k, n_oversample=p, n_iter=q, distribution=sdist, - # axis=1, check_finite=True, random_state=random_state) + Q = johnson_lindenstrauss(A, rank + oversample, n_subspace=n_subspace, + axis=1, random_state=random_state) #Project the data matrix a into a lower dimensional subspace B = A.dot(Q) @@ -85,28 +85,34 @@ def reigh(A, k, p=20, q=2, sdist='normal', random_state=None): return w[:k], Q.dot(v)[:,:k] -def reigh_nystroem(A, k, p=10, q=2, sdist='normal', random_state=None): +def reigh_nystroem(A, rank, oversample=10, n_subspace=1, random_state=None): """Randomized eigendecompostion using the Nystroem method. + The quality of the approximation can be controlled via the oversampling + parameter `oversample` and `n_subspace` which specifies the number of + subspace iterations. Parameters ---------- - A : array_like, shape `(n, n)`. - Positive-definite matrix (PSD) input matrix. + A : array_like, shape `(m, n)`. + Input array. - k : integer, `k << n`. - Target rank. + rank : integer + Target rank. Best if `rank << min{m,n}` - p : integer, default: `p=10`. - Parameter to control oversampling. + oversample : integer, optional (default: 10) + Controls the oversampling of column space. Increasing this parameter + may improve numerical accuracy. - q : integer, default: `q=2`. - Parameter to control number of power (subspace) iterations. + n_subspace : integer, default: 1. + Parameter to control number of subspace iterations. Increasing this + parameter may improve numerical accuracy. - sdist : str `{'uniform', 'normal'}`, default: `sdist='uniform'`. - 'uniform' : Random test matrix with uniform distributed elements. + random_state : integer, RandomState instance or None, optional (default ``None``) + If integer, random_state is the seed used by the random number generator; + If RandomState instance, random_state is the random number generator; + If None, the random number generator is the RandomState instance used by np.random. - 'normal' : Random test matrix with normal distributed elements. Returns ------- @@ -127,9 +133,8 @@ def reigh_nystroem(A, k, p=10, q=2, sdist='normal', random_state=None): (available at `arXiv `_). """ # get random sketch - S = johnson_lindenstrauss(A, k + p, n_subspace=q, axis=1, random_state=random_state) - #S = sketch(A, output_rank=k, n_oversample=p, n_iter=q, distribution=sdist, - # axis=1, check_finite=True, random_state=random_state) + S = johnson_lindenstrauss(A, rank + oversample, n_subspace=n_subspace, + axis=1, random_state=random_state) #Project the data matrix a into a lower dimensional subspace B1 = A.dot(S) @@ -163,26 +168,31 @@ def reigh_nystroem(A, k, p=10, q=2, sdist='normal', random_state=None): return w[:k]**2, v[:,:k] -def reigh_nystroem_col(A, k, p=0, random_state=None): +def reigh_nystroem_col(A, rank, oversample=0, random_state=None): """Randomized eigendecompostion using the Nystroem method. + The quality of the approximation can be controlled via the oversampling + parameter `oversample`. + Parameters ---------- - A : array_like, shape `(n, n)`. - Positive-definite matrix (PSD) input matrix. + A : array_like, shape `(m, n)`. + Input array. - k : integer, `k << n`. - Target rank. + rank : integer + Target rank. Best if `rank << min{m,n}` - p : integer, default: `p=0`. - Parameter to control oversampling. + oversample : integer, optional (default: 10) + Controls the oversampling of column space. 
Increasing this parameter + may improve numerical accuracy. random_state : integer, RandomState instance or None, optional (default ``None``) If integer, random_state is the seed used by the random number generator; If RandomState instance, random_state is the random number generator; If None, the random number generator is the RandomState instance used by np.random. + Returns ------- w : array_like, 1-d array of length `k`. @@ -214,15 +224,11 @@ def reigh_nystroem_col(A, k, p=0, random_state=None): raise ValueError('A.dtype must be one of %s, not %s' % (' '.join(_VALID_DTYPES), A.dtype)) - if k is None: - # default - k = min(m,n) - - if k < 1 or k > min(m, n): - raise ValueError("Target rank k must be >= 1 or < min(m, n), not %d" % k) + if rank < 1 or rank > min(m, n): + raise ValueError("Target rank must be >= 1 or < min(m, n), not %d" % rank) #Generate a random test matrix Omega - idx = np.sort(random_state.choice(n, size=(k+p), replace=False)) + idx = np.sort(random_state.choice(n, size=(rank+oversample), replace=False)) #Project the data matrix a into a lower dimensional subspace B1 = A[:,idx] @@ -239,10 +245,10 @@ def reigh_nystroem_col(A, k, p=0, random_state=None): U, s, _ = linalg.svd(B2, full_matrices=False, overwrite_a=True, check_finite=False) U = B1.dot(U / s) - U = U[:, :k] * np.sqrt(k / n) - s = s[:k] * (n / k) + U = U[:, :rank] * np.sqrt(rank / n) + s = s[:rank] * (n / rank) - return s[:k], U + return s[:rank], U # Upper triangular solve F = linalg.solve_triangular(C, conjugate_transpose(B1), lower=True, @@ -253,4 +259,4 @@ def reigh_nystroem_col(A, k, p=0, random_state=None): v, w, _ = linalg.svd(conjugate_transpose(F), compute_uv=True, full_matrices=False, overwrite_a=True, check_finite=False) - return w[:k]**2, v[:, :k] + return w[:rank]**2, v[:, :rank] diff --git a/ristretto/interp_decomp.py b/ristretto/interp_decomp.py index 7cc3d0a..0ebbdbd 100644 --- a/ristretto/interp_decomp.py +++ b/ristretto/interp_decomp.py @@ -11,19 +11,12 @@ from scipy import linalg from .qb import rqb -from .utils import check_random_state, conjugate_transpose +from .utils import conjugate_transpose -_VALID_DTYPES = (np.float32, np.float64, np.complex64, np.complex128) -_VALID_SDISTS = ('uniform', 'normal') _VALID_MODES = ('row', 'column') -def _get_distribution_func(distribution, random_state): - if distribution == 'uniform': - return partial(random_state.uniform, -1, 1) - return random_state.standard_normal - -def interp_decomp(A, k=None, mode='column', index_set=False): +def interp_decomp(A, rank, mode='column', index_set=False): """Interpolative decomposition (ID). Algorithm for computing the low-rank ID @@ -31,7 +24,7 @@ def interp_decomp(A, k=None, mode='column', index_set=False): Input matrix is factored as `A = C * V`, using the column pivoted QR decomposition. The factor matrix `C` is formed of a subset of columns of `A`, also called the partial column skeleton. The factor matrix `V` contains - a `(k, k)` identity matrix as a submatrix, and is well-conditioned. + a `(rank, rank)` identity matrix as a submatrix, and is well-conditioned. If `mode='row'`, then the input matrix is factored as `A = Z * R`, using the row pivoted QR decomposition. The factor matrix `R` is now formed as @@ -40,10 +33,10 @@ def interp_decomp(A, k=None, mode='column', index_set=False): Parameters ---------- A : array_like, shape `(m, n)`. - Input matrix. + Input array. - k : integer, `k << min{m,n}`. - Target rank. + rank : integer + Target rank. 
Best if `rank << min{m,n}` mode: str `{'column', 'row'}`, default: `mode='column'`. 'column' : ID using column pivoted QR. @@ -52,20 +45,21 @@ def interp_decomp(A, k=None, mode='column', index_set=False): index_set: str `{'True', 'False'}`, default: `index_set='False'`. 'True' : Return column/row index set instead of `C` or `R`. + Returns ------- If `mode='column'`: - C: array_like, shape `(m, k)`. + C: array_like, shape `(m, rank)`. Partial column skeleton. - V : array_like, shape `(k, n)`. + V : array_like, shape `(rank, n)`. Well-conditioned matrix. If `mode='row'`: - Z: array_like, shape `(m, k)`. + Z: array_like, shape `(m, rank)`. Well-conditioned matrix. - R : array_like, shape `(k, n)`. + R : array_like, shape `(rank, n)`. Partial row skeleton. References @@ -84,44 +78,36 @@ def interp_decomp(A, k=None, mode='column', index_set=False): A = np.asarray_chkfinite(A) if mode=='row': A = conjugate_transpose(A) - m, n = A.shape - - if A.dtype not in _VALID_DTYPES: - raise ValueError('A.dtype must be one of %s, not %s' - % (' '.join(_VALID_DTYPES), A.dtype)) - if k is None: - # default - k = min(m,n) - - if k < 1 or k > min(m, n): - raise ValueError("Target rank k must be >= 1 or < min(m, n), not %d" % k) + m, n = A.shape + if rank < 1 or rank > min(m, n): + raise ValueError("Target rank rank must be >= 1 or < min(m, n), not %d" % rank) #Pivoted QR decomposition Q, R, P = linalg.qr(A, mode='economic', overwrite_a=False, pivoting=True, check_finite=False) # Select column subset - C = A[:, P[:k]] + C = A[:, P[:rank]] # Compute V - T = linalg.pinv2(R[:k, :k]).dot(R[:k, k:n]) - V = np.bmat([[np.eye(k), T]]) + T = linalg.pinv2(R[:rank, :rank]).dot(R[:rank, rank:n]) + V = np.bmat([[np.eye(rank), T]]) V = V[:, np.argsort(P)] # Return ID if mode == 'column': if index_set: - return P[:k], V + return P[:rank], V return C, V # mode == row elif index_set: - return conjugate_transpose(V), P[:k] + return conjugate_transpose(V), P[:rank] return conjugate_transpose(V), conjugate_transpose(C) -def rinterp_decomp(A, k=None, mode='column', p=10, q=1, sdist='normal', +def rinterp_decomp(A, rank, oversample=10, n_subspace=1, mode='column', index_set=False, random_state=None): """Randomized interpolative decomposition (rID). @@ -129,7 +115,7 @@ def rinterp_decomp(A, k=None, mode='column', p=10, q=1, sdist='normal', decomposition of a rectangular `(m, n)` matrix `A`, with target rank `k << min{m, n}`. The input matrix is factored as `A = C * V`. The factor matrix `C`is formed of a subset of columns of `A`, also called the partial column skeleton. - The factor matrix `V`contains a `(k, k)` identity matrix as a submatrix, + The factor matrix `V`contains a `(rank, rank)` identity matrix as a submatrix, and is well-conditioned. If `mode='row'`, then the input matrix is factored as `A = Z * R`, using the @@ -137,178 +123,32 @@ def rinterp_decomp(A, k=None, mode='column', p=10, q=1, sdist='normal', a subset of rows of `A`, also called the partial row skeleton. The quality of the approximation can be controlled via the oversampling - parameter `p` and the parameter `q` which specifies the number of + parameter `oversample` and `n_subspace` which specifies the number of subspace iterations. Parameters ---------- A : array_like, shape `(m, n)`. - Input matrix. - - k : integer, `k << min{m,n}`. - Target rank. - - mode: str `{'column', 'row'}`, default: `mode='column'`. - 'column' : Column ID. - 'row' : Row ID. - - p : integer, default: `p=10`. - Parameter to control oversampling. - - q : integer, default: `q=1`. 
- Parameter to control number of power (subspace) iterations. - - sdist : str `{'uniform', 'normal'}`, default: `sdist='uniform'`. - 'uniform' : Random test matrix with uniform distributed elements. - - 'normal' : Random test matrix with normal distributed elements. - - index_set: str `{'True', 'False'}`, default: `index_set='False'`. - 'True' : Return column/row index set instead of `C` or `R`. - - random_state : integer, RandomState instance or None, optional (default ``None``) - If integer, random_state is the seed used by the random number generator; - If RandomState instance, random_state is the random number generator; - If None, the random number generator is the RandomState instance used by np.random. - - Returns - ------- - If `mode='column'`: - C: array_like, shape `(m, k)`. - Partial column skeleton. - - V : array_like, shape `(k, n)`. - Well-conditioned matrix. - - If `mode='row'`: - Z: array_like, shape `(m, k)`. - Well-conditioned matrix. - - R : array_like, shape `(k, n)`. - Partial row skeleton. - - References - ---------- - S. Voronin and P.Martinsson. - "RSVDPACK: Subroutines for computing partial singular value - decompositions via randomized sampling on single core, multi core, - and GPU architectures" (2015). - (available at `arXiv `_). - """ - random_state=check_random_state(random_state) - - # converts A to array, raise ValueError if A has inf or nan - A = np.asarray_chkfinite(A) - - if mode=='row': - A = conjugate_transpose(A) - - if A.dtype not in _VALID_DTYPES: - raise ValueError('A.dtype must be one of %s, not %s' - % (' '.join(_VALID_DTYPES), A.dtype)) - - if mode not in _VALID_MODES: - raise ValueError('mode must be one of %s, not %s' - % (' '.join(_VALID_MODES), mode)) - - if sdist not in _VALID_SDISTS: - raise ValueError('sdists must be one of %s, not %s' - % (' '.join(_VALID_SDISTS), sdist)) - - m, n = A.shape - if k is None: - # default - k = min(m, n) - - if k < 1 or k > min(m, n): - raise ValueError("Target rank k must be >= 1 or < min(m, n), not %d" % k) - - # distribution to draw random samples - sdist_func = _get_distribution_func(sdist, random_state) - - #Generate a random test matrix Omega - Omega = sdist_func(size=(k+p, m)).astype(A.dtype) - - if A.dtype == np.complexfloating: - real_type = np.float32 if A.dtype == np.complex64 else np.float64 - Omega += 1j * sdist_func(size=(k+p, m)).astype(real_type) + Input array. - #Build sample matrix Y : Y = A * Omega (Y approximates range of A) - Y = Omega.dot(A) - del Omega + rank : integer + Target rank. Best if `rank << min{m,n}` - #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - #Orthogonalize Y using economic QR decomposition: Y=QR - #If q > 0 perfrom q subspace iterations - #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - for _ in range(q): - Y, _ = linalg.qr(conjugate_transpose(Y), mode='economic', - check_finite=False, overwrite_a=True) - Z, _ = linalg.qr(A.dot(Y), mode='economic', check_finite=False, overwrite_a=True) - Y = conjugate_transpose(Z).dot(A) + oversample : integer, optional (default: 10) + Controls the oversampling of column space. Increasing this parameter + may improve numerical accuracy. 
- del Z - - # Deterministic ID - J, V = interp_decomp(Y, k=k, mode='column', index_set=True) - J = J[0:k] - - if mode == 'column': - if index_set: - return J, V - return A[:,J], V - # mode == 'row' - elif index_set: - return conjugate_transpose(V), J - return conjugate_transpose(V), conjugate_transpose(A[:,J]) - - -def rinterp_decomp_qb(A, k=None, mode='column', p=10, q=1, sdist='normal', - index_set=False, random_state=None): - r"""Randomized interpolative decomposition (rID). - - Algorithm for computing the approximate low-rank ID - decomposition of a rectangular `(m, n)` matrix `A`, with target rank `k << min{m, n}`. - The input matrix is factored as `A = C * V`. The factor matrix $\mathbf{C}$ is formed - of a subset of columns of $\mathbf{A}$, also called the partial column skeleton. - The factor matrix $\mathbf{V}$ contains a $k\times k$ identity matrix as a submatrix, - and is well-conditioned. - - If `mode='row'`, then the input matrix is factored as `A = Z * R`, using the - row pivoted QR decomposition. The factor matrix $\mathbf{C}$ is now formed as - a subset of rows of $\mathbf{A}$, also called the partial row skeleton. - - The quality of the approximation can be controlled via the oversampling - parameter `p` and the parameter `q` which specifies the number of - subspace iterations. - - - Parameters - ---------- - A : array_like, shape `(m, n)`. - Input matrix. - - k : integer, `k << min{m,n}`. - Target rank. + n_subspace : integer, default: 1. + Parameter to control number of subspace iterations. Increasing this + parameter may improve numerical accuracy. mode: str `{'column', 'row'}`, default: `mode='column'`. 'column' : Column ID. 'row' : Row ID. - p : integer, default: `p=10`. - Parameter to control oversampling. - - q : integer, default: `q=1`. - Parameter to control number of power (subspace) iterations. - - sdist : str `{'uniform', 'normal'}`, default: `sdist='uniform'`. - 'uniform' : Random test matrix with uniform distributed elements. - - 'normal' : Random test matrix with normal distributed elements. - index_set: str `{'True', 'False'}`, default: `index_set='False'`. - 'True' : Return column/row index set. + 'True' : Return column/row index set instead of `C` or `R`. random_state : integer, RandomState instance or None, optional (default ``None``) If integer, random_state is the seed used by the random number generator; @@ -319,24 +159,19 @@ def rinterp_decomp_qb(A, k=None, mode='column', p=10, q=1, sdist='normal', Returns ------- If `mode='column'`: - C: array_like, shape `(m, k)`. + C: array_like, shape `(m, rank)`. Partial column skeleton. - V : array_like, shape `(k, n)`. + V : array_like, shape `(rank, n)`. Well-conditioned matrix. If `mode='row'`: - Z: array_like, shape `(m, k)`. + Z: array_like, shape `(m, rank)`. Well-conditioned matrix. - R : array_like, shape `(k, n)`. + R : array_like, shape `(rank, n)`. Partial row skeleton. - - J : array_like, shape `(k, n)`. - Column/row index set. - - References ---------- S. Voronin and P.Martinsson. 
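To make the column ID construction above concrete, the following standalone NumPy/SciPy snippet builds C and V from a column-pivoted QR and checks that A is recovered. It mirrors the deterministic steps described in the docstring, but uses an illustrative name (column_id) rather than the package function:

    import numpy as np
    from scipy import linalg

    def column_id(A, rank):
        # Illustrative column interpolative decomposition via pivoted QR:
        # A ~= C @ V, where C is a column subset of A and V contains a
        # (rank x rank) identity submatrix.
        Q, R, P = linalg.qr(A, mode='economic', pivoting=True)
        C = A[:, P[:rank]]
        T = linalg.solve_triangular(R[:rank, :rank], R[:rank, rank:])  # R11^-1 R12
        V = np.hstack([np.eye(rank), T])[:, np.argsort(P)]
        return C, V

    rng = np.random.RandomState(0)
    A = rng.standard_normal((60, 8)).dot(rng.standard_normal((8, 40)))  # exactly rank 8
    C, V = column_id(A, rank=8)
    print(np.allclose(A, C.dot(V)))  # True up to round-off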
@@ -351,22 +186,22 @@ def rinterp_decomp_qb(A, k=None, mode='column', p=10, q=1, sdist='normal', # converts A to array, raise ValueError if A has inf or nan A = np.asarray_chkfinite(A) - if mode=='row': + if mode == 'row': A = conjugate_transpose(A) # compute QB factorization - Q, B = rqb(A, k=k, p=p, q=q, sdist=sdist, random_state=random_state) + Q, B = rqb(A, rank, oversample=oversample, n_subspace=n_subspace, random_state=random_state) # Deterministic ID - J, V = interp_decomp(B, k=k, mode='column', index_set=True) - J = J[:k] + J, V = interp_decomp(B, rank, mode='column', index_set=True) + J = J[:rank] # Return ID - if mode=='column': + if mode == 'column': if index_set: return J, V - return A[:,J], V + return A[:, J], V # mode == 'row' elif index_set: return conjugate_transpose(V), J - return conjugate_transpose(V), conjugate_transpose(A[:,J]) + return conjugate_transpose(V), conjugate_transpose(A[:, J]) diff --git a/ristretto/lu.py b/ristretto/lu.py index 15d6d75..eabb752 100644 --- a/ristretto/lu.py +++ b/ristretto/lu.py @@ -15,41 +15,38 @@ from .utils import conjugate_transpose -def rlu(A, permute=False, k=None, p=10, q=1, sdist='uniform', random_state=None): +def rlu(A, rank, oversample=10, n_subspace=1, permute=False, random_state=None): """Randomized LU Decomposition. Randomized algorithm for computing the approximate low-rank LU - decomposition of a rectangular `(m, n)` matrix `A`, with target rank `k << min{m, n}`. - The input matrix is factored as `A = P * L * U * C`, where + decomposition of a rectangular `(m, n)` matrix `A`, with target rank + `rank << min{m, n}`. The input matrix is factored as `A = P * L * U * C`, where `L` and `U` are the lower and upper triangular matrices, respectively. And `P` and `C` are the row and column permutation matrices. The quality of the approximation can be controlled via the oversampling - parameter `p` and the parameter `q` which specifies the number of + parameter `oversample` and `n_subspace` which specifies the number of subspace iterations. Parameters ---------- A : array_like, shape `(m, n)`. - Real nonnegative input matrix. + Input array. - permute : bool, default: `permute=False`. - If `True`, perform the multiplication P*L and U*C. - - k : integer, `k << min{m,n}`. - Target rank. + rank : integer + Target rank. Best if `rank << min{m,n}` - p : integer, default: `p=10`. - Parameter to control oversampling. + oversample : integer, optional (default: 10) + Controls the oversampling of column space. Increasing this parameter + may improve numerical accuracy. - q : integer, default: `q=1`. - Parameter to control number of power (subspace) iterations. + n_subspace : integer, default: 1. + Parameter to control number of subspace iterations. Increasing this + parameter may improve numerical accuracy. - sdist : str `{'uniform', 'normal'}`, default: `sdist='uniform'`. - 'uniform' : Random test matrix with uniform distributed elements. - - 'normal' : Random test matrix with normal distributed elements. + permute : bool, default: `permute=False`. + If `True`, perform the multiplication P*L and U*C. random_state : integer, RandomState instance or None, optional (default ``None``) If integer, random_state is the seed used by the random number generator; @@ -59,16 +56,16 @@ def rlu(A, permute=False, k=None, p=10, q=1, sdist='uniform', random_state=None) Returns ------- P : array_like, shape `(m, m)`. - Row permutation matrix, if `permute_l=False`. + Row permutation matrix, only returned if `permute == False`. - L : array_like, shape `(m, k)`. 
+ L : array_like, shape `(m, rank)`. Lower triangular matrix. - U : array_like, shape `(k, n)`. + U : array_like, shape `(rank, n)`. Upper triangular matrix. C : array_like, shape `(n, n)`. - Column Permutation matrix, if `permute=False`. + Column permutation matrix, only returned if `permute == False`. References @@ -85,7 +82,8 @@ def rlu(A, permute=False, k=None, p=10, q=1, sdist='uniform', random_state=None) (available at `arXiv `_). """ # get random sketch - S = johnson_lindenstrauss(A, k + p, n_subspace=q, axis=1, random_state=random_state) + S = johnson_lindenstrauss(A, rank + oversample, n_subspace=n_subspace, + random_state=random_state) # Compute pivoted LU decompostion of the orthonormal basis matrix Q. # Q = P * L * U @@ -93,7 +91,7 @@ def rlu(A, permute=False, k=None, p=10, q=1, sdist='uniform', random_state=None) _, r ,_ = sparse.find(P.T) # Truncate L_tilde - L_tilde = L_tilde[:, :k] + L_tilde = L_tilde[:, :rank] # Form smaller matrix B U, s, Vt = linalg.svd(L_tilde, compute_uv=True, full_matrices=False, diff --git a/ristretto/pca.py b/ristretto/pca.py index f297009..765814c 100644 --- a/ristretto/pca.py +++ b/ristretto/pca.py @@ -13,7 +13,7 @@ from .qb import rqb -def spca(X, n_components=None, alpha=0.1, beta=0.01, max_iter=500, tol=1e-5, +def spca(A, n_components, alpha=0.1, beta=0.01, max_iter=500, tol=1e-5, verbose=True): r"""Sparse Principal Component Analysis (SPCA). @@ -27,7 +27,7 @@ def spca(X, n_components=None, alpha=0.1, beta=0.01, max_iter=500, tol=1e-5, Parameters ---------- A : array_like, shape `(m, n)`. - Real nonnegative input matrix. + Input array. n_components : integer, `n_components << min{m,n}`. Target rank, i.e., number of sparse components to be computed. @@ -67,12 +67,12 @@ def spca(X, n_components=None, alpha=0.1, beta=0.01, max_iter=500, tol=1e-5, minimize :math:`1/2 \| X - X B A^T \|^2 + \alpha \|B\|_1 + 1/2 \beta \|B\|^2` """ # Shape of input matrix - m, n = X.shape + m = A.shape[0] #-------------------------------------------------------------------- # Initialization of Variable Projection Solver #-------------------------------------------------------------------- - _, D, Vt = linalg.svd(X , full_matrices=False, overwrite_a=False) + _, D, Vt = linalg.svd(A, full_matrices=False, overwrite_a=False) Dmax = D[0] # l2 norm A = Vt.T[:, 0:n_components] @@ -137,7 +137,7 @@ def spca(X, n_components=None, alpha=0.1, beta=0.01, max_iter=500, tol=1e-5, return(B, A, eigvals, obj) -def robspca(X, n_components, alpha=0.1, beta=0.1, gamma=0.1, max_iter=1000, +def robspca(A, n_components, alpha=0.1, beta=0.1, gamma=0.1, max_iter=1000, tol=1e-5, verbose=True): r"""Robust Sparse Principal Component Analysis (Robust SPCA). @@ -151,7 +151,7 @@ def robspca(X, n_components, alpha=0.1, beta=0.1, gamma=0.1, max_iter=1000, Parameters ---------- A : array_like, shape `(m, n)`. - Real nonnegative input matrix. + Input array. n_components : integer, `n_components << min{m,n}`. Target rank, i.e., number of sparse components to be computed. 
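The l1 penalty alpha * ||B||_1 in the objective above is handled in these solvers by a proximal (soft-thresholding) update, which is what produces sparse loadings. A minimal standalone illustration of that single step follows; the function name soft_threshold is illustrative and not part of the package:

    import numpy as np

    def soft_threshold(X, kappa):
        # Elementwise soft-thresholding: the proximal operator of kappa * ||X||_1.
        # Entries within [-kappa, kappa] become zero; the rest shrink toward zero.
        return np.sign(X) * np.maximum(np.abs(X) - kappa, 0.0)

    B_temp = np.array([[0.8, -0.05],
                       [0.02, -1.3]])
    print(soft_threshold(B_temp, kappa=0.1))
    # entries become 0.7, 0.0, 0.0 and -1.2 respectively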
@@ -201,10 +201,10 @@ def robspca(X, n_components, alpha=0.1, beta=0.1, gamma=0.1, max_iter=1000, minimize :math:`1/2 \| X - X B A^T \|^2 + \alpha \|B\|_1 + 1/2 \beta \|B\|^2` """ # Shape of input matrix - m, n = X.shape + m = A.shape[0] # Initialization of Variable Projection Solver - U, D, Vt = linalg.svd(X , full_matrices=False, overwrite_a=False) + U, D, Vt = linalg.svd(A, full_matrices=False, overwrite_a=False) Dmax = D[0] #l2 norm @@ -219,7 +219,7 @@ def robspca(X, n_components, alpha=0.1, beta=0.1, gamma=0.1, max_iter=1000, beta *= Dmax**2 nu = 1.0 / (Dmax**2 + beta) kappa = nu * alpha - S = np.zeros_like(X) + S = np.zeros_like(A) # Apply Variable Projection Solver n_iter = 0 @@ -229,8 +229,8 @@ def robspca(X, n_components, alpha=0.1, beta=0.1, gamma=0.1, max_iter=1000, # Update A: # X'XB = UDV' # Compute X'XB via SVD of X - XS = X - S - XB = X.dot(B) + XS = A - S + XB = A.dot(B) Z = (XS).T.dot(XB) Utilde, Dtilde, Vttilde = linalg.svd( Z , full_matrices=False, overwrite_a=True) @@ -239,7 +239,7 @@ def robspca(X, n_components, alpha=0.1, beta=0.1, gamma=0.1, max_iter=1000, # Proximal Gradient Descent to Update B R = XS - XB.dot(A.T) - G = X.T.dot(R.dot(A)) - beta * B + G = A.T.dot(R.dot(A)) - beta * B B_temp = B + nu * G @@ -251,7 +251,7 @@ def robspca(X, n_components, alpha=0.1, beta=0.1, gamma=0.1, max_iter=1000, B[idxL] = B_temp[idxL] + kappa # compute residual - R = X - X.dot(B).dot(A.T) + R = A - A.dot(B).dot(A.T) # l1 soft-threshold idxH = R > gamma @@ -283,8 +283,8 @@ def robspca(X, n_components, alpha=0.1, beta=0.1, gamma=0.1, max_iter=1000, return B, A, S, eigvals, obj -def rspca(X, n_components, alpha=0.1, beta=0.1, max_iter=1000, tol=1e-5, - verbose=0, p=20, q=2, random_state=None): +def rspca(A, n_components, alpha=0.1, beta=0.1, max_iter=1000, tol=1e-5, + verbose=0, oversample=10, n_subspace=1, random_state=None): r"""Randomized Sparse Principal Component Analysis (rSPCA). Given a mean centered rectangular matrix `A` with shape `(m, n)`, SPCA @@ -296,6 +296,11 @@ def rspca(X, n_components, alpha=0.1, beta=0.1, max_iter=1000, tol=1e-5, This algorithm uses randomized methods for linear algebra to accelerate the computations. + The quality of the approximation can be controlled via the oversampling + parameter `oversample` and `n_subspace` which specifies the number of + subspace iterations. + + Parameters ---------- A : array_like, shape `(m, n)`. @@ -319,11 +324,13 @@ def rspca(X, n_components, alpha=0.1, beta=0.1, max_iter=1000, tol=1e-5, verbose : bool ``{'True', 'False'}``, optional (default ``verbose = True``). Display progress. - p : integer, (default: `p=20`). - Parameter to control oversampling. + oversample : integer, optional (default: 10) + Controls the oversampling of column space. Increasing this parameter + may improve numerical accuracy. - q : integer, (default: `q=2`). - Parameter to control number of power (subspace) iterations. + n_subspace : integer, default: 1. + Parameter to control number of subspace iterations. Increasing this + parameter may improve numerical accuracy. 
random_state : integer, RandomState instance or None, optional (default ``None``) If integer, random_state is the seed used by the random number generator; @@ -350,10 +357,11 @@ def rspca(X, n_components, alpha=0.1, beta=0.1, max_iter=1000, tol=1e-5, minimize :math:`1/2 \| X - X B A^T \|^2 + \alpha \|B\|_1 + 1/2 \beta \|B\|^2` """ # Shape of data matrix - m, n = X.shape + m = A.shape[0] # Compute QB decomposition - Q, Xcompressed = rqb(X, k=n_components, p=p, q=q, random_state=random_state) + Q, Xcompressed = rqb(A, rank=n_components, oversample=oversample, + n_subspace=n_subspace, random_state=random_state) # Compute Sparse PCA B, A, eigvals, obj = spca(Xcompressed, n_components=n_components, @@ -361,6 +369,6 @@ def rspca(X, n_components, alpha=0.1, beta=0.1, max_iter=1000, tol=1e-5, max_iter=max_iter, tol=tol, verbose=verbose) # rescale eigen values - eigvals = eigvals * (n_components+p - 1) / (m-1) + eigvals = eigvals * (n_components + oversample - 1) / (m-1) - return(B, A, eigvals, obj) + return B, A, eigvals, obj diff --git a/ristretto/qb.py b/ristretto/qb.py index 6de66d3..0763929 100644 --- a/ristretto/qb.py +++ b/ristretto/qb.py @@ -5,61 +5,43 @@ # Joseph Knox # License: GNU General Public License v3.0 -# NOTE: we should depricate single_pass because: -# we get the same performance by perfoming a single subspace -# iteration, the single pass construct doesn't really help us -# here unless we write it in C/Cython and compute the products -# Y = A * Omega, Y_tilde = A * Omega_tilde -# in sinlge passes - import numpy as np from scipy import linalg -from .sketch.transforms import johnson_lindenstrauss +from .sketch.transforms import johnson_lindenstrauss, sparse_johnson_lindenstrauss from .utils import conjugate_transpose -def rqb(A, k=None, p=10, l=None, q=1, sdist='normal', single_pass=False, - random_state=None): +def rqb(A, rank, oversample=10, n_subspace=1, sparse=False, random_state=None): """Randomized QB Decomposition. Randomized algorithm for computing the approximate low-rank QB - decomposition of a rectangular `(m, n)` matrix `A`, with target rank `k << min{m, n}`. - The input matrix is factored as `A = Q * B`. + decomposition of a rectangular `(m, n)` matrix `A`, with target rank + `rank << min{m, n}`. The input matrix is factored as `A = Q * B`. The quality of the approximation can be controlled via the oversampling - parameter `p` and the parameter `q` which specifies the number of + parameter `oversample` and `n_subspace` which specifies the number of subspace iterations. Parameters ---------- A : array_like, shape `(m, n)`. - Real nonnegative input matrix. - - k : integer, `k << min{m,n}`. - Target rank. - - p : integer, default: `p=10`. - Parameter to control oversampling of column space. - - l : integer, default: `l=2*p`. - Parameter to control oversampling of row space. Only relevant if - single_pass == True. + Input array. - q : integer, default: `q=1`. - Parameter to control number of power (subspace) iterations. Only - relevant if single_pass == False. + rank : integer + Target rank. Best if `rank << min{m,n}` - sdist : str `{'uniform', 'normal'}`, default: `sdist='uniform'`. - 'uniform' : Random test matrix with uniform distributed elements. + oversample : integer, optional (default: 10) + Controls the oversampling of column space. Increasing this parameter + may improve numerical accuracy. - 'normal' : Random test matrix with normal distributed elements. + n_subspace : integer, default: 1. + Parameter to control number of subspace iterations. 
Increasing this + parameter may improve numerical accuracy. - single_pass : bool - If single_pass == True, perfom single pass of algorithm, meaning that - in the algorithm only accesses A directly a single time. Beneficial A - is large. + sparse : boolean, optional (default: False) + If sparse == True, perform compressed random qr decomposition. random_state : integer, RandomState instance or None, optional (default ``None``) If integer, random_state is the seed used by the random number generator; @@ -69,10 +51,10 @@ def rqb(A, k=None, p=10, l=None, q=1, sdist='normal', single_pass=False, Returns ------- - Q: array_like, shape `(m, k+p)`. + Q: array_like, shape `(m, rank + oversample)`. Orthonormal basis matrix. - B : array_like, shape `(k+p, n)`. + B : array_like, shape `(rank + oversample, n)`. Smaller matrix. @@ -90,16 +72,12 @@ def rqb(A, k=None, p=10, l=None, q=1, sdist='normal', single_pass=False, and GPU architectures" (2015). (available at `arXiv `_). """ - # get random sketch - if single_pass: - # NOTE: we get the same performance by perfoming a single subspace - # iteration, the single pass construct doesn't really help us - # here unless we write it in C/Cython and compute the products - # Y = A * Omega, Y_tilde = A * Omega_tilde - # in sinlge passes - q = 1 - - Q = johnson_lindenstrauss(A, k + p, n_subspace=q, random_state=random_state) + if sparse: + Q = johnson_lindenstrauss(A, rank + oversample, n_subspace=n_subspace, + random_state=random_state) + else: + Q = sparse_johnson_lindenstrauss( + A, rank + oversample, n_subspace=n_subspace, random_state=random_state) #Project the data matrix a into a lower dimensional subspace B = conjugate_transpose(Q).dot(A) diff --git a/ristretto/sketch/transforms.py b/ristretto/sketch/transforms.py index 306bbc7..a259547 100644 --- a/ristretto/sketch/transforms.py +++ b/ristretto/sketch/transforms.py @@ -1,6 +1,9 @@ """ Functions for approximating the range of a matrix A. """ +from __future__ import division +from math import log + import numpy as np from scipy import fftpack from scipy import sparse @@ -57,7 +60,8 @@ def johnson_lindenstrauss(A, l, n_subspace=None, axis=1, random_state=None): return Q -def sparse_johnson_lindenstrauss(A, l, density=None, axis=1, random_state=None): +def sparse_johnson_lindenstrauss(A, l, n_subspace=1, density=None, axis=1, + random_state=None): """ Given an m x n matrix A, and an integer l, this scheme computes an m x l @@ -78,15 +82,21 @@ def sparse_johnson_lindenstrauss(A, l, density=None, axis=1, random_state=None): raise ValueError('If supplied, axis must be in (0, 1)') if density is None: - density = 1.0 / 3 + density = A.shape[0] / log(A.shape[0]) + # construct sparse sketch Omega = _sketches.sparse_random_map(A, l, axis, density, random_state) # project A onto Omega if axis == 0: - return safe_sparse_dot(Omega.T, A) - return safe_sparse_dot(A, Omega) + Q = safe_sparse_dot(Omega.T, A) + else: + Q = safe_sparse_dot(A, Omega) + + if n_subspace is not None: + Q = perform_subspace_iterations(A, Q, n_iter=n_subspace, axis=axis) + return Q def fast_johnson_lindenstrauss(A, l, axis=1, random_state=None): """ diff --git a/ristretto/svd.py b/ristretto/svd.py index 4641f17..865f864 100644 --- a/ristretto/svd.py +++ b/ristretto/svd.py @@ -1,10 +1,6 @@ """ -Compressed and Random Singular Value Decompositions. +Random Singular Value Decomposition. """ -# TODO: implement 'ortho' option -# TODO: implement 'method' option -# TODO: implement 'scaled' option -# TODO; implement 'sdists' for csvd_double # Authors: N. 
Benjamin Erichson # Joseph Knox @@ -17,287 +13,10 @@ from scipy import sparse from .qb import rqb -from .utils import check_random_state, conjugate_transpose +from .utils import conjugate_transpose -_VALID_DTYPES = (np.float32, np.float64, np.complex64, np.complex128) -_VALID_SDISTS = ('gaussian', 'spixel', 'sparse') - -def _sparse_sample(A, k, p, sdist, formatS, check_finite=False, random_state=None): - random_state = check_random_state(random_state) - - # converts A to array, raise ValueError if A has inf or nan - A = np.asarray_chkfinite(A) if check_finite else np.asarray(A) - m, n = A.shape - - if A.dtype not in _VALID_DTYPES: - raise ValueError('A.dtype must be one of %s, not %s' - % (' '.join(_VALID_DTYPES), A.dtype)) - - if sdist not in _VALID_SDISTS: - raise ValueError('sdists must be one of %s, not %s' - % (' '.join(_VALID_SDISTS), sdist)) - - if A.dtype == np.complexfloating: - real_type = np.float32 if A.dtype == np.complex64 else np.float64 - - # Generate random measurement matrix and compress input matrix - if sdist=='gaussian': - C = random_state.standard_normal(size=(k+p, m)).astype(A.dtype) - - if A.dtype == np.complexfloating: - C += 1j * random_state.standard_normal(size=(k+p , m)).astype(real_type) - Y = C.dot(A) - - elif sdist=='spixel': - C = random_state.sample(m, k+p) - Y = A[C, :] - - else: - # sdist == 'sparse' - density = m / np.log(m) - C = sparse.rand(k+p, m, density=density**-1, format=formatS, - dtype=real_type, random_state=random_state) - C.data = np.array(np.where(C.data >= 0.5, 1, -1), dtype=A.dtype) - Y = C.dot(A) - - return Y - - -def csvd(A, k=None, p=10, sdist='sparse', formatS='csr', random_state=None): - """Compressed Singular Value Decomposition. - - Row compressed algorithm for computing the approximate low-rank singular value - decomposition of a rectangular (m, n) matrix `A` with target rank `k << n`. - The input matrix a is factored as `A = U * diag(s) * Vt`. The left singular - vectors are the columns of the real or complex unitary matrix `U`. The right - singular vectors are the columns of the real or complex unitary matrix `V`. - The singular values `s` are non-negative and real numbers. - - - Parameters - ---------- - A : array_like - Real/complex input matrix `A` with dimensions `(m, n)`. - - k : int - `k` is the target rank of the low-rank decomposition, k << min(m,n). - - p : int - `p` oversampling parameter. The number of measurements will be `p+k` - - sdist : str `{gaussian', 'spixel', 'sparse'}` - Defines the sampling distribution. - - fortmatS : str `{csr, coo}` - Defines the format of the sparse measurement matrix. - - random_state : integer, RandomState instance or None, optional (default ``None``) - If integer, random_state is the seed used by the random number generator; - If RandomState instance, random_state is the random number generator; - If None, the random number generator is the RandomState instance used by np.random. - - - Returns - ------- - U: array_like - Right singular values, array of shape `(m, k)`. - - s : array_like - Singular values, 1-d array of length `k`. - - Vh : array_like - Left singular values, array of shape `(k, n)`. - - - Notes - ----- - If the sparse sampling distribution is used, the appropriate format for - the sparse measurement matrix is crucial. In generall `csr` is the optimal - format, but sometimes `coo` gives a better performance. Sparse matricies - are computational efficient if the leading dimension is m>5000. 
- """ - # get sparse sketch of A - Y = _sparse_sample(A, k, p, sdist, formatS, check_finite=True, - random_state=random_state) - - # Compute singular value decomposition - _ , s , Vh = linalg.svd(Y, full_matrices=False, overwrite_a=True, check_finite=False) - - # truncate - if k is not None: - s = s[:k] - Vh = Vh[:k, :] - - # Recover left-singular vectors - U, s, Q = linalg.svd(A.dot(conjugate_transpose(Vh)), - full_matrices=False, overwrite_a=True, check_finite=False) - - return U, s, Q.dot(Vh) - - -def csvd2(A, k=None, p=10, sdist='sparse', formatS='csr', random_state=None): - """Compressed Singular Value Decomposition. - - Row compressed algorithm for computing the approximate low-rank singular value - decomposition of a rectangular (m, n) matrix `A` with target rank `k << n`. - The input matrix a is factored as `A = U * diag(s) * Vt`. The left singular - vectors are the columns of the real or complex unitary matrix `U`. The right - singular vectors are the columns of the real or complex unitary matrix `V`. - The singular values `s` are non-negative and real numbers. - - - Parameters - ---------- - A : array_like - Real/complex input matrix `A` with dimensions `(m, n)`. - - k : int - `k` is the target rank of the low-rank decomposition, k << min(m,n). - - p : int - `p` oversampling parameter. The number of measurements will be `p+k` - - sdist : str `{gaussian', 'spixel', 'sparse'}` - Defines the sampling distribution. - - fortmatS : str `{csr, coo}` - Defines the format of the sparse measurement matrix. - - random_state : integer, RandomState instance or None, optional (default ``None``) - If integer, random_state is the seed used by the random number generator; - If RandomState instance, random_state is the random number generator; - If None, the random number generator is the RandomState instance used by np.random. - - - Returns - ------- - U: array_like - Right singular values, array of shape `(m, k)`. - - s : array_like - Singular values, 1-d array of length `k`. - - Vh : array_like - Left singular values, array of shape `(k, n)`. - - - Notes - ----- - If the sparse sampling distribution is used, the appropriate format for - the sparse measurement matrix is crucial. In generall `csr` is the optimal - format, but sometimes `coo` gives a better performance. Sparse matricies - are computational efficient if the leading dimension is m>5000. - """ - # get sparse sketch of A - Y = _sparse_sample(A, k, p, sdist, formatS, check_finite=True, - random_state=random_state) - - # Compute singular value decomposition - B = Y.dot(conjugate_transpose(Y)) - B = 0.5 * (B + conjugate_transpose(B)) - - l = k+p - lo, hi = (l-k), (l-1) # truncate - s, T = linalg.eigh(B, b=None, lower=True, eigvals_only=False, - overwrite_a=True, overwrite_b=False, turbo=True, eigvals=None, - type=1, check_finite=False) - - - # reverse the n first columns of u, and s - T[:, :l] = T[:, l-1::-1] - s = s[::-1] - - # truncate - if k is not None: - s = s[:k] - T = T[:, :k] - - mask = s > 0 - s[mask] = np.sqrt(s[mask]) - - V = conjugate_transpose(Y).dot(T[:,mask] / s[mask]) - - # Recover left-singular vectors - U, s, Vhstar = linalg.svd(A.dot(V), full_matrices=False, - overwrite_a=True, check_finite=False) - - return U, s, Vhstar.dot(conjugate_transpose(V)) - - -def csvd_double(A, k=None, p=10, formatS='csr', random_state=None): - """Compressed Singular Value Decomposition. 
- - Row compressed algorithm for computing the approximate low-rank singular value - decomposition of a rectangular (m, n) matrix `A` with target rank `k << n`. - The input matrix a is factored as `A = U * diag(s) * Vt`. The left singular - vectors are the columns of the real or complex unitary matrix `U`. The right - singular vectors are the columns of the real or complex unitary matrix `V`. - The singular values `s` are non-negative and real numbers. - - - Parameters - ---------- - A : array_like - Real/complex input matrix `A` with dimensions `(m, n)`. - - k : int - `k` is the target rank of the low-rank decomposition, k << min(m,n). - - p : int - `p` oversampling parameter. The number of measurements will be `p+k` - - fortmatS : str `{csr, coo}` - Defines the format of the sparse measurement matrix. - - random_state : integer, RandomState instance or None, optional (default ``None``) - If integer, random_state is the seed used by the random number generator; - If RandomState instance, random_state is the random number generator; - If None, the random number generator is the RandomState instance used by np.random. - - - Returns - ------- - U: array_like - Right singular values, array of shape `(m, k)`. - - s : array_like - Singular values, 1-d array of length `k`. - - Vh : array_like - Left singular values, array of shape `(k, n)`. - """ - random_state = check_random_state(random_state) - - # converts A to array, raise ValueError if A has inf or nan - A = np.asarray_chkfinite(A) - m, n = A.shape - - if A.dtype not in _VALID_DTYPES: - raise ValueError('A.dtype must be one of %s, not %s' - % (' '.join(_VALID_DTYPES), A.dtype)) - - # Generate random measurement matrix and compress input matrix - # Generate a random test matrix Omega - Omega = random_state.sample(n, k+p) - Psi = random_state.sample(m, k+p) - - L, _ = linalg.qr(A[:, Omega], mode='economic', check_finite=False, overwrite_a=False) - R, _ = linalg.qr(A[Psi, :].T, mode='economic', check_finite=False, overwrite_a=False) - - #Project the data matrix a into a lower dimensional subspace - #B = Q.T * A - D = conjugate_transpose(L).dot(A) - D = D.dot(R) - - # Compute singular value decomposition - U, s, Vh = linalg.svd(D, full_matrices=False, overwrite_a=True, check_finite=False) - - return L.dot(U), s, Vh.dot(R.T) - - -def rsvd(A, k=None, p=10, l=None, q=1, sdist='uniform', single_pass=False, - random_state=None): +def rsvd(A, rank, oversample=10, n_subspace=1, sparse=False, random_state=None): """Randomized Singular Value Decomposition. Randomized algorithm for computing the approximate low-rank singular value @@ -308,38 +27,28 @@ def rsvd(A, k=None, p=10, l=None, q=1, sdist='uniform', single_pass=False, The singular values `s` are non-negative and real numbers. The quality of the approximation can be controlled via the oversampling - parameter `p` and the parameter `q` which specifies the number of + parameter `oversample` and `n_subspace` which specifies the number of subspace iterations. - If k > (n/1.5), partial SVD or truncated SVD might be faster. - Parameters ---------- A : array_like, shape `(m, n)`. - Real nonnegative input matrix. - - k : integer, `k << min{m,n}`. - Target rank. - - p : integer, default: `p=10`. - Parameter to control oversampling of column space. + Input array. - l : integer, default: `l=2*p`. - Parameter to control oversampling of row space. Only relevant if - single_pass == True. + rank : integer + Target rank. Best if `rank << min{m,n}` - q : integer, default: `q=1`. 
- Parameter to control number of power (subspace) iterations. Only - relevant if single_pass == False. + oversample : integer, optional (default: 10) + Controls the oversampling of column space. Increasing this parameter + may improve numerical accuracy. - sdist : str `{'uniform', 'normal'}`, default: `sdist='uniform'`. - 'uniform' : Random test matrix with uniform distributed elements. + n_subspace : integer, default: 1. + Parameter to control number of subspace iterations. Increasing this + parameter may improve numerical accuracy. - 'normal' : Random test matrix with normal distributed elements. - - single_pass : bool - If single_pass == True, perfom single pass of algorithm. + sparse : boolean, optional (default: False) + If sparse == True, perform compressed rsvd. random_state : integer, RandomState instance or None, optional (default ``None``) If integer, random_state is the seed used by the random number generator; @@ -349,14 +58,19 @@ def rsvd(A, k=None, p=10, l=None, q=1, sdist='uniform', single_pass=False, Returns ------- - U: array_like, shape `(m, k)`. - Right singular values. + U: array_like + Right singular values, array of shape `(m, rank)`. + + s : array_like + Singular values, 1-d array of length `rank`. + + Vh : array_like + Left singular values, array of shape `(rank, n)`. - s : array_like, 1-d array of length `k`. - Singular values. - Vt : array_like, shape `(k, n)`. - Left singular values. + Notes + ----- + If rank > (n/1.5), partial SVD or truncated SVD might be faster. References @@ -384,8 +98,8 @@ def rsvd(A, k=None, p=10, l=None, q=1, sdist='uniform', single_pass=False, flipped = True # Compute QB decomposition - Q, B = rqb(A, k=k, p=p, l=l, q=q, sdist=sdist, single_pass=single_pass, - random_state=random_state) + Q, B = rqb(A, rank, oversample=oversample, n_subspace=n_subspace, + sparse=sparse, random_state=random_state) # Compute SVD U, s, Vt = linalg.svd(B, compute_uv=True, full_matrices=False, @@ -396,6 +110,6 @@ def rsvd(A, k=None, p=10, l=None, q=1, sdist='uniform', single_pass=False, # Return Trunc if flipped: - return conjugate_transpose(Vt)[:, :k], s[:k], conjugate_transpose(U)[:k, :] + return conjugate_transpose(Vt)[:, :rank], s[:rank], conjugate_transpose(U)[:rank, :] - return U[:, :k], s[:k], Vt[:k, :] + return U[:, :rank], s[:rank], Vt[:rank, :] diff --git a/ristretto/tests/test_dmd.py b/ristretto/tests/test_dmd.py index e2c0dbb..30de23c 100644 --- a/ristretto/tests/test_dmd.py +++ b/ristretto/tests/test_dmd.py @@ -21,17 +21,17 @@ def test_dmd(): F2 = ( (1./np.cosh(X)) * np.tanh(X)) *(2.*np.exp(1j*2.8*T)) A = np.array((F1+F2).T, order='C') - Fmodes, b, V, omega = dmd(A, k=2, modes='standard', + Fmodes, b, V, omega = dmd(A, 2, modes='standard', return_amplitudes=True, return_vandermonde=True) Atilde = Fmodes.dot( np.dot(np.diag(b), V)) assert np.allclose(A, Atilde, atol_float64) - Fmodes, b, V, omega = dmd(A, modes='standard', return_amplitudes=True, + Fmodes, b, V, omega = dmd(A, A.shape[1], modes='standard', return_amplitudes=True, return_vandermonde=True) Atilde = Fmodes.dot( np.dot(np.diag(b), V)) assert np.allclose(A, Atilde, atol_float64) - Fmodes, b, V, omega = dmd(A, modes='exact', return_amplitudes=True, + Fmodes, b, V, omega = dmd(A, A.shape[1], modes='exact', return_amplitudes=True, return_vandermonde=True) Atilde = Fmodes.dot( np.dot(np.diag(b), V)) assert np.allclose(A, Atilde, atol_float64) From e4deccd98b73c918f582297050ecaf899e5387eb Mon Sep 17 00:00:00 2001 From: Joseph Knox Date: Thu, 24 May 2018 13:30:52 -0700 Subject: [PATCH 6/7] 
Tests now align with PEP parameter changes. For the most part package now conforms with PEP parameter name guidelines. Still to do: - `pca` : factor pca/spca/robspca - `nmf` : factor and PEP parameter name eval - `sketch` : ensure row wise sketches are indeed row wise and write numerical accuracy checking tests Additionally, in general there can be a lot of improvement as far as code reproduction reduction in the testing modules. --- ristretto/dmd.py | 4 +- ristretto/eigen.py | 6 +- ristretto/externals/nmf.py | 2 +- ristretto/pca.py | 34 +++--- ristretto/qb.py | 6 +- ristretto/sketch/tests/test_transforms.py | 16 +++ ristretto/sketch/tests/test_utils.py | 6 +- ristretto/sketch/transforms.py | 4 +- ristretto/sketch/utils.py | 14 +-- ristretto/tests/__init__.py | 0 ristretto/tests/test_cur.py | 39 ++++--- ristretto/tests/test_dmd.py | 94 +++++---------- ristretto/tests/test_eigen.py | 63 +++++----- ristretto/tests/test_interp_decomp.py | 134 +++++++++------------- ristretto/tests/test_lu.py | 59 +++++----- ristretto/tests/test_pca.py | 75 +++++------- ristretto/tests/test_qb.py | 22 ++-- ristretto/tests/test_svd.py | 88 +++++--------- ristretto/tests/utils.py | 7 ++ 19 files changed, 295 insertions(+), 378 deletions(-) create mode 100644 ristretto/tests/__init__.py create mode 100644 ristretto/tests/utils.py diff --git a/ristretto/dmd.py b/ristretto/dmd.py index f4d5989..5532b05 100644 --- a/ristretto/dmd.py +++ b/ristretto/dmd.py @@ -97,8 +97,8 @@ def dmd(A, rank=None, dt=1, modes='exact', return_amplitudes=False, raise ValueError('A.dtype must be one of %s, not %s' % (' '.join(_VALID_DTYPES), A.dtype)) - if rank is not None and (rank < 1 or rank > n - 1): - raise ValueError('rank must be > 1 and less than n - 1') + if rank is not None and (rank < 1 or rank > n ): + raise ValueError('rank must be > 1 and less than n') #Split data into lef and right snapshot sequence X = A[:, :(n-1)] #pointer diff --git a/ristretto/eigen.py b/ristretto/eigen.py index c9de0ad..275c414 100644 --- a/ristretto/eigen.py +++ b/ristretto/eigen.py @@ -82,7 +82,7 @@ def reigh(A, rank, oversample=10, n_subspace=1, random_state=None): v[:, :A.shape[1]] = v[:, A.shape[1] - 1::-1] w = w[::-1] - return w[:k], Q.dot(v)[:,:k] + return w[:rank], Q.dot(v)[:,:rank] def reigh_nystroem(A, rank, oversample=10, n_subspace=1, random_state=None): @@ -154,7 +154,7 @@ def reigh_nystroem(A, rank, oversample=10, n_subspace=1, random_state=None): v[:, :A.shape[1]] = v[:, A.shape[1]-1::-1] w = w[::-1] - return w[:k], S.dot(v)[:,:k] + return w[:rank], S.dot(v)[:,:rank] # Upper triangular solve F = linalg.solve_triangular(C, conjugate_transpose(B1), lower=True, @@ -165,7 +165,7 @@ def reigh_nystroem(A, rank, oversample=10, n_subspace=1, random_state=None): v, w, _ = linalg.svd(conjugate_transpose(F), compute_uv=True, full_matrices=False, overwrite_a=True, check_finite=False) - return w[:k]**2, v[:,:k] + return w[:rank]**2, v[:,:rank] def reigh_nystroem_col(A, rank, oversample=0, random_state=None): diff --git a/ristretto/externals/nmf.py b/ristretto/externals/nmf.py index 9d99b27..10cd6ee 100644 --- a/ristretto/externals/nmf.py +++ b/ristretto/externals/nmf.py @@ -109,7 +109,7 @@ def _initialize_nmf(X, n_components, init=None, eps=1e-6, # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # NOTE: use our randomized_svd instead of scikit-learn #U, S, V = randomized_svd(X, n_components, random_state=random_state) - U, S, V = rsvd(X, n_components, p=20, q=2) + U, S, V = rsvd(X, n_components) # 
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ W, H = np.zeros(U.shape), np.zeros(V.shape) diff --git a/ristretto/pca.py b/ristretto/pca.py index 765814c..423f1ec 100644 --- a/ristretto/pca.py +++ b/ristretto/pca.py @@ -13,7 +13,7 @@ from .qb import rqb -def spca(A, n_components, alpha=0.1, beta=0.01, max_iter=500, tol=1e-5, +def spca(X, n_components, alpha=0.1, beta=0.01, max_iter=500, tol=1e-5, verbose=True): r"""Sparse Principal Component Analysis (SPCA). @@ -26,7 +26,7 @@ def spca(A, n_components, alpha=0.1, beta=0.01, max_iter=500, tol=1e-5, Parameters ---------- - A : array_like, shape `(m, n)`. + X : array_like, shape `(m, n)`. Input array. n_components : integer, `n_components << min{m,n}`. @@ -67,12 +67,12 @@ def spca(A, n_components, alpha=0.1, beta=0.01, max_iter=500, tol=1e-5, minimize :math:`1/2 \| X - X B A^T \|^2 + \alpha \|B\|_1 + 1/2 \beta \|B\|^2` """ # Shape of input matrix - m = A.shape[0] + m = X.shape[0] #-------------------------------------------------------------------- # Initialization of Variable Projection Solver #-------------------------------------------------------------------- - _, D, Vt = linalg.svd(A, full_matrices=False, overwrite_a=False) + _, D, Vt = linalg.svd(X, full_matrices=False, overwrite_a=False) Dmax = D[0] # l2 norm A = Vt.T[:, 0:n_components] @@ -137,7 +137,7 @@ def spca(A, n_components, alpha=0.1, beta=0.01, max_iter=500, tol=1e-5, return(B, A, eigvals, obj) -def robspca(A, n_components, alpha=0.1, beta=0.1, gamma=0.1, max_iter=1000, +def robspca(X, n_components, alpha=0.1, beta=0.1, gamma=0.1, max_iter=1000, tol=1e-5, verbose=True): r"""Robust Sparse Principal Component Analysis (Robust SPCA). @@ -150,7 +150,7 @@ def robspca(A, n_components, alpha=0.1, beta=0.1, gamma=0.1, max_iter=1000, Parameters ---------- - A : array_like, shape `(m, n)`. + X : array_like, shape `(m, n)`. Input array. n_components : integer, `n_components << min{m,n}`. 
@@ -201,10 +201,10 @@ def robspca(A, n_components, alpha=0.1, beta=0.1, gamma=0.1, max_iter=1000, minimize :math:`1/2 \| X - X B A^T \|^2 + \alpha \|B\|_1 + 1/2 \beta \|B\|^2` """ # Shape of input matrix - m = A.shape[0] + m = X.shape[0] # Initialization of Variable Projection Solver - U, D, Vt = linalg.svd(A, full_matrices=False, overwrite_a=False) + U, D, Vt = linalg.svd(X, full_matrices=False, overwrite_a=False) Dmax = D[0] #l2 norm @@ -219,7 +219,7 @@ def robspca(A, n_components, alpha=0.1, beta=0.1, gamma=0.1, max_iter=1000, beta *= Dmax**2 nu = 1.0 / (Dmax**2 + beta) kappa = nu * alpha - S = np.zeros_like(A) + S = np.zeros_like(X) # Apply Variable Projection Solver n_iter = 0 @@ -229,8 +229,8 @@ def robspca(A, n_components, alpha=0.1, beta=0.1, gamma=0.1, max_iter=1000, # Update A: # X'XB = UDV' # Compute X'XB via SVD of X - XS = A - S - XB = A.dot(B) + XS = X - S + XB = X.dot(B) Z = (XS).T.dot(XB) Utilde, Dtilde, Vttilde = linalg.svd( Z , full_matrices=False, overwrite_a=True) @@ -239,7 +239,7 @@ def robspca(A, n_components, alpha=0.1, beta=0.1, gamma=0.1, max_iter=1000, # Proximal Gradient Descent to Update B R = XS - XB.dot(A.T) - G = A.T.dot(R.dot(A)) - beta * B + G = X.T.dot(R.dot(A)) - beta * B B_temp = B + nu * G @@ -251,7 +251,7 @@ def robspca(A, n_components, alpha=0.1, beta=0.1, gamma=0.1, max_iter=1000, B[idxL] = B_temp[idxL] + kappa # compute residual - R = A - A.dot(B).dot(A.T) + R = X - X.dot(B).dot(A.T) # l1 soft-threshold idxH = R > gamma @@ -283,7 +283,7 @@ def robspca(A, n_components, alpha=0.1, beta=0.1, gamma=0.1, max_iter=1000, return B, A, S, eigvals, obj -def rspca(A, n_components, alpha=0.1, beta=0.1, max_iter=1000, tol=1e-5, +def rspca(X, n_components, alpha=0.1, beta=0.1, max_iter=1000, tol=1e-5, verbose=0, oversample=10, n_subspace=1, random_state=None): r"""Randomized Sparse Principal Component Analysis (rSPCA). @@ -303,7 +303,7 @@ def rspca(A, n_components, alpha=0.1, beta=0.1, max_iter=1000, tol=1e-5, Parameters ---------- - A : array_like, shape `(m, n)`. + X : array_like, shape `(m, n)`. Real nonnegative input matrix. n_components : integer, `n_components << min{m,n}`. @@ -357,10 +357,10 @@ def rspca(A, n_components, alpha=0.1, beta=0.1, max_iter=1000, tol=1e-5, minimize :math:`1/2 \| X - X B A^T \|^2 + \alpha \|B\|_1 + 1/2 \beta \|B\|^2` """ # Shape of data matrix - m = A.shape[0] + m = X.shape[0] # Compute QB decomposition - Q, Xcompressed = rqb(A, rank=n_components, oversample=oversample, + Q, Xcompressed = rqb(X, rank=n_components, oversample=oversample, n_subspace=n_subspace, random_state=random_state) # Compute Sparse PCA diff --git a/ristretto/qb.py b/ristretto/qb.py index 0763929..f3af33c 100644 --- a/ristretto/qb.py +++ b/ristretto/qb.py @@ -73,11 +73,11 @@ def rqb(A, rank, oversample=10, n_subspace=1, sparse=False, random_state=None): (available at `arXiv `_). 
""" if sparse: - Q = johnson_lindenstrauss(A, rank + oversample, n_subspace=n_subspace, - random_state=random_state) - else: Q = sparse_johnson_lindenstrauss( A, rank + oversample, n_subspace=n_subspace, random_state=random_state) + else: + Q = johnson_lindenstrauss(A, rank + oversample, n_subspace=n_subspace, + random_state=random_state) #Project the data matrix a into a lower dimensional subspace B = conjugate_transpose(Q).dot(A) diff --git a/ristretto/sketch/tests/test_transforms.py b/ristretto/sketch/tests/test_transforms.py index 440b0e1..97d989c 100644 --- a/ristretto/sketch/tests/test_transforms.py +++ b/ristretto/sketch/tests/test_transforms.py @@ -42,6 +42,14 @@ def test_johnson_linderstrauss(): assert row_trans.shape == (l, n) assert col_trans.shape == (m, l) + # ------------------------------------------------------------------------ + # tests subsapce iters + row_trans = johnson_lindenstrauss(A, l, n_subspace=2, axis=0) + col_trans = johnson_lindenstrauss(A, l, n_subspace=2, axis=1) + + assert row_trans.shape == (l, n) + assert col_trans.shape == (m, l) + # ------------------------------------------------------------------------ # tests raises incompatible axis assert_raises(ValueError, johnson_lindenstrauss, A, l, axis=2) @@ -64,6 +72,14 @@ def test_sparse_johnson_linderstrauss(): assert row_trans.shape == (l, n) assert col_trans.shape == (m, l) + # ------------------------------------------------------------------------ + # tests subsapce iters + row_trans = sparse_johnson_lindenstrauss(A, l, n_subspace=2, axis=0) + col_trans = sparse_johnson_lindenstrauss(A, l, n_subspace=2, axis=1) + + assert row_trans.shape == (l, n) + assert col_trans.shape == (m, l) + # ------------------------------------------------------------------------ # tests raises incompatible axis assert_raises(ValueError, sparse_johnson_lindenstrauss, A, l, axis=2) diff --git a/ristretto/sketch/tests/test_utils.py b/ristretto/sketch/tests/test_utils.py index da13949..de567ba 100644 --- a/ristretto/sketch/tests/test_utils.py +++ b/ristretto/sketch/tests/test_utils.py @@ -12,7 +12,7 @@ def test_orthonormalize(): B = orthonormalize(A, overwrite_a=False) over = orthonormalize(A, overwrite_a=True) - + assert A is not B # TODO: when does overwrite_a even work? (fortran?) 
#assert A is over @@ -34,8 +34,8 @@ def test_perform_subspace_iterations(): Q_row = np.eye(10)[:5] Q_col = np.eye(10)[:,:5] - rowwise = perform_subspace_iterations(A, Q_row, axis=0) - colwise = perform_subspace_iterations(A, Q_col, axis=1) + rowwise = perform_subspace_iterations(A, Q_row, n_iter=2, axis=0) + colwise = perform_subspace_iterations(A, Q_col, n_iter=2, axis=1) assert rowwise.shape == Q_row.shape assert colwise.shape == Q_col.shape diff --git a/ristretto/sketch/transforms.py b/ristretto/sketch/transforms.py index a259547..9b57c9b 100644 --- a/ristretto/sketch/transforms.py +++ b/ristretto/sketch/transforms.py @@ -60,7 +60,7 @@ def johnson_lindenstrauss(A, l, n_subspace=None, axis=1, random_state=None): return Q -def sparse_johnson_lindenstrauss(A, l, n_subspace=1, density=None, axis=1, +def sparse_johnson_lindenstrauss(A, l, n_subspace=None, density=None, axis=1, random_state=None): """ @@ -82,7 +82,7 @@ def sparse_johnson_lindenstrauss(A, l, n_subspace=1, density=None, axis=1, raise ValueError('If supplied, axis must be in (0, 1)') if density is None: - density = A.shape[0] / log(A.shape[0]) + density = log(A.shape[0]) / A.shape[0] # construct sparse sketch Omega = _sketches.sparse_random_map(A, l, axis, density, random_state) diff --git a/ristretto/sketch/utils.py b/ristretto/sketch/utils.py index 2134b18..1404ca4 100644 --- a/ristretto/sketch/utils.py +++ b/ristretto/sketch/utils.py @@ -1,5 +1,5 @@ """ -Module containing utility functions for +Module containing utility functions for """ from scipy import linalg @@ -24,12 +24,12 @@ def perform_subspace_iterations(A, Q, n_iter=1, axis=0): # perform subspace iterations for _ in range(n_iter): - #if axis == 0: - # Z = orthonormalize(A.dot(Q.T)) - # Q = orthonormalize(A.T.dot(Z)) - #else: - Z = orthonormalize(A.T.dot(Q)) - Q = orthonormalize(A.dot(Z)) + if axis == 0: + Z = orthonormalize(A.dot(Q)) + Q = orthonormalize(A.T.dot(Z)) + else: + Z = orthonormalize(A.T.dot(Q)) + Q = orthonormalize(A.dot(Z)) if axis == 0: return Q.T diff --git a/ristretto/tests/__init__.py b/ristretto/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/ristretto/tests/test_cur.py b/ristretto/tests/test_cur.py index ddc2a9d..2093a7d 100644 --- a/ristretto/tests/test_cur.py +++ b/ristretto/tests/test_cur.py @@ -1,8 +1,13 @@ +from __future__ import division + import numpy as np +from scipy import linalg from ristretto.cur import cur from ristretto.cur import rcur +from .utils import relative_error + atol_float32 = 1e-4 atol_float64 = 1e-8 @@ -12,19 +17,18 @@ # ============================================================================= def test_cur(): m, k = 100, 10 - A = np.array(np.random.randn(m, k), np.float64) - A = A.dot(A.T) - A = A[:,0:50] + A = np.random.randn(m, k).astype(np.float64) + A = A.dot(A.T)[:, :50] # index_set = False - C, U, R = cur(A, k=k+2, index_set=False) - relative_error = (np.linalg.norm(A - C.dot(U).dot(R)) / np.linalg.norm(A)) - assert relative_error < 1e-4 + C, U, R = cur(A, rank=k+2, index_set=False) + A_cur = C.dot(U).dot(R) + assert relative_error(A, A_cur) < atol_float32 # index_set = True - C, U, R = cur(A, k=k+2, index_set=True) - relative_error = (np.linalg.norm(A - A[:,C].dot(U).dot(A[R,:])) / np.linalg.norm(A)) - assert relative_error < 1e-4 + C, U, R = cur(A, rank=k+2, index_set=True) + A_cur = A[:, C].dot(U).dot(A[R]) + assert relative_error(A, A_cur) < atol_float32 # ============================================================================= @@ -32,16 +36,15 @@ def test_cur(): # 
============================================================================= def test_rcur(): m, k = 100, 10 - A = np.array(np.random.randn(m, k), np.float64) - A = A.dot(A.T) - A = A[:,0:50] + A = np.random.randn(m, k).astype(np.float64) + A = A.dot(A.T)[:, :50] # index_set = False - C, U, R = rcur(A, k=k+2, index_set=False) - relative_error = (np.linalg.norm(A - C.dot(U).dot(R)) / np.linalg.norm(A)) - assert relative_error < 1e-4 + C, U, R = rcur(A, k+2, index_set=False) + A_cur = C.dot(U).dot(R) + assert relative_error(A, A_cur) < atol_float32 # index_set = True - C, U, R = rcur(A, k=k+2, index_set=True) - relative_error = (np.linalg.norm(A - A[:,C].dot(U).dot(A[R,:])) / np.linalg.norm(A)) - assert relative_error < 1e-4 + C, U, R = rcur(A, k+2, index_set=True) + A_cur = A[:, C].dot(U).dot(A[R]) + assert relative_error(A, A_cur) < atol_float32 diff --git a/ristretto/tests/test_dmd.py b/ristretto/tests/test_dmd.py index 30de23c..ee5dc24 100644 --- a/ristretto/tests/test_dmd.py +++ b/ristretto/tests/test_dmd.py @@ -6,85 +6,53 @@ atol_float32 = 1e-4 atol_float64 = 1e-8 +def get_A(): + # Define time and space discretizations + x = np.linspace( -10, 10, 100) + t = np.linspace(0, 8*np.pi , 60) + X, T = np.meshgrid(x, t) + + # Create two patio-temporal patterns + F1 = 0.5 * np.cos(X) * (1. + 0. * T) + F2 = ((1./np.cosh(X)) * np.tanh(X)) * (2 * np.exp(1j * 2.8 * T)) + + return np.array((F1 + F2).T, order='C') + + +def A_tilde(Fmodes, b, V): + return Fmodes.dot(np.diag(b).dot(V)) # ============================================================================= # dmd function # ============================================================================= def test_dmd(): - # Define time and space discretizations - x=np.linspace( -10, 10, 100) - t=np.linspace(0, 8*np.pi , 60) - dt=t[2]-t[1] - X, T = np.meshgrid(x,t) - # Create two patio-temporal patterns - F1 = 0.5* np.cos(X)*(1.+0.* T) - F2 = ( (1./np.cosh(X)) * np.tanh(X)) *(2.*np.exp(1j*2.8*T)) - A = np.array((F1+F2).T, order='C') + A = get_A() - Fmodes, b, V, omega = dmd(A, 2, modes='standard', + # ------------------------------------------------------------------------ + # tests mode == standard + Fmodes, b, V, omega = dmd(A, rank=2, modes='standard', return_amplitudes=True, return_vandermonde=True) - Atilde = Fmodes.dot( np.dot(np.diag(b), V)) - assert np.allclose(A, Atilde, atol_float64) - - Fmodes, b, V, omega = dmd(A, A.shape[1], modes='standard', return_amplitudes=True, - return_vandermonde=True) - Atilde = Fmodes.dot( np.dot(np.diag(b), V)) - assert np.allclose(A, Atilde, atol_float64) + assert np.allclose(A, A_tilde(Fmodes, b, V), atol_float64) - Fmodes, b, V, omega = dmd(A, A.shape[1], modes='exact', return_amplitudes=True, - return_vandermonde=True) - Atilde = Fmodes.dot( np.dot(np.diag(b), V)) - assert np.allclose(A, Atilde, atol_float64) + # ------------------------------------------------------------------------ + # tests mode == exact, rank == A.shape[1] + Fmodes, b, V, omega = dmd(A, rank=A.shape[1], modes='exact', + return_amplitudes=True, return_vandermonde=True) + assert np.allclose(A, A_tilde(Fmodes, b, V), atol_float64) + # ------------------------------------------------------------------------ + # tests mode == exact_scaled, rank == None Fmodes, b, V, omega = dmd(A, modes='exact_scaled', return_amplitudes=True, return_vandermonde=True) - Atilde = Fmodes.dot( np.dot(np.diag(b), V)) - assert np.allclose(A, Atilde, atol_float64) + assert np.allclose(A, A_tilde(Fmodes, b, V), atol_float64) # 
============================================================================= # rdmd function # ============================================================================= def test_rdmd(): - # Define time and space discretizations - x=np.linspace( -10, 10, 100) - t=np.linspace(0, 8*np.pi , 60) - dt=t[2]-t[1] - X, T = np.meshgrid(x,t) - # Create two patio-temporal patterns - F1 = 0.5* np.cos(X)*(1.+0.* T) - F2 = ( (1./np.cosh(X)) * np.tanh(X)) *(2.*np.exp(1j*2.8*T)) - A = np.array((F1+F2).T, order='C') - - Fmodes, b, V, omega = rdmd(A, k=2, p=10, q=2, sdist='uniform', - return_amplitudes=True, return_vandermonde=True) - Atilde = Fmodes.dot( np.dot(np.diag(b), V)) - assert np.allclose(A, Atilde, atol_float64) - - Fmodes, b, V, omega = rdmd(A, k=2, p=10, q=2, sdist='normal', - return_amplitudes=True, return_vandermonde=True) - Atilde = Fmodes.dot( np.dot(np.diag(b), V)) - assert np.allclose(A, Atilde, atol_float64) - - -def test_rdmd_single_pass(): - # Define time and space discretizations - x=np.linspace( -10, 10, 100) - t=np.linspace(0, 8*np.pi , 60) - dt=t[2]-t[1] - X, T = np.meshgrid(x,t) - - # Create two patio-temporal patterns - F1 = 0.5* np.cos(X)*(1.+0.* T) - F2 = ( (1./np.cosh(X)) * np.tanh(X)) *(2.*np.exp(1j*2.8*T)) - A = np.array((F1+F2).T, order='C') - - Fmodes, b, V, omega = rdmd(A, k=2, p=10, l=20, sdist='uniform', single_pass=True, - return_amplitudes=True, return_vandermonde=True) - Atilde = Fmodes.dot( np.dot(np.diag(b), V)) - assert np.allclose(A, Atilde, atol_float64) + A = get_A() - Fmodes, b, V, omega = rdmd(A, k=2, p=10, l=20, sdist='orthogonal', single_pass=True, + Fmodes, b, V, omega = rdmd(A, 2, oversample=10, n_subspace=2, return_amplitudes=True, return_vandermonde=True) - Atilde = Fmodes.dot( np.dot(np.diag(b), V)) - assert np.allclose(A, Atilde, atol_float64) + assert np.allclose(A, A_tilde(Fmodes, b, V), atol_float64) diff --git a/ristretto/tests/test_eigen.py b/ristretto/tests/test_eigen.py index e3f3056..5f34025 100644 --- a/ristretto/tests/test_eigen.py +++ b/ristretto/tests/test_eigen.py @@ -1,9 +1,14 @@ +from __future__ import division + import numpy as np +from scipy import linalg from ristretto.eigen import reigh from ristretto.eigen import reigh_nystroem from ristretto.eigen import reigh_nystroem_col +from .utils import relative_error + atol_float32 = 1e-4 atol_float64 = 1e-8 @@ -13,27 +18,25 @@ # ============================================================================= def test_reigh_float64(): m, k = 100, 10 - A = np.array(np.random.randn(m, k), np.float64) + A = np.random.randn(m, k).astype(np.float64) A = A.dot(A.T) - w, v = reigh(A, k=k, p=5, q=2) - Ak = (v*w).dot(v.T) + w, v = reigh(A, k, oversample=5, n_subspace=2) + Ak = (v * w).dot(v.T) - percent_error = 100 * np.linalg.norm(A - Ak) / np.linalg.norm(A) - assert percent_error < atol_float64 + assert relative_error(A, Ak) < atol_float64 def test_reigh_complex128(): m, k = 100, 10 - A = np.array(np.random.randn(m, k), np.float64) + 1j * \ - np.array(np.random.randn(m, k), np.float64) + A = np.random.randn(m, k).astype(np.float64) + \ + 1j * np.random.randn(m, k).astype(np.float64) A = A.dot(A.conj().T) - w, v = reigh(A, k=k, p=10, q=2) - Ak = (v*w).dot(v.conj().T) + w, v = reigh(A, k, oversample=10, n_subspace=2) + Ak = (v * w).dot(v.conj().T) - percent_error = 100 * np.linalg.norm(A - Ak) / np.linalg.norm(A) - assert percent_error < atol_float64 + assert relative_error(A, Ak) < atol_float64 # ============================================================================= @@ -41,26 +44,25 @@ def 
test_reigh_complex128(): # ============================================================================= def test_reig_nystroem_float64(): m, k = 20, 10 - A = np.array(np.random.randn(m, k), np.float64) + A = np.random.randn(m, k).astype(np.float64) A = A.dot(A.T) - w, v = reigh_nystroem(A, k=k, p=0, q=2) - Ak = (v*w).dot(v.T) + w, v = reigh_nystroem(A, k, oversample=0, n_subspace=2) + Ak = (v * w).dot(v.T) - percent_error = 100 * np.linalg.norm(A - Ak) / np.linalg.norm(A) - assert percent_error < atol_float64 + assert relative_error(A, Ak) < atol_float64 def test_reig_nystroem_complex128(): m, k = 20, 10 - A = np.array(np.random.randn(m, k), np.float64) + 1j * np.array(np.random.randn(m, k), np.float64) + A = np.random.randn(m, k).astype(np.float64) + \ + 1j * np.random.randn(m, k).astype(np.float64) A = A.dot(A.conj().T) - w, v = reigh_nystroem(A, k=k, p=0, q=2) - Ak = (v*w).dot(v.conj().T) + w, v = reigh_nystroem(A, k, oversample=0, n_subspace=2) + Ak = (v * w).dot(v.conj().T) - percent_error = 100 * np.linalg.norm(A - Ak) / np.linalg.norm(A) - assert percent_error < atol_float64 + assert relative_error(A, Ak) < atol_float64 # ============================================================================= @@ -68,23 +70,22 @@ def test_reig_nystroem_complex128(): # ============================================================================= def test_reig_nystroem_col_float64(): m, k = 20, 10 - A = np.array(np.random.randn(m, k), np.float64) + A = np.random.randn(m, k).astype(np.float64) A = A.dot(A.T) - w, v = reigh_nystroem_col(A, k=k, p=0) - Ak = (v*w).dot(v.T) + w, v = reigh_nystroem_col(A, k, oversample=0) + Ak = (v * w).dot(v.T) - percent_error = 100 * np.linalg.norm(A - Ak) / np.linalg.norm(A) - assert percent_error < atol_float64 + assert relative_error(A, Ak) < atol_float64 def test_reig_nystroem_col_complex128(): m, k = 20, 10 - A = np.array(np.random.randn(m, k), np.float64) + 1j * np.array(np.random.randn(m, k), np.float64) + A = np.random.randn(m, k).astype(np.float64) + \ + 1j * np.random.randn(m, k).astype(np.float64) A = A.dot(A.conj().T) - w, v = reigh_nystroem_col(A, k=k, p=0) - Ak = (v*w).dot(v.conj().T) + w, v = reigh_nystroem_col(A, k, oversample=0) + Ak = (v * w).dot(v.conj().T) - percent_error = 100 * np.linalg.norm(A - Ak) / np.linalg.norm(A) - assert percent_error < atol_float64 + assert relative_error(A, Ak) < atol_float64 diff --git a/ristretto/tests/test_interp_decomp.py b/ristretto/tests/test_interp_decomp.py index bd57ea7..1e6c04a 100644 --- a/ristretto/tests/test_interp_decomp.py +++ b/ristretto/tests/test_interp_decomp.py @@ -1,8 +1,12 @@ +from __future__ import division + import numpy as np +from scipy import linalg from ristretto.interp_decomp import interp_decomp from ristretto.interp_decomp import rinterp_decomp -from ristretto.interp_decomp import rinterp_decomp_qb + +from .utils import relative_error atol_float32 = 1e-4 atol_float64 = 1e-8 @@ -13,35 +17,38 @@ # ============================================================================= def test_id_col(): m, k = 100, 10 - A = np.array(np.random.randn(m, k), np.float64) - A = A.dot(A.T) - A = A[:,0:50] + A = np.random.randn(m, k).astype(np.float64) + A = A.dot(A.T)[:, :50] - # index_set = False - C, V = interp_decomp(A, k=k+2, mode='column', index_set=False) - relative_error = (np.linalg.norm(A - C.dot(V)) / np.linalg.norm(A)) - assert relative_error < 1e-4 + # ------------------------------------------------------------------------ + # test index_set == False + C, V = interp_decomp(A, rank=k+2, 
mode='column', index_set=False) + A_id = C.dot(V) + assert relative_error(A, A_id) < atol_float32 + + # ------------------------------------------------------------------------ + # test index_set == True + C, V = interp_decomp(A, rank=k+2, mode='column', index_set=True) + A_id = A[:, C].dot(V) + assert relative_error(A, A_id) < atol_float32 - # index_set = True - C, V = interp_decomp(A, k=k+2, mode='column', index_set=True) - relative_error = (np.linalg.norm(A - A[:,C].dot(V)) / np.linalg.norm(A)) - assert relative_error < 1e-4 def test_id_row(): m, k = 100, 10 - A = np.array(np.random.randn(m, k), np.float64) - A = A.dot(A.T) - A = A[:,0:50] + A = np.random.randn(m, k).astype(np.float64) + A = A.dot(A.T)[:, :50] - # index_set = False - Z, R = interp_decomp(A, k=k+2, mode='row', index_set=False) - relative_error = (np.linalg.norm(A - Z.dot(R)) / np.linalg.norm(A)) - assert relative_error < 1e-4 + # ------------------------------------------------------------------------ + # test index_set == False + Z, R = interp_decomp(A, k+2, mode='row', index_set=False) + A_id = Z.dot(R) + assert relative_error(A, A_id) < atol_float32 - # index_set = True - Z, R = interp_decomp(A, k=k+2, mode='row', index_set=True) - relative_error = (np.linalg.norm(A - Z.dot(A[R,:])) / np.linalg.norm(A)) - assert relative_error < 1e-4 + # ------------------------------------------------------------------------ + # test index_set == True + Z, R = interp_decomp(A, k+2, mode='row', index_set=True) + A_id = Z.dot(A[R, :]) + assert relative_error(A, A_id) < atol_float32 # ============================================================================= @@ -49,70 +56,33 @@ def test_id_row(): # ============================================================================= def test_rid_col(): m, k = 100, 10 - A = np.array(np.random.randn(m, k), np.float64) - A = A.dot(A.T) - A = A[:,0:50] + A = np.random.randn(m, k).astype(np.float64) + A = A.dot(A.T)[:, :50] # index_set = False - C, V = rinterp_decomp(A, k=k+2, mode='column', index_set=False) - relative_error = (np.linalg.norm(A - C.dot(V)) / np.linalg.norm(A)) - assert relative_error < 1e-4 + C, V = rinterp_decomp(A, k+2, mode='column', index_set=False) + A_id = C.dot(V) + assert relative_error(A, A_id) < atol_float32 # index_set = True - C, V = rinterp_decomp(A, k=k+2, mode='column', index_set=True) - relative_error = (np.linalg.norm(A - A[:,C].dot(V)) / np.linalg.norm(A)) - assert relative_error < 1e-4 + C, V = rinterp_decomp(A, k+2, mode='column', index_set=True) + A_id = A[:, C].dot(V) + assert relative_error(A, A_id) < atol_float32 def test_rid_row(): m, k = 100, 10 - A = np.array(np.random.randn(m, k), np.float64) - A = A.dot(A.T) - A = A[:,0:50] - - # index_set = False - Z, R = rinterp_decomp(A, k=k+2, mode='row', index_set=False) - relative_error = (np.linalg.norm(A - Z.dot(R)) / np.linalg.norm(A)) - assert relative_error < 1e-4 - - # index_set = True - Z, R = rinterp_decomp(A, k=k+2, mode='row', index_set=True) - relative_error = (np.linalg.norm(A - Z.dot(A[R,:])) / np.linalg.norm(A)) - assert relative_error < 1e-4 - - -# ============================================================================= -# rinterp_decomp_qb function -# ============================================================================= -def test_ridqb_col(): - m, k = 100, 10 - A = np.array(np.random.randn(m, k), np.float64) - A = A.dot(A.T) - A = A[:,0:50] - - # index_set = False - C, V = rinterp_decomp_qb(A, k=k+2, mode='column', index_set=False) - relative_error = (np.linalg.norm(A - C.dot(V)) / 
np.linalg.norm(A)) - assert relative_error < 1e-4 - - # index_set = True - C, V = rinterp_decomp_qb(A, k=k+2, mode='column', index_set=True) - relative_error = (np.linalg.norm(A - A[:,C].dot(V)) / np.linalg.norm(A)) - assert relative_error < 1e-4 - - -def test_ridqb_row(): - m, k = 100, 10 - A = np.array(np.random.randn(m, k), np.float64) - A = A.dot(A.T) - A = A[:,0:50] - - # index_set = False - Z, R = rinterp_decomp_qb(A, k=k+2, mode='row', index_set=False) - relative_error = (np.linalg.norm(A - Z.dot(R)) / np.linalg.norm(A)) - assert relative_error < 1e-4 - - # index_set = True - Z, R = rinterp_decomp_qb(A, k=k+2, mode='row', index_set=True) - relative_error = (np.linalg.norm(A - Z.dot(A[R,:])) / np.linalg.norm(A)) - assert relative_error < 1e-4 + A = np.random.randn(m, k).astype(np.float64) + A = A.dot(A.T)[:, :50] + + # ------------------------------------------------------------------------ + # test index_set == False + Z, R = rinterp_decomp(A, k+2, mode='row', index_set=False) + A_id = Z.dot(R) + assert relative_error(A, A_id) < atol_float32 + + # ------------------------------------------------------------------------ + # test index_set == True + Z, R = rinterp_decomp(A, k+2, mode='row', index_set=True) + A_id = Z.dot(A[R, :]) + assert relative_error(A, A_id) < atol_float32 diff --git a/ristretto/tests/test_lu.py b/ristretto/tests/test_lu.py index 814b8c0..e3ab31b 100644 --- a/ristretto/tests/test_lu.py +++ b/ristretto/tests/test_lu.py @@ -2,6 +2,8 @@ from ristretto.lu import rlu +from .utils import relative_error + atol_float32 = 1e-4 atol_float64 = 1e-8 @@ -11,47 +13,40 @@ # ============================================================================= def test_rlu_float64(): m, k = 100, 10 - A = np.array(np.random.randn(m, k), np.float64) - A = A.dot(A.T) - A = A[:,0:50] - P, L, U, Q = rlu(A, permute=False, k=k, p=5, q=2) + A = np.random.randn(m, k).astype(np.float64) + A = A.dot(A.T)[:, :50] + + # ------------------------------------------------------------------------ + # test wth permute == False + P, L, U, Q = rlu(A, k, oversample=5, n_subspace=2, permute=False) Ak = P.dot(L.dot(U)).dot(Q) - percent_error = 100 * np.linalg.norm(A - Ak) / np.linalg.norm(A) - assert percent_error < atol_float64 + assert relative_error(A, Ak) < atol_float64 + # ------------------------------------------------------------------------ + # test wth permute == True + L, U = rlu(A, k, oversample=5, n_subspace=2, permute=True) + Ak = L.dot(U) -def test_rlu_complex128(): - m, k = 100, 10 - A = np.array(np.random.randn(m, k), np.float64) + 1j * \ - np.array(np.random.randn(m, k), np.float64) - A = A.dot(A.conj().T) - A = A[:,0:50] - P, L, U, Q = rlu(A, permute=False, k=k, p=5, q=2) - Ak = P.dot(L.dot(U)).dot(Q) - percent_error = 100 * np.linalg.norm(A - Ak) / np.linalg.norm(A) + assert relative_error(A, Ak) < atol_float64 - assert percent_error < atol_float64 -def test_rlu_permute_float64(): +def test_rlu_complex128(): m, k = 100, 10 - A = np.array(np.random.randn(m, k), np.float64) - A = A.dot(A.T) - A = A[:,0:50] - L, U, = rlu(A, permute=True, k=k, p=5, q=2) - Ak = L.dot(U) - percent_error = 100 * np.linalg.norm(A - Ak) / np.linalg.norm(A) + A = np.random.randn(m, k).astype(np.float64) + \ + 1j * np.random.randn(m, k).astype(np.float64) + A = A.dot(A.conj().T)[:, :50] - assert percent_error < atol_float64 + # ------------------------------------------------------------------------ + # test wth permute == False + P, L, U, Q = rlu(A, k, oversample=5, n_subspace=2, permute=False) + Ak = 
P.dot(L.dot(U)).dot(Q) + assert relative_error(A, Ak) < atol_float64 -def test_rlu_permute_complex128(): - m, k = 100, 10 - A = np.array(np.random.randn(m, k), np.float64) + 1j * np.array(np.random.randn(m, k), np.float64) - A = A.dot(A.conj().T) - A = A[:,0:50] - L, U= rlu(A, permute=True, k=k, p=5, q=2) + # ------------------------------------------------------------------------ + # test wth permute == True + L, U = rlu(A, k, oversample=5, n_subspace=2, permute=True) Ak = L.dot(U) - percent_error = 100 * np.linalg.norm(A - Ak) / np.linalg.norm(A) - assert percent_error < atol_float64 + assert relative_error(A, Ak) < atol_float64 diff --git a/ristretto/tests/test_pca.py b/ristretto/tests/test_pca.py index c6f8172..a127b50 100644 --- a/ristretto/tests/test_pca.py +++ b/ristretto/tests/test_pca.py @@ -4,74 +4,61 @@ from ristretto.pca import rspca from ristretto.pca import robspca +from .utils import relative_error + +atol_float16 = 1e-2 atol_float32 = 1e-4 atol_float64 = 1e-8 +alpha = 1e-3 +beta = 1e-5 -# ============================================================================= -# spca function -# ============================================================================= -def test_spca(): +def get_A(): # Create Data m = 10000 V1 = np.array(np.random.standard_normal(m) * 290).reshape(-1, 1) V2 = np.array(np.random.standard_normal(m) * 399).reshape(-1, 1) V3 = -0.1*V1 + 0.1*V2 + np.array(np.random.standard_normal(m) * 100).reshape(-1, 1) - A = np.concatenate((V1,V1,V1,V1, V2,V2,V2,V2, V3,V3), axis=1) - - alpha = 1e-3 - beta = 1e-5 + return np.concatenate((V1,V1,V1,V1, V2,V2,V2,V2, V3,V3), axis=1) - Bstar, Astar, eigvals, obj = spca(A, n_components=3, max_iter=100, - alpha=alpha, beta=beta, verbose=0) - - relative_error = (np.linalg.norm(A - A.dot(Bstar).dot(Astar.T)) / np.linalg.norm(A)) - assert relative_error < 1e-2 - # ============================================================================= -# rspca function +# spca function # ============================================================================= -def test_rspca(): - - # Create Data - m = 10000 - V1 = np.array(np.random.standard_normal(m) * 290).reshape(-1, 1) - V2 = np.array(np.random.standard_normal(m) * 399).reshape(-1, 1) - V3 = -0.1*V1 + 0.1*V2 + np.array(np.random.standard_normal(m) * 100).reshape(-1, 1) - - A = np.concatenate((V1,V1,V1,V1, V2,V2,V2,V2, V3,V3), axis=1) - - alpha = 1e-3 - beta = 1e-5 +def test_spca(): + A = get_A() - Bstar, Astar, eigvals, obj = rspca(A, n_components=3, p=10, q=2, max_iter=100, - alpha=alpha, beta=beta, verbose=0) + Bstar, Astar, eigvals, obj = spca(A, n_components=3, max_iter=100, + alpha=alpha, beta=beta, verbose=0) - relative_error = (np.linalg.norm(A - A.dot(Bstar).dot(Astar.T)) / np.linalg.norm(A)) - assert relative_error < 1e-2 + A_pca = A.dot(Bstar).dot(Astar.T) + assert relative_error(A, A_pca) < atol_float16 # ============================================================================= # robspca function # ============================================================================= def test_robspca(): - # Create Data - m = 10000 - V1 = np.array(np.random.standard_normal(m) * 290).reshape(-1, 1) - V2 = np.array(np.random.standard_normal(m) * 399).reshape(-1, 1) - V3 = -0.1*V1 + 0.1*V2 + np.array(np.random.standard_normal(m) * 100).reshape(-1, 1) - - A = np.concatenate((V1,V1,V1,V1, V2,V2,V2,V2, V3,V3), axis=1) - - alpha = 1e-3 - beta = 1e-5 gamma = 10 - + A = get_A() Bstar, Astar, S, eigvals, obj = robspca(A, n_components=3, max_iter=100, alpha=alpha, beta=beta, 
gamma=gamma, verbose=0) - relative_error = (np.linalg.norm(A - A.dot(Bstar).dot(Astar.T)) / np.linalg.norm(A)) - assert relative_error < 1e-2 + A_pca = A.dot(Bstar).dot(Astar.T) + assert relative_error(A, A_pca) < atol_float16 + + +# ============================================================================= +# rspca function +# ============================================================================= +def test_rspca(): + A = get_A() + + Bstar, Astar, eigvals, obj = rspca(A, n_components=3, oversample=10, + n_subspace=2, max_iter=100, + alpha=alpha, beta=beta, verbose=0) + + A_pca = A.dot(Bstar).dot(Astar.T) + assert relative_error(A, A_pca) < atol_float16 diff --git a/ristretto/tests/test_qb.py b/ristretto/tests/test_qb.py index 43e5876..81b762f 100644 --- a/ristretto/tests/test_qb.py +++ b/ristretto/tests/test_qb.py @@ -2,6 +2,8 @@ from ristretto.qb import rqb +from .utils import relative_error + atol_float32 = 1e-4 atol_float64 = 1e-8 @@ -11,22 +13,22 @@ # ============================================================================= def test_rqb_float64(): m, k = 100, 10 - A = np.array(np.random.randn(m, k), np.float64) - A = A.dot( A.T ) - Q, B = rqb(A, k=k, p=5, q=2) + A = np.random.randn(m, k).astype(np.float64) + A = A.dot(A.T) + + Q, B = rqb(A, k, oversample=5, n_subspace=2) Ak = Q.dot(B) - percent_error = 100 * np.linalg.norm(A - Ak) / np.linalg.norm(A) - assert percent_error < atol_float64 + assert relative_error(A, Ak) < atol_float64 def test_rqb_complex128(): m, k = 100, 10 - A = np.array(np.random.randn(m, k), np.float64) + 1j * \ - np.array(np.random.randn(m, k), np.float64) + A = np.random.randn(m, k).astype(np.float64) + \ + 1j * np.random.randn(m, k).astype(np.float64) A = A.dot(A.conj().T) - Q, B = rqb(A, k=k, p=5, q=2) + + Q, B = rqb(A, k, oversample=5, n_subspace=2) Ak = Q.dot(B) - percent_error = 100 * np.linalg.norm(A - Ak) / np.linalg.norm(A) - assert percent_error < atol_float64 + assert relative_error(A, Ak) < atol_float64 diff --git a/ristretto/tests/test_svd.py b/ristretto/tests/test_svd.py index f6e558b..1fa826a 100644 --- a/ristretto/tests/test_svd.py +++ b/ristretto/tests/test_svd.py @@ -2,6 +2,8 @@ from ristretto.svd import rsvd +from .utils import relative_error + atol_float32 = 1e-4 atol_float64 = 1e-8 @@ -11,78 +13,44 @@ # ============================================================================= def test_rsvd_float64(): m, k = 100, 10 - A = np.array(np.random.randn(m, k), np.float64) - A = A.dot( A.T ) - U, s, Vt = rsvd(A, k=k, p=5, q=2) - Ak = U.dot(np.diag(s).dot(Vt)) - percent_error = 100 * np.linalg.norm(A - Ak) / np.linalg.norm(A) - - assert percent_error < atol_float64 - + A = np.random.randn(m, k).astype(np.float64) + A = A.dot(A.T) -def test_rsvd_complex128(): - m, k = 100, 10 - A = np.array(np.random.randn(m, k), np.float64) + 1j * \ - np.array(np.random.randn(m, k), np.float64) - A = A.dot(A.conj().T) - U, s, Vh = rsvd(A, k=k, p=5, q=2) - Ak = U.dot(np.diag(s).dot(Vh)) - percent_error = 100 * np.linalg.norm(A - Ak) / np.linalg.norm(A) + # ------------------------------------------------------------------------ + # test normal + U, s, Vt = rsvd(A, k, oversample=5, n_subspace=2) + Ak = U.dot(np.diag(s).dot(Vt)) - assert percent_error < atol_float64 + assert relative_error(A, Ak) < atol_float64 + # ------------------------------------------------------------------------ + # test transposed + A = A[:, :50].T -def test_rsvd_fliped_float64(): - m, k = 100, 10 - A = np.array(np.random.randn(m, k), np.float64) - A = A.dot(A.T) - A = A[:,0:50] - 
U, s, Vh = rsvd(A.T, k=k, p=5, q=2) - Ak = U.dot(np.diag(s).dot(Vh)) - percent_error = 100 * np.linalg.norm(A.T - Ak) / np.linalg.norm(A.T) + U, s, Vt = rsvd(A, k, oversample=5, n_subspace=2) + Ak = U.dot(np.diag(s).dot(Vt)) - assert percent_error < atol_float64 + assert relative_error(A, Ak) < atol_float64 -def test_rsvd_fliped_complex128(): +def test_rsvd_complex128(): m, k = 100, 10 - A = np.array(np.random.randn(m, k), np.float64) + 1j * \ - np.array(np.random.randn(m, k), np.float64) + A = np.random.randn(m, k).astype(np.float64) + \ + 1j * np.random.randn(m, k).astype(np.float64) A = A.dot(A.conj().T) - A = A[:,0:50] - U, s, Vh = rsvd(A.conj().T, k=k, p=5, q=2) - Ak = U.dot(np.diag(s).dot(Vh)) - percent_error = 100 * np.linalg.norm(A.conj().T - Ak) / np.linalg.norm(A.conj().T) - - assert percent_error < atol_float64 - -def test_rsvd_single_float64(): - m, k = 100, 10 - A = np.array(np.random.randn(m, k), np.float64) - A = A.dot( A.T ) - U, s, Vt = rsvd(A, k=k, p=5, single_pass=True) + # ------------------------------------------------------------------------ + # test normal + U, s, Vt = rsvd(A, k, oversample=5, n_subspace=2) Ak = U.dot(np.diag(s).dot(Vt)) - percent_error = 100 * np.linalg.norm(A - Ak) / np.linalg.norm(A) - assert percent_error < atol_float64 + assert relative_error(A, Ak) < atol_float64 + # ------------------------------------------------------------------------ + # test transposed + A = A[:, :50].conj().T -def test_rsvd_single_complex128(): - m, k = 100, 10 - A = np.array(np.random.randn(m, k), np.float64) + 1j * \ - np.array(np.random.randn(m, k), np.float64) - A = A.dot(A.conj().T) - U, s, Vh = rsvd(A, k=k, p=5, single_pass=True) - Ak = U.dot(np.diag(s).dot(Vh)) - percent_error = 100 * np.linalg.norm(A - Ak) / np.linalg.norm(A) - + U, s, Vt = rsvd(A, k, oversample=5, n_subspace=2) + Ak = U.dot(np.diag(s).dot(Vt)) -def test_rsvd_single_orthogonal_complex128(): - m, k = 100, 10 - A = np.array(np.random.randn(m, k), np.float64) + 1j * \ - np.array(np.random.randn(m, k), np.float64) - A = A.dot(A.conj().T) - U, s, Vh = rsvd(A, k=k, p=5, sdist='orthogonal', single_pass=True) - Ak = U.dot(np.diag(s).dot(Vh)) - percent_error = 100 * np.linalg.norm(A - Ak) / np.linalg.norm(A) + assert relative_error(A, Ak) < atol_float64 diff --git a/ristretto/tests/utils.py b/ristretto/tests/utils.py new file mode 100644 index 0000000..6fa597b --- /dev/null +++ b/ristretto/tests/utils.py @@ -0,0 +1,7 @@ +from __future__ import division + +from scipy import linalg + + +def relative_error(A, B): + return linalg.norm(A - B) / linalg.norm(A) From c685abd8409da723cc345b31312bd45c7b3a7513 Mon Sep 17 00:00:00 2001 From: Joseph Knox Date: Thu, 24 May 2018 16:52:00 -0700 Subject: [PATCH 7/7] Move calling of subspace iterations. The calling of `perform_subspace_iterations` now resides outside of the transform functions so that it can be used independent of which transform is used. Additionally, default n_subspace has been set to 2. 
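For reference, the new calling convention looks roughly like the minimal sketch below. It simply mirrors the updated `rqb` body in ristretto/qb.py from this patch; the wrapper name `rqb_sketch` is illustrative only and is not part of the change:

    from ristretto.sketch.transforms import johnson_lindenstrauss
    from ristretto.sketch.utils import perform_subspace_iterations
    from ristretto.utils import conjugate_transpose

    def rqb_sketch(A, rank, oversample=10, n_subspace=2, random_state=None):
        # draw the randomized range sketch; the transform itself no longer
        # performs subspace iterations internally
        Q = johnson_lindenstrauss(A, rank + oversample, random_state=random_state)

        # refine the basis in the caller, independent of which transform was used
        if n_subspace > 0:
            Q = perform_subspace_iterations(A, Q, n_iter=n_subspace, axis=1)

        # project A onto the smaller subspace
        B = conjugate_transpose(Q).dot(A)
        return Q, B
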
--- ristretto/cur.py | 4 ++-- ristretto/dmd.py | 4 ++-- ristretto/eigen.py | 21 +++++++++++------- ristretto/interp_decomp.py | 4 ++-- ristretto/lu.py | 11 +++++---- ristretto/pca.py | 4 ++-- ristretto/qb.py | 15 ++++++++----- ristretto/sketch/tests/test_transforms.py | 16 -------------- ristretto/sketch/transforms.py | 27 ++++++----------------- ristretto/sketch/utils.py | 2 +- ristretto/svd.py | 4 ++-- 11 files changed, 47 insertions(+), 65 deletions(-) diff --git a/ristretto/cur.py b/ristretto/cur.py index 9806917..5594dd1 100644 --- a/ristretto/cur.py +++ b/ristretto/cur.py @@ -77,7 +77,7 @@ def cur(A, rank=None, index_set=False): return C, U, R -def rcur(A, rank, oversample=10, n_subspace=1, index_set=False, random_state=None): +def rcur(A, rank, oversample=10, n_subspace=2, index_set=False, random_state=None): """Randomized CUR decomposition. Randomized algorithm for computing the approximate low-rank CUR @@ -105,7 +105,7 @@ def rcur(A, rank, oversample=10, n_subspace=1, index_set=False, random_state=Non Controls the oversampling of column space. Increasing this parameter may improve numerical accuracy. - n_subspace : integer, default: 1. + n_subspace : integer, default: 2. Parameter to control number of subspace iterations. Increasing this parameter may improve numerical accuracy. diff --git a/ristretto/dmd.py b/ristretto/dmd.py index 5532b05..15aaaa5 100644 --- a/ristretto/dmd.py +++ b/ristretto/dmd.py @@ -156,7 +156,7 @@ def dmd(A, rank=None, dt=1, modes='exact', return_amplitudes=False, return result -def rdmd(A, rank, dt=1, oversample=10, n_subspace=1, return_amplitudes=False, +def rdmd(A, rank, dt=1, oversample=10, n_subspace=2, return_amplitudes=False, return_vandermonde=False, order=True, random_state=None): """Randomized Dynamic Mode Decomposition. @@ -186,7 +186,7 @@ def rdmd(A, rank, dt=1, oversample=10, n_subspace=1, return_amplitudes=False, Controls the oversampling of column space. Increasing this parameter may improve numerical accuracy. - n_subspace : integer, default: 1. + n_subspace : integer, default: 2. Parameter to control number of subspace iterations. Increasing this parameter may improve numerical accuracy. diff --git a/ristretto/eigen.py b/ristretto/eigen.py index 275c414..5496972 100644 --- a/ristretto/eigen.py +++ b/ristretto/eigen.py @@ -14,12 +14,13 @@ from scipy import linalg from .sketch.transforms import johnson_lindenstrauss, randomized_uniform_sampling +from .sketch.utils import perform_subspace_iterations from .utils import check_random_state, conjugate_transpose _VALID_DTYPES = (np.float32, np.float64, np.complex64, np.complex128) -def reigh(A, rank, oversample=10, n_subspace=1, random_state=None): +def reigh(A, rank, oversample=10, n_subspace=2, random_state=None): """Randomized eigendecompostion. The quality of the approximation can be controlled via the oversampling @@ -38,7 +39,7 @@ def reigh(A, rank, oversample=10, n_subspace=1, random_state=None): Controls the oversampling of column space. Increasing this parameter may improve numerical accuracy. - n_subspace : integer, default: 1. + n_subspace : integer, default: 2. Parameter to control number of subspace iterations. Increasing this parameter may improve numerical accuracy. @@ -67,8 +68,10 @@ def reigh(A, rank, oversample=10, n_subspace=1, random_state=None): (available at `arXiv `_). 
""" # get random sketch - Q = johnson_lindenstrauss(A, rank + oversample, n_subspace=n_subspace, - axis=1, random_state=random_state) + Q = johnson_lindenstrauss(A, rank + oversample, axis=1, random_state=random_state) + + if n_subspace > 0: + Q = perform_subspace_iterations(A, Q, n_iter=n_subspace, axis=1) #Project the data matrix a into a lower dimensional subspace B = A.dot(Q) @@ -85,7 +88,7 @@ def reigh(A, rank, oversample=10, n_subspace=1, random_state=None): return w[:rank], Q.dot(v)[:,:rank] -def reigh_nystroem(A, rank, oversample=10, n_subspace=1, random_state=None): +def reigh_nystroem(A, rank, oversample=10, n_subspace=2, random_state=None): """Randomized eigendecompostion using the Nystroem method. The quality of the approximation can be controlled via the oversampling @@ -104,7 +107,7 @@ def reigh_nystroem(A, rank, oversample=10, n_subspace=1, random_state=None): Controls the oversampling of column space. Increasing this parameter may improve numerical accuracy. - n_subspace : integer, default: 1. + n_subspace : integer, default: 2. Parameter to control number of subspace iterations. Increasing this parameter may improve numerical accuracy. @@ -133,8 +136,10 @@ def reigh_nystroem(A, rank, oversample=10, n_subspace=1, random_state=None): (available at `arXiv `_). """ # get random sketch - S = johnson_lindenstrauss(A, rank + oversample, n_subspace=n_subspace, - axis=1, random_state=random_state) + S = johnson_lindenstrauss(A, rank + oversample, axis=1, random_state=random_state) + + if n_subspace > 0: + S = perform_subspace_iterations(A, S, n_iter=n_subspace, axis=1) #Project the data matrix a into a lower dimensional subspace B1 = A.dot(S) diff --git a/ristretto/interp_decomp.py b/ristretto/interp_decomp.py index 0ebbdbd..44fe54c 100644 --- a/ristretto/interp_decomp.py +++ b/ristretto/interp_decomp.py @@ -107,7 +107,7 @@ def interp_decomp(A, rank, mode='column', index_set=False): return conjugate_transpose(V), conjugate_transpose(C) -def rinterp_decomp(A, rank, oversample=10, n_subspace=1, mode='column', +def rinterp_decomp(A, rank, oversample=10, n_subspace=2, mode='column', index_set=False, random_state=None): """Randomized interpolative decomposition (rID). @@ -139,7 +139,7 @@ def rinterp_decomp(A, rank, oversample=10, n_subspace=1, mode='column', Controls the oversampling of column space. Increasing this parameter may improve numerical accuracy. - n_subspace : integer, default: 1. + n_subspace : integer, default: 2. Parameter to control number of subspace iterations. Increasing this parameter may improve numerical accuracy. diff --git a/ristretto/lu.py b/ristretto/lu.py index eabb752..460d86e 100644 --- a/ristretto/lu.py +++ b/ristretto/lu.py @@ -12,10 +12,11 @@ from scipy import sparse from .sketch.transforms import johnson_lindenstrauss +from .sketch.utils import perform_subspace_iterations from .utils import conjugate_transpose -def rlu(A, rank, oversample=10, n_subspace=1, permute=False, random_state=None): +def rlu(A, rank, oversample=10, n_subspace=2, permute=False, random_state=None): """Randomized LU Decomposition. Randomized algorithm for computing the approximate low-rank LU @@ -41,7 +42,7 @@ def rlu(A, rank, oversample=10, n_subspace=1, permute=False, random_state=None): Controls the oversampling of column space. Increasing this parameter may improve numerical accuracy. - n_subspace : integer, default: 1. + n_subspace : integer, default: 2. Parameter to control number of subspace iterations. Increasing this parameter may improve numerical accuracy. 
@@ -82,8 +83,10 @@ def rlu(A, rank, oversample=10, n_subspace=1, permute=False, random_state=None): (available at `arXiv `_). """ # get random sketch - S = johnson_lindenstrauss(A, rank + oversample, n_subspace=n_subspace, - random_state=random_state) + S = johnson_lindenstrauss(A, rank + oversample, random_state=random_state) + + if n_subspace > 0: + S = perform_subspace_iterations(A, S, n_iter=n_subspace, axis=1) # Compute pivoted LU decompostion of the orthonormal basis matrix Q. # Q = P * L * U diff --git a/ristretto/pca.py b/ristretto/pca.py index 423f1ec..1787946 100644 --- a/ristretto/pca.py +++ b/ristretto/pca.py @@ -284,7 +284,7 @@ def robspca(X, n_components, alpha=0.1, beta=0.1, gamma=0.1, max_iter=1000, def rspca(X, n_components, alpha=0.1, beta=0.1, max_iter=1000, tol=1e-5, - verbose=0, oversample=10, n_subspace=1, random_state=None): + verbose=0, oversample=10, n_subspace=2, random_state=None): r"""Randomized Sparse Principal Component Analysis (rSPCA). Given a mean centered rectangular matrix `A` with shape `(m, n)`, SPCA @@ -328,7 +328,7 @@ def rspca(X, n_components, alpha=0.1, beta=0.1, max_iter=1000, tol=1e-5, Controls the oversampling of column space. Increasing this parameter may improve numerical accuracy. - n_subspace : integer, default: 1. + n_subspace : integer, default: 2. Parameter to control number of subspace iterations. Increasing this parameter may improve numerical accuracy. diff --git a/ristretto/qb.py b/ristretto/qb.py index f3af33c..0b42bcb 100644 --- a/ristretto/qb.py +++ b/ristretto/qb.py @@ -9,10 +9,11 @@ from scipy import linalg from .sketch.transforms import johnson_lindenstrauss, sparse_johnson_lindenstrauss +from .sketch.utils import perform_subspace_iterations from .utils import conjugate_transpose -def rqb(A, rank, oversample=10, n_subspace=1, sparse=False, random_state=None): +def rqb(A, rank, oversample=10, n_subspace=2, sparse=False, random_state=None): """Randomized QB Decomposition. Randomized algorithm for computing the approximate low-rank QB @@ -36,7 +37,7 @@ def rqb(A, rank, oversample=10, n_subspace=1, sparse=False, random_state=None): Controls the oversampling of column space. Increasing this parameter may improve numerical accuracy. - n_subspace : integer, default: 1. + n_subspace : integer, default: 2. Parameter to control number of subspace iterations. Increasing this parameter may improve numerical accuracy. @@ -73,11 +74,13 @@ def rqb(A, rank, oversample=10, n_subspace=1, sparse=False, random_state=None): (available at `arXiv `_). 
""" if sparse: - Q = sparse_johnson_lindenstrauss( - A, rank + oversample, n_subspace=n_subspace, random_state=random_state) + Q = sparse_johnson_lindenstrauss(A, rank + oversample, + random_state=random_state) else: - Q = johnson_lindenstrauss(A, rank + oversample, n_subspace=n_subspace, - random_state=random_state) + Q = johnson_lindenstrauss(A, rank + oversample, random_state=random_state) + + if n_subspace > 0: + Q = perform_subspace_iterations(A, Q, n_iter=n_subspace, axis=1) #Project the data matrix a into a lower dimensional subspace B = conjugate_transpose(Q).dot(A) diff --git a/ristretto/sketch/tests/test_transforms.py b/ristretto/sketch/tests/test_transforms.py index 97d989c..440b0e1 100644 --- a/ristretto/sketch/tests/test_transforms.py +++ b/ristretto/sketch/tests/test_transforms.py @@ -42,14 +42,6 @@ def test_johnson_linderstrauss(): assert row_trans.shape == (l, n) assert col_trans.shape == (m, l) - # ------------------------------------------------------------------------ - # tests subsapce iters - row_trans = johnson_lindenstrauss(A, l, n_subspace=2, axis=0) - col_trans = johnson_lindenstrauss(A, l, n_subspace=2, axis=1) - - assert row_trans.shape == (l, n) - assert col_trans.shape == (m, l) - # ------------------------------------------------------------------------ # tests raises incompatible axis assert_raises(ValueError, johnson_lindenstrauss, A, l, axis=2) @@ -72,14 +64,6 @@ def test_sparse_johnson_linderstrauss(): assert row_trans.shape == (l, n) assert col_trans.shape == (m, l) - # ------------------------------------------------------------------------ - # tests subsapce iters - row_trans = sparse_johnson_lindenstrauss(A, l, n_subspace=2, axis=0) - col_trans = sparse_johnson_lindenstrauss(A, l, n_subspace=2, axis=1) - - assert row_trans.shape == (l, n) - assert col_trans.shape == (m, l) - # ------------------------------------------------------------------------ # tests raises incompatible axis assert_raises(ValueError, sparse_johnson_lindenstrauss, A, l, axis=2) diff --git a/ristretto/sketch/transforms.py b/ristretto/sketch/transforms.py index 9b57c9b..795c59e 100644 --- a/ristretto/sketch/transforms.py +++ b/ristretto/sketch/transforms.py @@ -9,7 +9,6 @@ from scipy import sparse from . 
import _sketches -from .utils import perform_subspace_iterations from ..utils import check_random_state, safe_sparse_dot @@ -29,7 +28,7 @@ def randomized_uniform_sampling(A, l, axis=1, random_state=None): return np.take(A, idx, axis=axis) -def johnson_lindenstrauss(A, l, n_subspace=None, axis=1, random_state=None): +def johnson_lindenstrauss(A, l, axis=1, random_state=None): """ Given an m x n matrix A, and an integer l, this scheme computes an m x l @@ -50,18 +49,11 @@ def johnson_lindenstrauss(A, l, n_subspace=None, axis=1, random_state=None): # project A onto Omega if axis == 0: - Q = Omega.T.dot(A) - else: - Q = A.dot(Omega) + return Omega.T.dot(A) + return A.dot(Omega) - if n_subspace is not None: - Q = perform_subspace_iterations(A, Q, n_iter=n_subspace, axis=axis) - return Q - - -def sparse_johnson_lindenstrauss(A, l, n_subspace=None, density=None, axis=1, - random_state=None): +def sparse_johnson_lindenstrauss(A, l, density=None, axis=1, random_state=None): """ Given an m x n matrix A, and an integer l, this scheme computes an m x l @@ -89,14 +81,9 @@ def sparse_johnson_lindenstrauss(A, l, n_subspace=None, density=None, axis=1, # project A onto Omega if axis == 0: - Q = safe_sparse_dot(Omega.T, A) - else: - Q = safe_sparse_dot(A, Omega) + return safe_sparse_dot(Omega.T, A) + return safe_sparse_dot(A, Omega) - if n_subspace is not None: - Q = perform_subspace_iterations(A, Q, n_iter=n_subspace, axis=axis) - - return Q def fast_johnson_lindenstrauss(A, l, axis=1, random_state=None): """ @@ -120,7 +107,7 @@ def fast_johnson_lindenstrauss(A, l, axis=1, random_state=None): if axis == 0: diag = diag[:, np.newaxis] - + # discrete fourier transform of AD (or DA) FDA = fftpack.dct(A * diag, axis=axis, norm='ortho') diff --git a/ristretto/sketch/utils.py b/ristretto/sketch/utils.py index 1404ca4..062f247 100644 --- a/ristretto/sketch/utils.py +++ b/ristretto/sketch/utils.py @@ -13,7 +13,7 @@ def orthonormalize(A, overwrite_a=True, check_finite=False): return Q -def perform_subspace_iterations(A, Q, n_iter=1, axis=0): +def perform_subspace_iterations(A, Q, n_iter=2, axis=1): """perform subspace iterations on Q""" # TODO: can we figure out how not to transpose for row wise if axis == 0: diff --git a/ristretto/svd.py b/ristretto/svd.py index 865f864..8ec31da 100644 --- a/ristretto/svd.py +++ b/ristretto/svd.py @@ -16,7 +16,7 @@ from .utils import conjugate_transpose -def rsvd(A, rank, oversample=10, n_subspace=1, sparse=False, random_state=None): +def rsvd(A, rank, oversample=10, n_subspace=2, sparse=False, random_state=None): """Randomized Singular Value Decomposition. Randomized algorithm for computing the approximate low-rank singular value @@ -43,7 +43,7 @@ def rsvd(A, rank, oversample=10, n_subspace=1, sparse=False, random_state=None): Controls the oversampling of column space. Increasing this parameter may improve numerical accuracy. - n_subspace : integer, default: 1. + n_subspace : integer, default: 2. Parameter to control number of subspace iterations. Increasing this parameter may improve numerical accuracy.
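Note on the refactor above: the n_subspace keyword is dropped from the sketch transforms, and the decomposition routines (rqb, rsvd, reigh, reigh_nystroem, rlu, rcur, rdmd, ...) now run the subspace (power) iterations themselves through ristretto.sketch.utils.perform_subspace_iterations, with the default raised from 1 to 2 iterations. Below is a minimal, illustrative sketch of the two-step call pattern these routines now share; it assumes only the signatures visible in this patch, and the test matrix, rank, and oversampling values are made up for illustration.

    import numpy as np

    from ristretto.sketch.transforms import johnson_lindenstrauss
    from ristretto.sketch.utils import perform_subspace_iterations
    from ristretto.utils import conjugate_transpose

    # toy problem: a 100 x 60 matrix approximated at rank 5
    A = np.random.standard_normal((100, 60))
    rank, oversample, n_subspace = 5, 10, 2

    # step 1: random sketch of the column space
    # (the transform no longer performs subspace iterations itself)
    Q = johnson_lindenstrauss(A, rank + oversample, axis=1)

    # step 2: subspace iterations, now an explicit, separate utility call
    if n_subspace > 0:
        Q = perform_subspace_iterations(A, Q, n_iter=n_subspace, axis=1)

    # project A onto the sketched subspace, as rqb does after this patch
    B = conjugate_transpose(Q).dot(A)

The public front ends keep their signatures, so a call such as rqb(A, rank=5, oversample=10, n_subspace=2) essentially performs these two steps internally (choosing the sparse transform when sparse=True) before forming B, and passing n_subspace=0 skips the iteration step entirely.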