Skip to content

Commit

Permalink
Finishing up polishing
Browse files Browse the repository at this point in the history
  • Loading branch information
peekxc committed Dec 4, 2023
1 parent 842d6fe commit 24aa2b3
Show file tree
Hide file tree
Showing 9 changed files with 78 additions and 61 deletions.
30 changes: 15 additions & 15 deletions include/_lanczos/lanczos.h
Original file line number Diff line number Diff line change
Expand Up @@ -124,21 +124,21 @@ auto lanczos_quadrature(
};

// Stochastic Lanczos quadrature method
// const std::function<F(F)> mf, // function to apply the eigenvalues of T = Q^T A Q
// std::function<F(int,F*,F*)>
template< std::floating_point F, LinearOperator Matrix, ThreadSafeRBG RBG, typename Lambda >
void slq (
const Matrix& A, // Any linear operator supporting .matvec() and .shape() methods
const Lambda& f, // Thread-safe function with signature f(int i, F* nodes, F* weights)
const function< bool(int) >& stop_check, // Function to check for convergence or early-stopping (takes no arguments)
const int nv, // Number of sample vectors to generate
const Distribution dist, // Isotropic distribution used to generate random vectors
RBG& rng, // Random bit generator
const int lanczos_degree, // Polynomial degree of the Krylov expansion
const F lanczos_rtol, // residual tolerance to consider subspace A-invariant
const int orth, // Number of vectors to re-orthogonalize against <= lanczos_degree
const int ncv, // Number of Lanczos vectors to keep in memory (per-thread)
const int num_threads, // Number of threads used to parallelize the computation
const int seed // Seed for random number generator for determinism
const Matrix& A, // Any linear operator supporting .matvec() and .shape() methods
const Lambda& f, // Thread-safe function with signature f(int i, F* nodes, F* weights)
const std::function< bool(int) >& stop_check, // Function to check for convergence or early-stopping (takes no arguments)
const int nv, // Number of sample vectors to generate
const Distribution dist, // Isotropic distribution used to generate random vectors
RBG& rng, // Random bit generator
const int lanczos_degree, // Polynomial degree of the Krylov expansion
const F lanczos_rtol, // residual tolerance to consider subspace A-invariant
const int orth, // Number of vectors to re-orthogonalize against <= lanczos_degree
const int ncv, // Number of Lanczos vectors to keep in memory (per-thread)
const int num_threads, // Number of threads used to parallelize the computation
const int seed // Seed for random number generator for determinism
){
using ArrayF = Eigen::Array< F, Dynamic, 1 >;
if (ncv < 2){ throw std::invalid_argument("Invalid number of lanczos vectors supplied; must be >= 2."); }
Expand Down Expand Up @@ -182,7 +182,6 @@ void slq (
generate_isotropic< F >(dist, m, rng, tid, q.data(), q_norm);

// Perform a lanczos iteration (populates alpha, beta)
// NOTE: seems to be crashing here:
lanczos_recurrence< F >(A, q.data(), lanczos_degree, lanczos_rtol, orth, alpha.data(), beta.data(), Q.data(), ncv);

// Obtain nodes + weights via quadrature algorithm
Expand Down Expand Up @@ -213,6 +212,7 @@ void sl_trace(
using VectorF = Eigen::Array< F, Dynamic, 1>;

// Parameterize the trace function (run in parallel)
// const auto N = mat.shape().second;
const auto trace_f = [lanczos_degree, &sf, &estimates](int i, [[maybe_unused]] F* q, [[maybe_unused]] F* Q, F* nodes, F* weights){
Eigen::Map< VectorF > nodes_v(nodes, lanczos_degree, 1); // no-op
Eigen::Map< VectorF > weights_v(weights, lanczos_degree, 1); // no-op
Expand Down Expand Up @@ -265,7 +265,7 @@ void sl_trace(
}

// Execute the stochastic Lanczos quadrature with the trace function
slq< float >(mat, trace_f, early_stop, nv, static_cast< Distribution >(dist), rbg, lanczos_degree, lanczos_rtol, orth, ncv, num_threads, seed);
slq< F >(mat, trace_f, early_stop, nv, static_cast< Distribution >(dist), rbg, lanczos_degree, lanczos_rtol, orth, ncv, num_threads, seed);
}

template< std::floating_point F, LinearOperator Matrix, ThreadSafeRBG RBG >
Expand Down
3 changes: 1 addition & 2 deletions include/_random_generator/vector_generator.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ using IndexType = unsigned int;
using LongIndexType = unsigned long;
static constexpr long num_bits = 64;

// TODO: make isotropic generator class that accepts random-bit genertaor
enum Distribution { rademacher = 0, normal = 1, rayleigh = 2 };

// SIMD-vectorized rademacher vector generation; makes N // 64 calls to random bit generator
Expand Down Expand Up @@ -42,7 +41,7 @@ void generate_normal(const unsigned long n, RBG& bit_generator, const int thread
static std::normal_distribution d { 0.0, 1.0 };
// const auto N = static_cast< unsigned long >(n / RBG::num_bits);
auto& gen = *bit_generator.generators[thread_id];
#pragma omp simd
// #pragma omp simd
for (auto i = static_cast< unsigned long >(0); i < n; ++i){
array[i] = d(gen);
}
Expand Down
2 changes: 1 addition & 1 deletion meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ inc_pkg = include_directories('include')

## Add dependencies, if they are available
dependency_map = {}
subdir('blas') # Configure BLAS / LAPACK
# subdir('blas') # Configure BLAS / LAPACK

## Include OpenMP, if possible
omp = dependency('openmp', required: false)
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ requires = ['meson-python', 'wheel', 'ninja', 'pybind11', 'numpy', 'pythran-open

[project]
name = "primate"
version = '0.2.1'
version = '0.2.2'
readme = "README.md"
classifiers = [
"Intended Audience :: Science/Research",
Expand Down
9 changes: 7 additions & 2 deletions src/primate/__init__.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
__version__ = '0.1.3'
import importlib.metadata
__version__ = importlib.metadata.version("primate")

from . import plotting
from . import ortho
from . import trace
from . import stats
from . import random
from .random import rademacher, normal
from . import diagonalize


# __all__ = ['rademacher', 'normal']

## Based on Numpy's usage: https://github.com/numpy/numpy/blob/v1.25.0/numpy/lib/utils.py#L75-L101
def get_include():
"""Return the directory that contains the primate's \\*.h header files.
Expand All @@ -33,3 +36,5 @@ def get_include():
import os
d = os.path.join(os.path.dirname(__file__), 'include')
return d


2 changes: 1 addition & 1 deletion src/primate/plotting.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import numpy as np
from scipy.special import erfinv
from typing import Union

def figure_trace(samples: Union[np.ndarray, dict], real_trace: float = None, **kwargs):
Expand All @@ -9,6 +8,7 @@ def figure_trace(samples: Union[np.ndarray, dict], real_trace: float = None, **k
from bokeh.plotting import figure
from bokeh.layouts import row, column
from bokeh.models import NumeralTickFormatter
from scipy.special import erfinv

main_title = "Stochastic trace estimates"
extra_titles = []
Expand Down
19 changes: 19 additions & 0 deletions src/primate/random.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,29 @@
import _random_gen
from math import prod
from numbers import Integral
from scipy.spatial.distance import squareform

_engines = ["splitmix64", "xoshiro256**", "lcg64", "pcg64", "mt64"]
_engine_prefixes = ["sx", "xs", "lcg", "pcg", "mt"]

def symmetric(n: int, dist: str = "normal", psd: bool = True, ew: np.ndarray = None):
N: int = n * (n-1) // 2
if dist == "uniform":
A = squareform(np.random.uniform(size=N))
np.einsum('ii->i', A)[:] = np.random.random(n)
elif dist == "normal":
A = squareform(np.random.normal(size=N))
np.einsum('ii->i', A)[:] = np.random.random(n)
else:
raise ValueError(f"Invalid distribution {dist} supplied")
ew = np.random.uniform(size=n, low=-1.0, high=1.0) if ew is None else np.array(ew)
if psd:
ew = (ew + 1.0) / 2.0
Q, R = np.linalg.qr(A)
A = Q @ np.diag(ew) @ Q.T
A = (A + A.T) / 2
return A

def rademacher(size: Union[int, tuple], rng: str = "splitmix64", seed: int = -1, dtype=np.float32):
"""Generates random vectors from the rademacher distribution.
Expand Down
12 changes: 11 additions & 1 deletion src/primate/trace.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def sl_trace (
Reference
---------
.. [1] Ubaru, S., Chen, J., & Saad, Y. (2017). Fast estimation of tr(f(A)) via stochastic Lanczos quadrature.
[1] Ubaru, S., Chen, J., & Saad, Y. (2017). Fast estimation of tr(f(A)) via stochastic Lanczos quadrature.
SIAM Journal on Matrix Analysis and Applications, 38(4), 1075-1099.
"""
# assert isinstance(A, spmatrix) or isinstance(A, sparray), "A must be a sparse matrix, for now."
Expand Down Expand Up @@ -122,6 +122,8 @@ def sl_trace (
rtol = 0.0 if rtol is None else float(rtol) # Early stopper relative standard error bound
num_threads = 0 if num_threads < 0 else int(num_threads)

## Adjust tolerance for the quadrature estimates
atol /= A.shape[1]

## Parameterize the matrix function and trace call
if isinstance(fun, str):
Expand All @@ -144,6 +146,14 @@ def sl_trace (

## Make the actual call
estimates = _lanczos.stochastic_trace(A, *sl_trace_args, **kwargs)
estimates *= A.shape[1]

## Plot
if plot:
from bokeh.plotting import show
from .plotting import figure_trace
show(figure_trace(estimates))

return estimates

## If no information is required, just return the trace estimate
Expand Down
60 changes: 22 additions & 38 deletions tests/test_lanczos.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,39 +3,29 @@
from scipy.sparse.linalg import eigsh, aslinearoperator
from scipy.sparse import csc_array, csr_array
from more_itertools import *
from primate.random import symmetric

## Add the test directory to the sys path
import sys
import primate
ind = [i for i, c in enumerate(primate.__file__) if c == '/'][-3]
sys.path.insert(0, primate.__file__[:ind] + '/tests')

def gen_sym(n: int, ew: np.ndarray = None):
ew = 0.2 + 1.5*np.linspace(0, 5, n) if ew is None else ew
Q,R = np.linalg.qr(np.random.uniform(size=(n,n)))
A = Q @ np.diag(ew) @ Q.T
A = (A + A.T) / 2
return A

def test_basic():
from primate.diagonalize import lanczos
np.random.seed(1234)
n = 10
A = np.random.uniform(size=(n, n)).astype(np.float32)
A = A @ A.T
A_sparse = csc_array(A)
v0 = np.random.uniform(size=A.shape[1])
A_sparse = csc_array(symmetric(n))
v0 = np.random.uniform(size=n)
(a,b), Q = lanczos(A_sparse, v0=v0, rtol=1e-8, orth=n-1, return_basis = True)
assert np.abs(max(eigh_tridiagonal(a,b, eigvals_only=True)) - max(eigsh(A_sparse)[0])) < 1e-4

def test_matvec():
from primate.diagonalize import lanczos
np.random.seed(1234)
n = 10
A = np.random.uniform(size=(n, n)).astype(np.float32)
A = A @ A.T
A_sparse = csc_array(A)
v0 = np.random.uniform(size=A.shape[1])
A_sparse = csc_array(symmetric(n))
v0 = np.random.uniform(size=n)
(a,b), Q = lanczos(A_sparse, v0=v0, rtol=1e-8, orth=0, return_basis = True)
rw, V = eigh_tridiagonal(a,b, eigvals_only=False)
y = np.linalg.norm(v0) * (Q @ V @ (V[0,:] * rw))
Expand All @@ -46,8 +36,7 @@ def test_accuracy():
from primate.diagonalize import lanczos
np.random.seed(1234)
n = 30
A = np.random.uniform(size=(n, n)).astype(np.float32)
A = A @ A.T
A = symmetric(n)
alpha, beta = np.zeros(n, dtype=np.float32), np.zeros(n, dtype=np.float32)

## In general not guaranteed, but with full re-orthogonalization it's likely (and deterministic w/ fixed seed)
Expand All @@ -64,8 +53,7 @@ def test_high_degree():
from primate.diagonalize import lanczos
np.random.seed(1234)
n = 30
A = np.random.uniform(size=(n, n)).astype(np.float32)
A = A @ A.T
A = symmetric(n)
alpha, beta = np.zeros(n, dtype=np.float32), np.zeros(n, dtype=np.float32)

## In general not guaranteed, but with full re-orthogonalization it's likely (and deterministic w/ fixed seed)
Expand All @@ -80,30 +68,24 @@ def test_quadrature():
from sanity import lanczos_quadrature
np.random.seed(1234)
n = 10
A = np.random.uniform(size=(n, n)).astype(np.float32)
A = A @ A.T
A_sparse = csc_array(A)
A = csc_array(symmetric(n))
v0 = np.random.uniform(size=A.shape[1])

## Test the near-equivalence of the weights and nodes
alpha, beta = lanczos(A_sparse, v0, deg=n, orth=n)
alpha, beta = lanczos(A, v0, deg=n, orth=n)
nw_test = _lanczos.quadrature(alpha, np.append(0, beta), n)
nw_true = lanczos_quadrature(A, v=v0, k=n, orth=n)
assert np.allclose(nw_true[0], nw_test[:,0], atol=1e-6)
assert np.allclose(nw_true[1], nw_test[:,1], atol=1e-6)


## TODO: see https://docs.scipy.org/doc/scipy/reference/generated/scipy.integrate.quadrature.html for alternative tol and rtol
def test_stochastic_quadrature():
from primate.diagonalize import _lanczos
assert hasattr(_lanczos, "stochastic_quadrature"), "Module compile failed"

from primate.trace import sl_gauss
np.random.seed(1234)
A = gen_sym(30)
n = A.shape[1]
nv, lanczos_deg = 250, 20
A = csc_array(A, dtype=np.float32)
n, nv, lanczos_deg = 30, 250, 20
A = csc_array(symmetric(30), dtype=np.float32)

## Test raw computation to ensure it's returning valid entries
# A, nv, dist, engine_id, seed, lanczos_degree, lanczos_rtol, orth, ncv, num_threads
Expand Down Expand Up @@ -158,25 +140,24 @@ def test_slq_fixed():
def test_slq_trace():
from primate.trace import sl_trace, _lanczos
np.random.seed(1234)
A = gen_sym(30)
n = A.shape[1]
A = csc_array(A, dtype=np.float32)
tr_est = sl_trace(A, maxiter = 20, num_threads=1)
assert len(tr_est) == 20
n = 25
A = csc_array(symmetric(n), dtype=np.float32)
tr_est = sl_trace(A, maxiter = 200, num_threads=1)
assert len(tr_est) == 200
assert np.isclose(np.mean(tr_est), A.trace(), atol=1.0)

def test_slq_trace_clt_atol():
from primate.trace import sl_trace, _lanczos
np.random.seed(1234)
A = gen_sym(30)
n = A.shape[1]
A = csc_array(A, dtype=np.float32)
n = 30
A = csc_array(symmetric(n), dtype=np.float32)

from primate.stats import sample_mean_cinterval
tr_est = sl_trace(A, nv = 100, num_threads=1, seed=5)
ci = np.array([sample_mean_cinterval(tr_est[:i], sdist='normal') if i > 1 else [-np.inf, np.inf] for i in range(len(tr_est))])

## Detect when, for the fixed set of samples, the trace estimatro should converge by CLT
atol_threshold = (A.trace() * 0.05) / n
atol_threshold = (A.trace() * 0.05)
clt_converged = np.ravel(0.5*np.diff(ci, axis=1)) <= atol_threshold
assert np.any(clt_converged), "Did not converge!"
converged_ind = np.flatnonzero(clt_converged)[0]
Expand All @@ -185,3 +166,6 @@ def test_slq_trace_clt_atol():
tr_est = sl_trace(A, nv = 100, atol=atol_threshold, num_threads=1, seed=5)
converged_online = np.take(np.flatnonzero(tr_est == 0.0), 0)
assert converged_online == converged_ind, "SLQ not converging at correct index!"



0 comments on commit 24aa2b3

Please sign in to comment.