Finishing up polishing

peekxc · Dec 4, 2023 · 24aa2b3 · 24aa2b3
1 parent 842d6fe
commit 24aa2b3
Show file tree

Hide file tree

Showing 9 changed files with 78 additions and 61 deletions.
diff --git a/include/_lanczos/lanczos.h b/include/_lanczos/lanczos.h
@@ -124,21 +124,21 @@ auto lanczos_quadrature(
 };
 
 // Stochastic Lanczos quadrature method
-// const std::function<F(F)> mf,     // function to apply the eigenvalues of T = Q^T A Q
+// std::function<F(int,F*,F*)>
 template< std::floating_point F, LinearOperator Matrix, ThreadSafeRBG RBG, typename Lambda >
 void slq (
-  const Matrix& A,                  // Any linear operator supporting .matvec() and .shape() methods
-  const Lambda& f,                  // Thread-safe function with signature f(int i, F* nodes, F* weights)
-  const function< bool(int) >& stop_check,   // Function to check for convergence or early-stopping (takes no arguments)
-  const int nv,                     // Number of sample vectors to generate
-  const Distribution dist,          // Isotropic distribution used to generate random vectors
-  RBG& rng,                         // Random bit generator
-  const int lanczos_degree,         // Polynomial degree of the Krylov expansion
-  const F lanczos_rtol,             // residual tolerance to consider subspace A-invariant
-  const int orth,                   // Number of vectors to re-orthogonalize against <= lanczos_degree
-  const int ncv,                    // Number of Lanczos vectors to keep in memory (per-thread)
-  const int num_threads,            // Number of threads used to parallelize the computation   
-  const int seed                    // Seed for random number generator for determinism
+  const Matrix& A,                            // Any linear operator supporting .matvec() and .shape() methods
+  const Lambda& f,                            // Thread-safe function with signature f(int i, F* nodes, F* weights)
+  const std::function< bool(int) >& stop_check, // Function to check for convergence or early-stopping (takes no arguments)
+  const int nv,                               // Number of sample vectors to generate
+  const Distribution dist,                    // Isotropic distribution used to generate random vectors
+  RBG& rng,                                   // Random bit generator
+  const int lanczos_degree,                   // Polynomial degree of the Krylov expansion
+  const F lanczos_rtol,                       // residual tolerance to consider subspace A-invariant
+  const int orth,                             // Number of vectors to re-orthogonalize against <= lanczos_degree
+  const int ncv,                              // Number of Lanczos vectors to keep in memory (per-thread)
+  const int num_threads,                      // Number of threads used to parallelize the computation   
+  const int seed                              // Seed for random number generator for determinism
 ){   
   using ArrayF = Eigen::Array< F, Dynamic, 1 >;
   if (ncv < 2){ throw std::invalid_argument("Invalid number of lanczos vectors supplied; must be >= 2."); }
@@ -182,7 +182,6 @@ void slq (
       generate_isotropic< F >(dist, m, rng, tid, q.data(), q_norm);
 
       // Perform a lanczos iteration (populates alpha, beta)
-      // NOTE: seems to be crashing here: 
       lanczos_recurrence< F >(A, q.data(), lanczos_degree, lanczos_rtol, orth, alpha.data(), beta.data(), Q.data(), ncv); 
 
       // Obtain nodes + weights via quadrature algorithm
@@ -213,6 +212,7 @@ void sl_trace(
   using VectorF = Eigen::Array< F, Dynamic, 1>;
 
   // Parameterize the trace function (run in parallel)
+  // const auto N = mat.shape().second;
   const auto trace_f = [lanczos_degree, &sf, &estimates](int i, [[maybe_unused]] F* q, [[maybe_unused]] F* Q, F* nodes, F* weights){
     Eigen::Map< VectorF > nodes_v(nodes, lanczos_degree, 1);     // no-op
     Eigen::Map< VectorF > weights_v(weights, lanczos_degree, 1); // no-op
@@ -265,7 +265,7 @@ void sl_trace(
   }
 
   // Execute the stochastic Lanczos quadrature with the trace function 
-  slq< float >(mat, trace_f, early_stop, nv, static_cast< Distribution >(dist), rbg, lanczos_degree, lanczos_rtol, orth, ncv, num_threads, seed);
+  slq< F >(mat, trace_f, early_stop, nv, static_cast< Distribution >(dist), rbg, lanczos_degree, lanczos_rtol, orth, ncv, num_threads, seed);
 }
 
 template< std::floating_point F, LinearOperator Matrix, ThreadSafeRBG RBG > 

diff --git a/include/_random_generator/vector_generator.h b/include/_random_generator/vector_generator.h
@@ -13,7 +13,6 @@ using IndexType = unsigned int;
 using LongIndexType = unsigned long;
 static constexpr long num_bits = 64; 
 
-// TODO: make isotropic generator class that accepts random-bit genertaor
 enum Distribution { rademacher = 0, normal = 1, rayleigh = 2 };
 
 // SIMD-vectorized rademacher vector generation; makes N // 64 calls to random bit generator
@@ -42,7 +41,7 @@ void generate_normal(const unsigned long n, RBG& bit_generator, const int thread
 	static std::normal_distribution d { 0.0, 1.0 };
 	// const auto N = static_cast< unsigned long >(n / RBG::num_bits);
 	auto& gen = *bit_generator.generators[thread_id];
-	#pragma omp simd
+	// #pragma omp simd
 	for (auto i = static_cast< unsigned long >(0); i < n; ++i){
 		array[i] = d(gen);
 	}

diff --git a/meson.build b/meson.build
@@ -65,7 +65,7 @@ inc_pkg = include_directories('include')
 
 ## Add dependencies, if they are available
 dependency_map = {}
-subdir('blas') # Configure BLAS / LAPACK 
+# subdir('blas') # Configure BLAS / LAPACK 
 
 ## Include OpenMP, if possible
 omp = dependency('openmp', required: false)

diff --git a/pyproject.toml b/pyproject.toml
@@ -4,7 +4,7 @@ requires = ['meson-python', 'wheel', 'ninja', 'pybind11', 'numpy', 'pythran-open
 
 [project]
 name = "primate"
-version = '0.2.1'
+version = '0.2.2'
 readme = "README.md"
 classifiers = [
 	"Intended Audience :: Science/Research",

diff --git a/src/primate/__init__.py b/src/primate/__init__.py
@@ -1,13 +1,16 @@
-__version__ = '0.1.3'
+import importlib.metadata
+__version__ = importlib.metadata.version("primate")
 
 from . import plotting 
 from . import ortho
 from . import trace
 from . import stats
-from . import random
+from .random import rademacher, normal
 from . import diagonalize
 
 
+# __all__ = ['rademacher', 'normal']
+
 ## Based on Numpy's usage: https://github.com/numpy/numpy/blob/v1.25.0/numpy/lib/utils.py#L75-L101
 def get_include():
   """Return the directory that contains the primate's \\*.h header files.
@@ -33,3 +36,5 @@ def get_include():
   import os 
   d = os.path.join(os.path.dirname(__file__), 'include')
   return d
+
+
diff --git a/src/primate/plotting.py b/src/primate/plotting.py
@@ -1,5 +1,4 @@
 import numpy as np
-from scipy.special import erfinv
 from typing import Union
 
 def figure_trace(samples: Union[np.ndarray, dict], real_trace: float = None, **kwargs):
@@ -9,6 +8,7 @@ def figure_trace(samples: Union[np.ndarray, dict], real_trace: float = None, **k
   from bokeh.plotting import figure
   from bokeh.layouts import row, column
   from bokeh.models import NumeralTickFormatter
+  from scipy.special import erfinv
 
   main_title = "Stochastic trace estimates"
   extra_titles = []

diff --git a/src/primate/random.py b/src/primate/random.py
@@ -3,10 +3,29 @@
 import _random_gen
 from math import prod 
 from numbers import Integral
+from scipy.spatial.distance import squareform
 
 _engines = ["splitmix64", "xoshiro256**", "lcg64", "pcg64", "mt64"]
 _engine_prefixes = ["sx", "xs", "lcg", "pcg", "mt"]
 
+def symmetric(n: int, dist: str = "normal", psd: bool = True, ew: np.ndarray = None):
+  N: int = n * (n-1) // 2
+  if dist == "uniform":
+    A = squareform(np.random.uniform(size=N))
+    np.einsum('ii->i', A)[:] = np.random.random(n)
+  elif dist == "normal": 
+    A = squareform(np.random.normal(size=N))
+    np.einsum('ii->i', A)[:] = np.random.random(n)
+  else: 
+    raise ValueError(f"Invalid distribution {dist} supplied")
+  ew = np.random.uniform(size=n, low=-1.0, high=1.0) if ew is None else np.array(ew)
+  if psd: 
+    ew = (ew + 1.0) / 2.0
+  Q, R = np.linalg.qr(A)
+  A = Q @ np.diag(ew) @ Q.T
+  A = (A + A.T) / 2
+  return A
+
 def rademacher(size: Union[int, tuple], rng: str = "splitmix64", seed: int = -1, dtype=np.float32):
   """Generates random vectors from the rademacher distribution. 
   

diff --git a/src/primate/trace.py b/src/primate/trace.py
@@ -74,7 +74,7 @@ def sl_trace (
 
   Reference
   ---------
-    .. [1] Ubaru, S., Chen, J., & Saad, Y. (2017). Fast estimation of tr(f(A)) via stochastic Lanczos quadrature. 
+    [1] Ubaru, S., Chen, J., & Saad, Y. (2017). Fast estimation of tr(f(A)) via stochastic Lanczos quadrature. 
     SIAM Journal on Matrix Analysis and Applications, 38(4), 1075-1099.
   """
   # assert isinstance(A, spmatrix) or isinstance(A, sparray), "A must be a sparse matrix, for now."
@@ -122,6 +122,8 @@ def sl_trace (
   rtol = 0.0 if rtol is None else float(rtol)           # Early stopper relative standard error bound
   num_threads = 0 if num_threads < 0 else int(num_threads)
 
+  ## Adjust tolerance for the quadrature estimates
+  atol /= A.shape[1]  
 
   ## Parameterize the matrix function and trace call
   if isinstance(fun, str):
@@ -144,6 +146,14 @@ def sl_trace (
 
   ## Make the actual call
   estimates = _lanczos.stochastic_trace(A, *sl_trace_args, **kwargs)
+  estimates *= A.shape[1]
+
+  ## Plot
+  if plot: 
+    from bokeh.plotting import show
+    from .plotting import figure_trace
+    show(figure_trace(estimates))
+
   return estimates
 
   ## If no information is required, just return the trace estimate 

diff --git a/tests/test_lanczos.py b/tests/test_lanczos.py
@@ -3,39 +3,29 @@
 from scipy.sparse.linalg import eigsh, aslinearoperator
 from scipy.sparse import csc_array, csr_array
 from more_itertools import * 
+from primate.random import symmetric
 
 ## Add the test directory to the sys path 
 import sys
 import primate
 ind = [i for i, c in enumerate(primate.__file__) if c == '/'][-3]
 sys.path.insert(0, primate.__file__[:ind] + '/tests')
 
-def gen_sym(n: int, ew: np.ndarray = None):
-  ew = 0.2 + 1.5*np.linspace(0, 5, n) if ew is None else ew
-  Q,R = np.linalg.qr(np.random.uniform(size=(n,n)))
-  A = Q @ np.diag(ew) @ Q.T
-  A = (A + A.T) / 2
-  return A
-
 def test_basic():
   from primate.diagonalize import lanczos
   np.random.seed(1234)
   n = 10
-  A = np.random.uniform(size=(n, n)).astype(np.float32)
-  A = A @ A.T
-  A_sparse = csc_array(A)
-  v0 = np.random.uniform(size=A.shape[1])
+  A_sparse = csc_array(symmetric(n))
+  v0 = np.random.uniform(size=n)
   (a,b), Q = lanczos(A_sparse, v0=v0, rtol=1e-8, orth=n-1, return_basis = True)
   assert np.abs(max(eigh_tridiagonal(a,b, eigvals_only=True)) - max(eigsh(A_sparse)[0])) < 1e-4
 
 def test_matvec():
   from primate.diagonalize import lanczos
   np.random.seed(1234)
   n = 10
-  A = np.random.uniform(size=(n, n)).astype(np.float32)
-  A = A @ A.T
-  A_sparse = csc_array(A)
-  v0 = np.random.uniform(size=A.shape[1])
+  A_sparse = csc_array(symmetric(n))
+  v0 = np.random.uniform(size=n)
   (a,b), Q = lanczos(A_sparse, v0=v0, rtol=1e-8, orth=0, return_basis = True) 
   rw, V = eigh_tridiagonal(a,b, eigvals_only=False)
   y = np.linalg.norm(v0) * (Q @ V @ (V[0,:] * rw))
@@ -46,8 +36,7 @@ def test_accuracy():
   from primate.diagonalize import lanczos
   np.random.seed(1234)
   n = 30
-  A = np.random.uniform(size=(n, n)).astype(np.float32)
-  A = A @ A.T
+  A = symmetric(n)
   alpha, beta = np.zeros(n, dtype=np.float32), np.zeros(n, dtype=np.float32)
 
   ## In general not guaranteed, but with full re-orthogonalization it's likely (and deterministic w/ fixed seed)
@@ -64,8 +53,7 @@ def test_high_degree():
   from primate.diagonalize import lanczos
   np.random.seed(1234)
   n = 30
-  A = np.random.uniform(size=(n, n)).astype(np.float32)
-  A = A @ A.T
+  A = symmetric(n)
   alpha, beta = np.zeros(n, dtype=np.float32), np.zeros(n, dtype=np.float32)
 
   ## In general not guaranteed, but with full re-orthogonalization it's likely (and deterministic w/ fixed seed)
@@ -80,30 +68,24 @@ def test_quadrature():
   from sanity import lanczos_quadrature
   np.random.seed(1234)
   n = 10
-  A = np.random.uniform(size=(n, n)).astype(np.float32)
-  A = A @ A.T
-  A_sparse = csc_array(A)
+  A = csc_array(symmetric(n))
   v0 = np.random.uniform(size=A.shape[1])
 
   ## Test the near-equivalence of the weights and nodes
-  alpha, beta = lanczos(A_sparse, v0, deg=n, orth=n)  
+  alpha, beta = lanczos(A, v0, deg=n, orth=n)  
   nw_test = _lanczos.quadrature(alpha, np.append(0, beta), n)
   nw_true = lanczos_quadrature(A, v=v0, k=n, orth=n)
   assert np.allclose(nw_true[0], nw_test[:,0], atol=1e-6)
   assert np.allclose(nw_true[1], nw_test[:,1], atol=1e-6)
 
-
-## TODO: see https://docs.scipy.org/doc/scipy/reference/generated/scipy.integrate.quadrature.html for alternative tol and rtol 
 def test_stochastic_quadrature():
   from primate.diagonalize import _lanczos
   assert hasattr(_lanczos, "stochastic_quadrature"), "Module compile failed"
 
   from primate.trace import sl_gauss
   np.random.seed(1234)
-  A = gen_sym(30)
-  n = A.shape[1]
-  nv, lanczos_deg = 250, 20
-  A = csc_array(A, dtype=np.float32)
+  n, nv, lanczos_deg = 30, 250, 20
+  A = csc_array(symmetric(30), dtype=np.float32)
 
   ## Test raw computation to ensure it's returning valid entries
   # A, nv, dist, engine_id, seed, lanczos_degree, lanczos_rtol, orth, ncv, num_threads
@@ -158,25 +140,24 @@ def test_slq_fixed():
 def test_slq_trace():
   from primate.trace import sl_trace, _lanczos
   np.random.seed(1234)
-  A = gen_sym(30)
-  n = A.shape[1]
-  A = csc_array(A, dtype=np.float32)
-  tr_est = sl_trace(A, maxiter = 20, num_threads=1)
-  assert len(tr_est) == 20
+  n = 25
+  A = csc_array(symmetric(n), dtype=np.float32)
+  tr_est = sl_trace(A, maxiter = 200, num_threads=1)
+  assert len(tr_est) == 200
+  assert np.isclose(np.mean(tr_est), A.trace(), atol=1.0)
 
 def test_slq_trace_clt_atol():
   from primate.trace import sl_trace, _lanczos
   np.random.seed(1234)
-  A = gen_sym(30)
-  n = A.shape[1]
-  A = csc_array(A, dtype=np.float32)
+  n = 30
+  A = csc_array(symmetric(n), dtype=np.float32)
 
   from primate.stats import sample_mean_cinterval
   tr_est = sl_trace(A, nv = 100, num_threads=1, seed=5)
   ci = np.array([sample_mean_cinterval(tr_est[:i], sdist='normal') if i > 1 else [-np.inf, np.inf] for i in range(len(tr_est))])
 
   ## Detect when, for the fixed set of samples, the trace estimatro should converge by CLT 
-  atol_threshold = (A.trace() * 0.05) / n
+  atol_threshold = (A.trace() * 0.05)
   clt_converged = np.ravel(0.5*np.diff(ci, axis=1)) <= atol_threshold
   assert np.any(clt_converged), "Did not converge!"
   converged_ind = np.flatnonzero(clt_converged)[0]
@@ -185,3 +166,6 @@ def test_slq_trace_clt_atol():
   tr_est = sl_trace(A, nv = 100, atol=atol_threshold, num_threads=1, seed=5)
   converged_online = np.take(np.flatnonzero(tr_est == 0.0), 0)
   assert converged_online == converged_ind, "SLQ not converging at correct index!"
+
+
+