Merge pull request #4 from KlugerLab/3-add-ks-test

3 add ks test
KlugerLab · Oct 25, 2024 · 5d628cf · 5d628cf
2 parents 7822b5f + cbc2585
commit 5d628cf
Show file tree

Hide file tree

Showing 7 changed files with 152 additions and 40 deletions.
diff --git a/.github/workflows/documentation.yml → .github/workflows/publish_documentation.yml b/.github/workflows/documentation.yml → .github/workflows/publish_documentation.yml
diff --git a/.github/workflows/publish_release.yml → .github/workflows/publish_github_release.yml b/.github/workflows/publish_release.yml → .github/workflows/publish_github_release.yml
@@ -1,15 +1,10 @@
 #
-# Creates a new release when a tag is pushed from git (do not create releases from github!).
-# - trigger only when a tag pushed, use the format 'v1.2.3'. Do not create the tag from Github releases
-# - creates a GitHub release
-# - uploads the new version on PyPI https://pypi.org/p/dyson-equalizer
+# Create a new GitHub release (do not run if one already exists!).
+#
 # Adapted from https://packaging.python.org/en/latest/guides/publishing-package-distribution-releases-using-github-actions-ci-cd-workflows/
-name: Publish Python 🐍 distribution 📦 to PyPI and Github
+name: Publish Python distribution to Github
 
 on:
-  push:
-    tags:
-      - 'v*'
   workflow_dispatch:
 
 jobs:
@@ -37,34 +32,10 @@ jobs:
         name: python-package-distributions
         path: dist/
 
-  publish-to-pypi:
-    name: >-
-      Publish Python 🐍 distribution 📦 to PyPI
-    needs:
-    - build
-    runs-on: ubuntu-latest
-    environment:
-      name: pypi
-      url: https://pypi.org/p/dyson-equalizer
-    permissions:
-      id-token: write  # IMPORTANT: mandatory for trusted publishing
-
-    steps:
-    - name: Download all the dists
-      uses: actions/download-artifact@v3
-      with:
-        name: python-package-distributions
-        path: dist/
-    - name: Publish distribution 📦 to PyPI
-      uses: pypa/gh-action-pypi-publish@release/v1
-
   github-release:
-    if: github.event_name != 'release'
     name: >-
-      Sign the Python 🐍 distribution 📦 with Sigstore
+      Sign the Python distribution with Sigstore
       and upload them to GitHub Release
-    needs:
-    - publish-to-pypi
     runs-on: ubuntu-latest
 
     permissions:

diff --git a/.github/workflows/publish_pypi_release.yml b/.github/workflows/publish_pypi_release.yml
@@ -0,0 +1,58 @@
+#
+# Creates a new PyPI release when a tag is pushed from git
+# - trigger only when a tag pushed, use the format 'v1.2.3'
+# - uploads the new version on PyPI https://pypi.org/p/dyson-equalizer
+# Adapted from https://packaging.python.org/en/latest/guides/publishing-package-distribution-releases-using-github-actions-ci-cd-workflows/
+name: Publish Python distribution to PyPI
+
+on:
+  push:
+    tags:
+      - 'v*'
+  workflow_dispatch:
+
+jobs:
+  build:
+    name: Build distribution
+    runs-on: ubuntu-latest
+
+    steps:
+    - uses: actions/checkout@v4
+    - name: Set up Python
+      uses: actions/setup-python@v4
+      with:
+        python-version: "3.11"
+    - name: Install pypa/build
+      run: >-
+        python3 -m
+        pip install
+        build
+        --user
+    - name: Build a binary wheel and a source tarball
+      run: python3 -m build
+    - name: Store the distribution packages
+      uses: actions/upload-artifact@v3
+      with:
+        name: python-package-distributions
+        path: dist/
+
+  publish-to-pypi:
+    name: >-
+      Publish Python distribution to PyPI
+    needs:
+    - build
+    runs-on: ubuntu-latest
+    environment:
+      name: pypi
+      url: https://pypi.org/p/dyson-equalizer
+    permissions:
+      id-token: write
+
+    steps:
+    - name: Download all the dists
+      uses: actions/download-artifact@v3
+      with:
+        name: python-package-distributions
+        path: dist/
+    - name: Publish distribution 📦 to PyPI
+      uses: pypa/gh-action-pypi-publish@release/v1
diff --git a/dyson_equalizer/algorithm.py b/dyson_equalizer/algorithm.py
@@ -174,7 +174,7 @@ def marchenko_pastur(
     x: (n) numpy.array or float
         a vector or a value for the xs to be computed
     gamma: float
-        the ratio between the number of rows and the number of columns
+        the ratio between the number of rows and the number of columns (between 0 and 1)
     sigma: float, optional
         the variance of the entries of the random matrix (defaults to 1)
 
@@ -197,13 +197,55 @@ def marchenko_pastur(
       * :math:`\\beta_\\pm = \\sigma^2(1\\pm\\sqrt{\\gamma})^2`
 
     """
+    if gamma < 0 or gamma > 1:
+        raise ValueError(f"The gamma must be between 0 and 1, got {gamma}")
+
     x = np.asarray(x)
     s2 = sigma ** 2
 
-    beta_m, beta_p = s2 * ((1 - gamma ** 0.5) ** 2, (1 + gamma ** 0.5) ** 2) # noqa
+    beta_m, beta_p = np.array([(1 - gamma ** 0.5) ** 2, (1 + gamma ** 0.5) ** 2]) * sigma**2
 
     r = np.zeros_like(x)
     i = (x > beta_m) & (x < beta_p)
     xi = x[i]
     r[i] = 1 / (2 * np.pi * s2) * np.sqrt((beta_p - xi) * (xi - beta_m)) / (gamma * xi)
     return r
+
+
+def marchenko_pastur_cdf(
+        x,
+        gamma: float,
+        sigma: float = 1,
+):
+    """Computes the cumulative density function of the Marchenko-Pastur distribution for the given values
+
+    Parameters
+    ----------
+    x: (n) numpy.array or float
+        a vector or a value for the xs to be computed
+    gamma: float
+        the ratio between the number of rows and the number of columns (between 0 and 1)
+    sigma: float, optional
+        the variance of the entries of the random matrix (defaults to 1)
+
+    Returns
+    -------
+    y: (n) numpy.ndarray
+        The values of the cdf of the Marchenko-Pastur distribution
+
+    """
+    if gamma < 0 or gamma > 1:
+        raise ValueError(f"The gamma must be between 0 and 1, got {gamma}")
+
+    a, b = np.array([(1 - gamma ** 0.5) ** 2, (1 + gamma ** 0.5) ** 2]) * sigma ** 2
+
+    r = np.zeros_like(x)
+    r[x >= b] = 1
+    i = (x > a) & (x < b)
+    xi = x[i]
+
+    r[i] = (np.sqrt((xi - a) * (b - xi)) + (a + b) / 2 * np.asin((2 * xi - a - b) / (b - a)) -
+            (np.sqrt(a * b)) * np.asin(((a + b) * xi - 2 * a * b) / (xi * (b - a)))) / (
+                       2 * np.pi * gamma * sigma ** 2) + .5
+
+    return r
diff --git a/dyson_equalizer/plots.py b/dyson_equalizer/plots.py
@@ -3,8 +3,9 @@
 
 import numpy as np
 from matplotlib import pyplot as plt
+from scipy.stats import ks_1samp
 
-from dyson_equalizer.algorithm import marchenko_pastur
+from dyson_equalizer.algorithm import marchenko_pastur, marchenko_pastur_cdf
 
 
 def plot_mp_eigenvalues(
@@ -72,11 +73,17 @@ def plot_mp_eigenvalues(
     ax.legend()
 
 
+from matplotlib import pyplot as plt
+import numpy as np
+from typing import Sequence
+
+
 def plot_mp_density(
         eigs: np.ndarray,
         gamma: float,
         show_only_significant: int = None,
         matrix_label: str = 'X',
+        bins: int | str | Sequence = 'sqrt',
         ax: plt.Axes | None = None,
 ) -> None:
     """Plots the density of eigenvalues of the covariance matrix and compares to the Marchenko-Pastur distribution
@@ -95,14 +102,16 @@ def plot_mp_density(
         This option is useful is some of the signal eigenvalues are much bigger than the noise.
     matrix_label: str, optional
         The name of the matrix that will be used as label (defaults to ``X``)
+    bins: int or sequence or str, default: 'sqrt'
+        The bins parameter used to build the histogram.
     ax: plt.Axes, optional
         A matplotlib Axes object. If none is provided, a new figure is created.
 
     See Also
     --------
 
     dyson_equalizer.algorithm.marchenko_pastur
-
+    matplotlib.pyplot.Axes.hist
 
     Examples
     --------
@@ -129,16 +138,21 @@ def plot_mp_density(
     if ax is None:
         _, ax = plt.subplots()
 
+    ksr = ks_1samp(eigs, cdf=marchenko_pastur_cdf, args=[gamma])
+
     eigs = np.asarray(eigs)
     beta_p = (1 + gamma ** 0.5) ** 2
     rank = np.sum(eigs > beta_p)
 
     if show_only_significant is not None:
         eigs = eigs[rank - show_only_significant:]
 
-    ax.hist(eigs, bins='auto', density=True, label=f'Eigenvalues of {matrix_label}')
+    ax.hist(eigs, bins=bins, density=True, label=f'Eigenvalues of {matrix_label}')
     x = np.linspace(start=0, stop=eigs[0] * 1.05, num=1000)
     mp = marchenko_pastur(x, gamma)
     ax.plot(x, mp, color='red', label='MP density')
-    ax.axvline(beta_p, linestyle='--',  color='green', label='MP upper edge β₊')
-    ax.legend()
+    ax.axvline(beta_p, linestyle='--', color='green', label='MP upper edge β₊')
+
+    ax.text(0.60, 0.94, f'KS p-val = {ksr.pvalue:.5f}', transform=ax.transAxes)
+
+    ax.legend(bbox_to_anchor=(0.95, 0.90), loc='upper right', borderaxespad=0.)
diff --git a/requirements.txt b/requirements.txt
@@ -1,5 +1,6 @@
 matplotlib>=3.6
 numpy>=1.25
+scipy>=1.13
 pytest
 #jupyterlab
 #line_profiler
diff --git a/tests/test_algorithm.py b/tests/test_algorithm.py
@@ -0,0 +1,26 @@
+import unittest
+
+import numpy as np
+
+from dyson_equalizer.algorithm import marchenko_pastur, marchenko_pastur_cdf
+
+
+class AlgorithmTestCase(unittest.TestCase):
+    def test_marchenko_pastur_cdf(self):
+        gamma = .25
+        sigma = 2.0
+
+        x = np.linspace(start=0, stop=10, num=1000)
+        mp = marchenko_pastur(x, gamma, sigma=sigma)
+        mc = marchenko_pastur_cdf(x, gamma, sigma=sigma)
+
+        delta = mp.cumsum() / mp.cumsum()[-1] - mc
+        self.assertLess(delta.max(), 0.005 )
+
+        self.assertEqual(0, mc.min())
+        self.assertEqual(1, mc.max())
+
+
+
+if __name__ == '__main__':
+    unittest.main()