Skip to content

Commit

Permalink
Merge pull request #4 from KlugerLab/3-add-ks-test
Browse files Browse the repository at this point in the history
3 add ks test
  • Loading branch information
fra-pcmgf authored Oct 25, 2024
2 parents 7822b5f + cbc2585 commit 5d628cf
Show file tree
Hide file tree
Showing 7 changed files with 152 additions and 40 deletions.
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,15 +1,10 @@
#
# Creates a new release when a tag is pushed from git (do not create releases from github!).
# - trigger only when a tag pushed, use the format 'v1.2.3'. Do not create the tag from Github releases
# - creates a GitHub release
# - uploads the new version on PyPI https://pypi.org/p/dyson-equalizer
# Create a new GitHub release (do not run if one already exists!).
#
# Adapted from https://packaging.python.org/en/latest/guides/publishing-package-distribution-releases-using-github-actions-ci-cd-workflows/
name: Publish Python 🐍 distribution 📦 to PyPI and Github
name: Publish Python distribution to Github

on:
push:
tags:
- 'v*'
workflow_dispatch:

jobs:
Expand Down Expand Up @@ -37,34 +32,10 @@ jobs:
name: python-package-distributions
path: dist/

publish-to-pypi:
name: >-
Publish Python 🐍 distribution 📦 to PyPI
needs:
- build
runs-on: ubuntu-latest
environment:
name: pypi
url: https://pypi.org/p/dyson-equalizer
permissions:
id-token: write # IMPORTANT: mandatory for trusted publishing

steps:
- name: Download all the dists
uses: actions/download-artifact@v3
with:
name: python-package-distributions
path: dist/
- name: Publish distribution 📦 to PyPI
uses: pypa/gh-action-pypi-publish@release/v1

github-release:
if: github.event_name != 'release'
name: >-
Sign the Python 🐍 distribution 📦 with Sigstore
Sign the Python distribution with Sigstore
and upload them to GitHub Release
needs:
- publish-to-pypi
runs-on: ubuntu-latest

permissions:
Expand Down
58 changes: 58 additions & 0 deletions .github/workflows/publish_pypi_release.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
#
# Creates a new PyPI release when a tag is pushed from git
# - trigger only when a tag pushed, use the format 'v1.2.3'
# - uploads the new version on PyPI https://pypi.org/p/dyson-equalizer
# Adapted from https://packaging.python.org/en/latest/guides/publishing-package-distribution-releases-using-github-actions-ci-cd-workflows/
name: Publish Python distribution to PyPI

on:
push:
tags:
- 'v*'
workflow_dispatch:

jobs:
build:
name: Build distribution
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: "3.11"
- name: Install pypa/build
run: >-
python3 -m
pip install
build
--user
- name: Build a binary wheel and a source tarball
run: python3 -m build
- name: Store the distribution packages
uses: actions/upload-artifact@v3
with:
name: python-package-distributions
path: dist/

publish-to-pypi:
name: >-
Publish Python distribution to PyPI
needs:
- build
runs-on: ubuntu-latest
environment:
name: pypi
url: https://pypi.org/p/dyson-equalizer
permissions:
id-token: write

steps:
- name: Download all the dists
uses: actions/download-artifact@v3
with:
name: python-package-distributions
path: dist/
- name: Publish distribution 📦 to PyPI
uses: pypa/gh-action-pypi-publish@release/v1
46 changes: 44 additions & 2 deletions dyson_equalizer/algorithm.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ def marchenko_pastur(
x: (n) numpy.array or float
a vector or a value for the xs to be computed
gamma: float
the ratio between the number of rows and the number of columns
the ratio between the number of rows and the number of columns (between 0 and 1)
sigma: float, optional
the variance of the entries of the random matrix (defaults to 1)
Expand All @@ -197,13 +197,55 @@ def marchenko_pastur(
* :math:`\\beta_\\pm = \\sigma^2(1\\pm\\sqrt{\\gamma})^2`
"""
if gamma < 0 or gamma > 1:
raise ValueError(f"The gamma must be between 0 and 1, got {gamma}")

x = np.asarray(x)
s2 = sigma ** 2

beta_m, beta_p = s2 * ((1 - gamma ** 0.5) ** 2, (1 + gamma ** 0.5) ** 2) # noqa
beta_m, beta_p = np.array([(1 - gamma ** 0.5) ** 2, (1 + gamma ** 0.5) ** 2]) * sigma**2

r = np.zeros_like(x)
i = (x > beta_m) & (x < beta_p)
xi = x[i]
r[i] = 1 / (2 * np.pi * s2) * np.sqrt((beta_p - xi) * (xi - beta_m)) / (gamma * xi)
return r


def marchenko_pastur_cdf(
x,
gamma: float,
sigma: float = 1,
):
"""Computes the cumulative density function of the Marchenko-Pastur distribution for the given values
Parameters
----------
x: (n) numpy.array or float
a vector or a value for the xs to be computed
gamma: float
the ratio between the number of rows and the number of columns (between 0 and 1)
sigma: float, optional
the variance of the entries of the random matrix (defaults to 1)
Returns
-------
y: (n) numpy.ndarray
The values of the cdf of the Marchenko-Pastur distribution
"""
if gamma < 0 or gamma > 1:
raise ValueError(f"The gamma must be between 0 and 1, got {gamma}")

a, b = np.array([(1 - gamma ** 0.5) ** 2, (1 + gamma ** 0.5) ** 2]) * sigma ** 2

r = np.zeros_like(x)
r[x >= b] = 1
i = (x > a) & (x < b)
xi = x[i]

r[i] = (np.sqrt((xi - a) * (b - xi)) + (a + b) / 2 * np.asin((2 * xi - a - b) / (b - a)) -
(np.sqrt(a * b)) * np.asin(((a + b) * xi - 2 * a * b) / (xi * (b - a)))) / (
2 * np.pi * gamma * sigma ** 2) + .5

return r
24 changes: 19 additions & 5 deletions dyson_equalizer/plots.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@

import numpy as np
from matplotlib import pyplot as plt
from scipy.stats import ks_1samp

from dyson_equalizer.algorithm import marchenko_pastur
from dyson_equalizer.algorithm import marchenko_pastur, marchenko_pastur_cdf


def plot_mp_eigenvalues(
Expand Down Expand Up @@ -72,11 +73,17 @@ def plot_mp_eigenvalues(
ax.legend()


from matplotlib import pyplot as plt
import numpy as np
from typing import Sequence


def plot_mp_density(
eigs: np.ndarray,
gamma: float,
show_only_significant: int = None,
matrix_label: str = 'X',
bins: int | str | Sequence = 'sqrt',
ax: plt.Axes | None = None,
) -> None:
"""Plots the density of eigenvalues of the covariance matrix and compares to the Marchenko-Pastur distribution
Expand All @@ -95,14 +102,16 @@ def plot_mp_density(
This option is useful is some of the signal eigenvalues are much bigger than the noise.
matrix_label: str, optional
The name of the matrix that will be used as label (defaults to ``X``)
bins: int or sequence or str, default: 'sqrt'
The bins parameter used to build the histogram.
ax: plt.Axes, optional
A matplotlib Axes object. If none is provided, a new figure is created.
See Also
--------
dyson_equalizer.algorithm.marchenko_pastur
matplotlib.pyplot.Axes.hist
Examples
--------
Expand All @@ -129,16 +138,21 @@ def plot_mp_density(
if ax is None:
_, ax = plt.subplots()

ksr = ks_1samp(eigs, cdf=marchenko_pastur_cdf, args=[gamma])

eigs = np.asarray(eigs)
beta_p = (1 + gamma ** 0.5) ** 2
rank = np.sum(eigs > beta_p)

if show_only_significant is not None:
eigs = eigs[rank - show_only_significant:]

ax.hist(eigs, bins='auto', density=True, label=f'Eigenvalues of {matrix_label}')
ax.hist(eigs, bins=bins, density=True, label=f'Eigenvalues of {matrix_label}')
x = np.linspace(start=0, stop=eigs[0] * 1.05, num=1000)
mp = marchenko_pastur(x, gamma)
ax.plot(x, mp, color='red', label='MP density')
ax.axvline(beta_p, linestyle='--', color='green', label='MP upper edge β₊')
ax.legend()
ax.axvline(beta_p, linestyle='--', color='green', label='MP upper edge β₊')

ax.text(0.60, 0.94, f'KS p-val = {ksr.pvalue:.5f}', transform=ax.transAxes)

ax.legend(bbox_to_anchor=(0.95, 0.90), loc='upper right', borderaxespad=0.)
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
matplotlib>=3.6
numpy>=1.25
scipy>=1.13
pytest
#jupyterlab
#line_profiler
26 changes: 26 additions & 0 deletions tests/test_algorithm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import unittest

import numpy as np

from dyson_equalizer.algorithm import marchenko_pastur, marchenko_pastur_cdf


class AlgorithmTestCase(unittest.TestCase):
def test_marchenko_pastur_cdf(self):
gamma = .25
sigma = 2.0

x = np.linspace(start=0, stop=10, num=1000)
mp = marchenko_pastur(x, gamma, sigma=sigma)
mc = marchenko_pastur_cdf(x, gamma, sigma=sigma)

delta = mp.cumsum() / mp.cumsum()[-1] - mc
self.assertLess(delta.max(), 0.005 )

self.assertEqual(0, mc.min())
self.assertEqual(1, mc.max())



if __name__ == '__main__':
unittest.main()

0 comments on commit 5d628cf

Please sign in to comment.