RKHS inner product #550

Open
wants to merge 13 commits into base: develop
18 changes: 18 additions & 0 deletions docs/refs.bib
@@ -31,6 +31,14 @@ @article{baillo+grane_2009_local
keywords = {62G08,62G30,Cross-validation,Fourier expansion,Functional data,Kernel regression,Local linear regression,Nonparametric smoothing}
}

@book{berlinet+thomas-agnan_2011_reproducing,
title = {Reproducing Kernel {{Hilbert}} Spaces in Probability and Statistics},
author = {Berlinet, Alain and Thomas-Agnan, Christine},
year = {2011},
publisher = {Springer Science \& Business Media}
}

@article{berrendero++_2016_mrmr,
title = {The {{mRMR}} Variable Selection Method: A Comparative Study for Functional Data},
shorttitle = {The {{mRMR}} Variable Selection Method},
@@ -308,6 +316,16 @@ @article{ghosh+chaudhuri_2005_maximum
keywords = {Bayes risk,cross-validation,data depth,elliptic symmetry,kernel density estimation,location shift model,Mahalanobis distance,misclassification rate,Vapnik Chervonenkis dimension}
}

@article{gutierrez++_1992_numerical,
title = {On the Numerical Expansion of a Second Order Stochastic Process},
author = {Guti{\'e}rrez, Ram{\'o}n and Ruiz, Juan Carlos and Valderrama, Mariano J.},
journal = {Applied Stochastic Models and Data Analysis},
volume = {8},
number = {2},
pages = {67--77},
year = {1992},
publisher = {Wiley Online Library}
}

@article{hastie++_1995_penalized,
title = {Penalized Discriminant Analysis},
author = {Hastie, Trevor and Buja, Andreas and Tibshirani, Robert},
194 changes: 194 additions & 0 deletions examples/plot_rkhs_inner_product.py
@@ -0,0 +1,194 @@
"""
Reproducing Kernel Hilbert Space Inner Product for the Brownian Bridge
=======================================================================

This example shows how to compute the inner product of two functions in the
reproducing kernel Hilbert space (RKHS) of the Brownian Bridge.
"""

# Author: Martín Sánchez Signorini
# License: MIT

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from skfda.misc.rkhs_product import rkhs_inner_product
from skfda.representation import FDataGrid
from skfda.typing._numpy import NDArrayFloat

###############################################################################
# The kernel corresponding to a Brownian Bridge process
# :footcite:p:`gutierrez++_1992_numerical` in the interval :math:`[0, 1]` is
#
# .. math::
#     k(s, t) = \min(s, t) - st
#
# The RKHS inner product method
# :func:`~skfda.misc.rkhs_product.rkhs_inner_product` requires the kernel to
# be given as a function that takes two vectors of points and returns the
# matrix of kernel values on the corresponding grid.
# The following function defines the Brownian Bridge kernel in this form.


def brownian_bridge_covariance(
    t: NDArrayFloat,
    s: NDArrayFloat,
) -> NDArrayFloat:
    """
    Covariance function of the Brownian Bridge process.

    It receives two vectors of points and returns the matrix of values of the
    covariance function on the corresponding grid.
    """
    t_col = t[:, None]
    s_row = s[None, :]
    return np.minimum(t_col, s_row) - t_col * s_row
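

###############################################################################
# As a quick sanity check of this definition (using only NumPy, which is
# already imported), we can evaluate the covariance on a small grid and verify
# that the result is a symmetric matrix with :math:`k(t, t) = t - t^2` on the
# diagonal.

check_points = np.linspace(0, 1, 5)
cov_matrix = brownian_bridge_covariance(check_points, check_points)
print(cov_matrix.shape)
print(np.allclose(cov_matrix, cov_matrix.T))
print(np.allclose(np.diag(cov_matrix), check_points - check_points**2))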


###############################################################################
# The RKHS of this kernel :footcite:p:`berlinet+thomas-agnan_2011_reproducing`
# is the set of functions
#
# .. math::
#     f: [0, 1] \to \mathbb{R} \quad \text{ such that }
#     f \text{ is absolutely continuous, }
#     f(0) = f(1) = 0 \text{ and }
#     f' \in L^2([0, 1]).
#
# For this example we will be using the following functions in this RKHS:
#
# .. math::
#     \begin{align}
#     f(t) &= 1 - (2t - 1)^2 \\
#     g(t) &= \sin(\pi t)
#     \end{align}
#
# The following code defines a function that computes the inner product of
# these two functions in the RKHS of the Brownian Bridge for a given number
# of discretization points.

def brownian_bridge_rkhs_inner_product(
    num_points: int,
) -> float:
    """Inner product of two functions in the RKHS of the Brownian Bridge."""
    # Define the functions
    # Remove first and last points to avoid a singular covariance matrix
    grid_points = np.linspace(0, 1, num_points + 2)[1:-1]
    f = FDataGrid(
        [1 - (2 * grid_points - 1)**2],
        grid_points,
    )
    g = FDataGrid(
        [np.sin(np.pi * grid_points)],
        grid_points,
    )

    # Compute the inner product
    return rkhs_inner_product(  # type: ignore[no-any-return]
        fdata1=f,
        fdata2=g,
        cov_function=brownian_bridge_covariance,
    )[0]
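

###############################################################################
# As a minimal illustration of its use (the choice of 1000 points here is
# arbitrary), a single call returns the approximated inner product; its
# accuracy is studied systematically below.

print(brownian_bridge_rkhs_inner_product(1000))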


# Plot the functions f and g in the same figure
plot_points = np.linspace(0, 1, 100)
FDataGrid(
    [
        1 - (2 * plot_points - 1)**2,
        np.sin(np.pi * plot_points),
    ],
    plot_points,
).plot()
plt.show()

###############################################################################
# The inner product of two functions :math:`f, g` in this RKHS
# :footcite:p:`berlinet+thomas-agnan_2011_reproducing` is
#
# .. math::
#     \langle f, g \rangle = \int_0^1 f'(t) g'(t) dt.
#
# Therefore, the exact value of the inner product of these two functions in
# the RKHS of the Brownian Bridge can be calculated explicitly.
# First, we have that their derivatives are
#
# .. math::
#     \begin{align}
#     f'(t) &= 4(1 - 2t) \\
#     g'(t) &= \pi \cos(\pi t)
#     \end{align}
#
# Then, the inner product in :math:`L^2([0, 1])` of these derivatives is
#
# .. math::
#     \begin{align}
#     \langle f', g' \rangle &= \int_0^1 f'(t) g'(t) dt \\
#     &= \int_0^1 4(1 - 2t) \pi \cos(\pi t) dt \\
#     &= \frac{16}{\pi}.
#     \end{align}
#
# This is the exact value of their inner product in the RKHS of the Brownian
# Bridge.
# Thus, we measure the difference between the exact value and the value
# computed by the method :func:`~skfda.misc.rkhs_product.rkhs_inner_product`
# for increasing numbers of discretization points.
# In particular, we use from 500 to 10000 points in steps of 500.
#
# The following code computes the inner product for each number of points and
# plots the difference between the exact value and the computed value.

num_points_list = np.arange(
    start=500,
    stop=10001,
    step=500,
)
expected_value = 16 / np.pi

errors_df = pd.DataFrame(
    columns=["Number of points of discretization", "Absolute error"],
)

for num_points in num_points_list:
    computed_value = brownian_bridge_rkhs_inner_product(num_points)
    error = np.abs(computed_value - expected_value)

    # Add new row to the dataframe
    errors_df.loc[len(errors_df)] = [num_points, error]

# Plot the errors
errors_df.plot(
    x="Number of points of discretization",
    y="Absolute error",
    title="Absolute error of the inner product",
    xlabel="Number of points of discretization",
    ylabel="Absolute error",
)
plt.show()


###############################################################################
# The following code plots the errors using a logarithmic scale on the y-axis.

errors_df.plot(
    x="Number of points of discretization",
    y="Absolute error",
    title="Absolute error of the inner product",
    xlabel="Number of points of discretization",
    ylabel="Absolute error",
    logy=True,
)
plt.show()
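

###############################################################################
# As an additional sanity check of the closed-form value :math:`16 / \pi`
# derived above, the integral of the product of the derivatives can also be
# approximated numerically with the trapezoidal rule (using only NumPy; the
# grid of 10001 points used here is arbitrary).

check_grid = np.linspace(0, 1, 10001)
derivative_product = 4 * (1 - 2 * check_grid) * np.pi * np.cos(np.pi * check_grid)
print(np.trapz(derivative_product, check_grid), expected_value)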

###############################################################################
# This example illustrates the convergence of the method
# :func:`~skfda.misc.rkhs_product.rkhs_inner_product` for the Brownian Bridge
# kernel and shows how to apply it with a custom covariance function.

###############################################################################
# **References:**
#
# .. footbibliography::
2 changes: 2 additions & 0 deletions skfda/misc/covariances.py
@@ -808,6 +808,8 @@ def __call__(self, x: ArrayLike, y: ArrayLike) -> NDArrayFloat:
Returns:
Covariance function evaluated at the grid formed by x and y.
"""
x = _transform_to_2d(x)
y = _transform_to_2d(y)
return self.cov_fdata([x, y], grid=True)[0, ..., 0]


Expand Down