ENH Improve variable names in KernelPCA #19908

Merged

Changes from 6 commits

78 changes: 40 additions & 38 deletions sklearn/decomposition/_kernel_pca.py
@@ -53,9 +53,9 @@ class KernelPCA(TransformerMixin, BaseEstimator):
 
     alpha : float, default=1.0
         Hyperparameter of the ridge regression that learns the
-        inverse transform (when fit_inverse_transform=True).
+        inverse transform (when enable_inverse_transform=True).
 
-    fit_inverse_transform : bool, default=False
+    enable_inverse_transform : bool, default=False
         Learn the inverse transform for non-precomputed kernels.
         (i.e. learn to find the pre-image of a point)
 
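For illustration, a minimal usage sketch of the renamed parameter (this assumes the branch in this PR; in released scikit-learn the parameter is spelled `fit_inverse_transform`):

```python
from sklearn.datasets import make_circles
from sklearn.decomposition import KernelPCA

X, _ = make_circles(n_samples=200, factor=0.3, noise=0.05, random_state=0)

# enable_inverse_transform (formerly fit_inverse_transform) makes fit()
# also learn the ridge-regression pre-image model used by inverse_transform()
kpca = KernelPCA(n_components=2, kernel="rbf", gamma=2.,
                 enable_inverse_transform=True, alpha=0.1)
X_kpca = kpca.fit_transform(X)
X_back = kpca.inverse_transform(X_kpca)  # approximate pre-images in input space
print(X_back.shape)  # (200, 2)
```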
@@ -103,22 +103,22 @@ class KernelPCA(TransformerMixin, BaseEstimator):
 
     Attributes
     ----------
-    lambdas_ : ndarray of shape (n_components,)
+    eigenvalues_ : ndarray of shape (n_components,)
         Eigenvalues of the centered kernel matrix in decreasing order.
         If `n_components` and `remove_zero_eig` are not set,
         then all values are stored.
 
-    alphas_ : ndarray of shape (n_samples, n_components)
+    eigenvectors_ : ndarray of shape (n_samples, n_components)
         Eigenvectors of the centered kernel matrix. If `n_components` and
         `remove_zero_eig` are not set, then all components are stored.
 
     dual_coef_ : ndarray of shape (n_samples, n_features)
         Inverse transform matrix. Only available when
-        ``fit_inverse_transform`` is True.
+        ``enable_inverse_transform`` is True.
 
     X_transformed_fit_ : ndarray of shape (n_samples, n_components)
         Projection of the fitted data on the kernel principal components.
-        Only available when ``fit_inverse_transform`` is True.
+        Only available when ``enable_inverse_transform`` is True.
 
     X_fit_ : ndarray of shape (n_samples, n_features)
         The data used to fit the model. If `copy_X=False`, then `X_fit_` is
@@ -145,20 +145,21 @@ class KernelPCA(TransformerMixin, BaseEstimator):
     @_deprecate_positional_args
     def __init__(self, n_components=None, *, kernel="linear",
                  gamma=None, degree=3, coef0=1, kernel_params=None,
-                 alpha=1.0, fit_inverse_transform=False, eigen_solver='auto',
-                 tol=0, max_iter=None, remove_zero_eig=False,
-                 random_state=None, copy_X=True, n_jobs=None):
-        if fit_inverse_transform and kernel == 'precomputed':
+                 alpha=1.0, enable_inverse_transform=False,
+                 eigen_solver='auto', tol=0, max_iter=None,
+                 remove_zero_eig=False, random_state=None,
+                 copy_X=True, n_jobs=None):
+        if enable_inverse_transform and kernel == 'precomputed':
             raise ValueError(
-                "Cannot fit_inverse_transform with a precomputed kernel.")
+                "Cannot enable_inverse_transform with a precomputed kernel.")
         self.n_components = n_components
         self.kernel = kernel
         self.kernel_params = kernel_params
         self.gamma = gamma
         self.degree = degree
         self.coef0 = coef0
         self.alpha = alpha
-        self.fit_inverse_transform = fit_inverse_transform
+        self.enable_inverse_transform = enable_inverse_transform
         self.eigen_solver = eigen_solver
         self.remove_zero_eig = remove_zero_eig
         self.tol = tol
@@ -206,33 +207,33 @@ def _fit_transform(self, K):
         eigen_solver = self.eigen_solver
 
         if eigen_solver == 'dense':
-            self.lambdas_, self.alphas_ = linalg.eigh(
+            self.eigenvalues_, self.eigenvectors_ = linalg.eigh(
                 K, eigvals=(K.shape[0] - n_components, K.shape[0] - 1))
         elif eigen_solver == 'arpack':
             v0 = _init_arpack_v0(K.shape[0], self.random_state)
-            self.lambdas_, self.alphas_ = eigsh(K, n_components,
+            self.eigenvalues_, self.eigenvectors_ = eigsh(K, n_components,
                                                 which="LA",
                                                 tol=self.tol,
                                                 maxiter=self.max_iter,
                                                 v0=v0)
 
         # make sure that the eigenvalues are ok and fix numerical issues
-        self.lambdas_ = _check_psd_eigenvalues(self.lambdas_,
+        self.eigenvalues_ = _check_psd_eigenvalues(self.eigenvalues_,
                                                enable_warnings=False)
 
         # flip eigenvectors' sign to enforce deterministic output
-        self.alphas_, _ = svd_flip(self.alphas_,
-                                   np.zeros_like(self.alphas_).T)
+        self.eigenvectors_, _ = svd_flip(self.eigenvectors_,
+                                         np.zeros_like(self.eigenvectors_).T)
 
         # sort eigenvectors in descending order
-        indices = self.lambdas_.argsort()[::-1]
-        self.lambdas_ = self.lambdas_[indices]
-        self.alphas_ = self.alphas_[:, indices]
+        indices = self.eigenvalues_.argsort()[::-1]
+        self.eigenvalues_ = self.eigenvalues_[indices]
+        self.eigenvectors_ = self.eigenvectors_[:, indices]
 
         # remove eigenvectors with a zero eigenvalue (null space) if required
         if self.remove_zero_eig or self.n_components is None:
-            self.alphas_ = self.alphas_[:, self.lambdas_ > 0]
-            self.lambdas_ = self.lambdas_[self.lambdas_ > 0]
+            self.eigenvectors_ = self.eigenvectors_[:, self.eigenvalues_ > 0]
+            self.eigenvalues_ = self.eigenvalues_[self.eigenvalues_ > 0]
 
         # Maintenance note on Eigenvectors normalization
         # ----------------------------------------------
@@ -243,12 +244,12 @@ def _fit_transform(self, K):
         # if u is an eigenvector of Phi(X)Phi(X)'
         # then Phi(X)'u is an eigenvector of Phi(X)'Phi(X)
         #
-        # At this stage our self.alphas_ (the v) have norm 1, we need to scale
-        # them so that eigenvectors in kernel feature space (the u) have norm=1
-        # instead
+        # At this stage our self.eigenvectors_ (the v) have norm 1, we need to
+        # scale them so that eigenvectors in kernel feature space (the u) have
+        # norm=1 instead
         #
         # We COULD scale them here:
-        #       self.alphas_ = self.alphas_ / np.sqrt(self.lambdas_)
+        #       self.eigenvectors_ = self.eigenvectors_ / np.sqrt(self.eigenvalues_)
         #
         # But choose to perform that LATER when needed, in `fit()` and in
         # `transform()`.
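To make the maintenance note concrete, here is a small NumPy sketch (illustrative only; a linear kernel so that Phi(X) = X) showing that the stored `v` has norm 1 while the feature-space eigenvector `u` reaches norm 1 only after dividing by sqrt(lambda):

```python
import numpy as np

rng = np.random.RandomState(0)
Phi = rng.randn(10, 5)           # feature-mapped data; here Phi(X) = X
K = Phi @ Phi.T                  # kernel matrix Phi(X)Phi(X)' (uncentered for brevity)

lambdas, V = np.linalg.eigh(K)   # eigenvalues in ascending order
lam, v = lambdas[-1], V[:, -1]   # largest eigenpair; this is the stored "v"

u = Phi.T @ v / np.sqrt(lam)     # eigenvector of Phi(X)'Phi(X), the "u"
print(np.linalg.norm(v))         # 1.0 -- norm of the stored eigenvector
print(np.linalg.norm(u))         # 1.0 -- only because of the sqrt(lambda) scaling
```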
@@ -285,9 +286,9 @@ def fit(self, X, y=None):
         K = self._get_kernel(X)
         self._fit_transform(K)
 
-        if self.fit_inverse_transform:
+        if self.enable_inverse_transform:
             # no need to use the kernel to transform X, use shortcut expression
-            X_transformed = self.alphas_ * np.sqrt(self.lambdas_)
+            X_transformed = self.eigenvectors_ * np.sqrt(self.eigenvalues_)
 
             self._fit_inverse_transform(X_transformed, X)
 
@@ -310,9 +311,9 @@ def fit_transform(self, X, y=None, **params):
         self.fit(X, **params)
 
         # no need to use the kernel to transform X, use shortcut expression
-        X_transformed = self.alphas_ * np.sqrt(self.lambdas_)
+        X_transformed = self.eigenvectors_ * np.sqrt(self.eigenvalues_)
 
-        if self.fit_inverse_transform:
+        if self.enable_inverse_transform:
             self._fit_inverse_transform(X_transformed, X)
 
         return X_transformed
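The shortcut is valid because on the training data K v_i = lambda_i v_i, so projecting K onto the scaled eigenvectors v_i / sqrt(lambda_i) yields exactly sqrt(lambda_i) v_i. A hedged NumPy sanity check (linear kernel, centering omitted for brevity):

```python
import numpy as np

rng = np.random.RandomState(0)
X = rng.randn(20, 3)
K = X @ X.T                               # kernel matrix on the training data

lambdas, V = np.linalg.eigh(K)
lam, V = lambdas[-2:], V[:, -2:]          # two largest (positive) eigenpairs

shortcut = V * np.sqrt(lam)               # the fit_transform() shortcut
projected = K @ (V / np.sqrt(lam))        # the generic transform() projection
print(np.allclose(shortcut, projected))   # True
```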
@@ -335,10 +336,10 @@ def transform(self, X):
         K = self._centerer.transform(self._get_kernel(X, self.X_fit_))
 
         # scale eigenvectors (properly account for null-space for dot product)
-        non_zeros = np.flatnonzero(self.lambdas_)
-        scaled_alphas = np.zeros_like(self.alphas_)
-        scaled_alphas[:, non_zeros] = (self.alphas_[:, non_zeros]
-                                       / np.sqrt(self.lambdas_[non_zeros]))
+        non_zeros = np.flatnonzero(self.eigenvalues_)
+        scaled_alphas = np.zeros_like(self.eigenvectors_)
+        scaled_alphas[:, non_zeros] = (self.eigenvectors_[:, non_zeros]
+                                       / np.sqrt(self.eigenvalues_[non_zeros]))
 
         # Project with a scalar product between K and the scaled eigenvectors
         return np.dot(K, scaled_alphas)
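The `non_zeros` mask exists because eigenvalues clipped to zero by `_check_psd_eigenvalues` would otherwise trigger a division by zero; their columns are simply left at zero so those components vanish from the dot product. A tiny illustration of the idiom (toy values, not the estimator's actual data):

```python
import numpy as np

eigenvalues = np.array([4.0, 1.0, 0.0])   # last component is in the null space
eigenvectors = np.eye(3)

non_zeros = np.flatnonzero(eigenvalues)   # array([0, 1])
scaled = np.zeros_like(eigenvectors)
scaled[:, non_zeros] = (eigenvectors[:, non_zeros]
                        / np.sqrt(eigenvalues[non_zeros]))
print(scaled.diagonal())                  # [0.5 1.  0. ] -- no division by zero
```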
@@ -358,10 +359,11 @@ def inverse_transform(self, X):
         ----------
         "Learning to Find Pre-Images", G BakIr et al, 2004.
         """
-        if not self.fit_inverse_transform:
-            raise NotFittedError("The fit_inverse_transform parameter was not"
-                                 " set to True when instantiating and hence "
-                                 "the inverse transform is not available.")
+        if not self.enable_inverse_transform:
+            raise NotFittedError("The enable_inverse_transform parameter was"
+                                 " not set to True when instantiating and"
+                                 " hence the inverse transform is not"
+                                 " available.")
 
         K = self._get_kernel(X, self.X_transformed_fit_)
         n_samples = self.X_transformed_fit_.shape[0]

Review comment (Member):

As the codecov bot discovered, the existing tests do not trigger this branch of the code. Could you please add a quick test for this (using pytest.raises(NotFittedError))?
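A sketch of the test being requested, written against this branch's parameter name (the test name and data are illustrative, not part of this diff):

```python
import pytest
from sklearn.datasets import make_blobs
from sklearn.decomposition import KernelPCA
from sklearn.exceptions import NotFittedError


def test_kernel_pca_inverse_transform_not_fitted():
    # inverse_transform must raise when the inverse map was never learned
    X, *_ = make_blobs(n_samples=10, n_features=4, random_state=0)
    kpca = KernelPCA(n_components=2, enable_inverse_transform=False)
    X_trans = kpca.fit_transform(X)
    with pytest.raises(NotFittedError):
        kpca.inverse_transform(X_trans)
```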
18 changes: 10 additions & 8 deletions sklearn/decomposition/tests/test_kernel_pca.py
@@ -34,7 +34,7 @@ def histogram(x, y, **kwargs):
 
             # transform fit data
             kpca = KernelPCA(4, kernel=kernel, eigen_solver=eigen_solver,
-                             fit_inverse_transform=inv)
+                             enable_inverse_transform=inv)
             X_fit_transformed = kpca.fit_transform(X_fit)
             X_fit_transformed2 = kpca.fit(X_fit).transform(X_fit)
             assert_array_almost_equal(np.abs(X_fit_transformed),
@@ -57,7 +57,7 @@ def histogram(x, y, **kwargs):
 
 def test_kernel_pca_invalid_parameters():
     with pytest.raises(ValueError):
-        KernelPCA(10, fit_inverse_transform=True, kernel='precomputed')
+        KernelPCA(10, enable_inverse_transform=True, kernel='precomputed')
 
 
 def test_kernel_pca_consistent_transform():
@@ -97,7 +97,7 @@ def test_kernel_pca_sparse():
     for kernel in ("linear", "rbf", "poly"):
         # transform fit data
         kpca = KernelPCA(4, kernel=kernel, eigen_solver=eigen_solver,
-                         fit_inverse_transform=False)
+                         enable_inverse_transform=False)
         X_fit_transformed = kpca.fit_transform(X_fit)
         X_fit_transformed2 = kpca.fit(X_fit).transform(X_fit)
         assert_array_almost_equal(np.abs(X_fit_transformed),
@@ -261,7 +261,7 @@ def test_nested_circles():
     # and the gamma value has to be updated, the Kernel PCA example will
     # have to be updated too.
     kpca = KernelPCA(kernel="rbf", n_components=2,
-                     fit_inverse_transform=True, gamma=2.)
+                     enable_inverse_transform=True, gamma=2.)
     X_kpca = kpca.fit_transform(X)
 
     # The data is perfectly linearly separable in that space
@@ -278,12 +278,13 @@ def test_kernel_conditioning():
          [5+1e-8, 1e-8],
          [5+1e-8, 0]]
     kpca = KernelPCA(kernel="linear", n_components=2,
-                     fit_inverse_transform=True)
+                     enable_inverse_transform=True)
     kpca.fit(X)
 
     # check that the small non-zero eigenvalue was correctly set to zero
-    assert kpca.lambdas_.min() == 0
-    assert np.all(kpca.lambdas_ == _check_psd_eigenvalues(kpca.lambdas_))
+    assert kpca.eigenvalues_.min() == 0
+    assert np.all(kpca.eigenvalues_ ==
+                  _check_psd_eigenvalues(kpca.eigenvalues_))
 
 
 @pytest.mark.parametrize("kernel",
@@ -292,7 +293,8 @@ def test_kernel_pca_inverse_transform(kernel):
     X, *_ = make_blobs(n_samples=100, n_features=4, centers=[[1, 1, 1, 1]],
                        random_state=0)
 
-    kp = KernelPCA(n_components=2, kernel=kernel, fit_inverse_transform=True)
+    kp = KernelPCA(n_components=2, kernel=kernel,
+                   enable_inverse_transform=True)
     X_trans = kp.fit_transform(X)
     X_inv = kp.inverse_transform(X_trans)
     assert_allclose(X, X_inv)
2 changes: 1 addition & 1 deletion sklearn/manifold/_isomap.py
@@ -189,7 +189,7 @@ def reconstruction_error(self):
         """
         G = -0.5 * self.dist_matrix_ ** 2
         G_center = KernelCenterer().fit_transform(G)
-        evals = self.kernel_pca_.lambdas_
+        evals = self.kernel_pca_.eigenvalues_
         return np.sqrt(np.sum(G_center ** 2) - np.sum(evals ** 2)) / G.shape[0]
 
     def fit(self, X, y=None):
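For context, a hedged sketch of how the renamed attribute surfaces through `Isomap` (illustrative call; on released scikit-learn versions the attribute is still `lambdas_`):

```python
from sklearn.datasets import make_swiss_roll
from sklearn.manifold import Isomap

X, _ = make_swiss_roll(n_samples=200, random_state=0)
iso = Isomap(n_components=2).fit(X)

# reconstruction_error() compares the centered geodesic Gram matrix against
# the variance captured by the kept eigenvalues of the embedded KernelPCA
print(iso.reconstruction_error())
print(iso.kernel_pca_.eigenvalues_[:2])   # renamed from lambdas_ in this PR
```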