Skip to content

cca_zoo.nonparametric

Kernel-based nonparametric CCA methods.


KCCA

KCCA(latent_dimensions: int = 1, center: bool = True, c: float | list[float] = 0.1, kernel: str | list[str] = 'linear', gamma: float | list[float | None] | None = None, degree: float | list[float] = 1.0, coef0: float | list[float] = 1.0, kernel_params: dict[str, object] | list[dict[str, object]] | None = None, eps: float = 0.001)

Bases: BaseModel

Kernel Canonical Correlation Analysis.

Extends MCCA to nonlinear relationships by mapping each view into a reproducing kernel Hilbert space via a kernel function :math:`k_i`. The dual variables (kernel coefficients) :math:`\boldsymbol{\alpha}_i` are found by solving the kernelised generalised eigenvalue problem:

.. math::

A \boldsymbol{\alpha} = \lambda B \boldsymbol{\alpha}

where:

  • :math:`A` is the between-kernel cross-covariance block matrix.
  • :math:`B = \mathrm{block\_diag}\bigl( c_i K_i + (1 - c_i) K_i^2 \bigr)` is the regularised within-kernel matrix.
References

Hardoon, D. R., Szedmak, S., & Shawe-Taylor, J. (2004). Canonical correlation analysis: An overview with application to learning methods. Neural Computation, 16(12), 2639–2664.

Parameters:

Name Type Description Default
latent_dimensions int

Number of latent dimensions. Default is 1.

1
center bool

Whether to subtract column means before fitting. Default True.

True
c float | list[float]

Regularisation parameter(s) in [0, 1]. Default is 0.1.

0.1
kernel str | list[str]

Kernel name(s) or callable(s) passed to :func:`sklearn.metrics.pairwise_kernels`. Default is "linear".

'linear'
gamma float | list[float | None] | None

Gamma parameter(s) for the RBF/polynomial kernel.

None
degree float | list[float]

Degree parameter(s) for the polynomial kernel.

1.0
coef0 float | list[float]

coef0 parameter(s) for the polynomial/sigmoid kernel.

1.0
kernel_params dict[str, object] | list[dict[str, object]] | None

Extra per-view keyword arguments for the kernel.

None
eps float

Regularisation floor for the B matrix. Default is 1e-3.

0.001
Example

>>> import numpy as np
>>> rng = np.random.default_rng(0)
>>> X1 = rng.standard_normal((30, 5))
>>> X2 = rng.standard_normal((30, 5))
>>> model = KCCA(latent_dimensions=2, c=0.1).fit([X1, X2])
>>> scores = model.transform([X1, X2])

Source code in cca_zoo/nonparametric/_kcca.py
def __init__(
    self,
    latent_dimensions: int = 1,
    center: bool = True,
    c: float | list[float] = 0.1,
    kernel: str | list[str] = "linear",
    gamma: float | list[float | None] | None = None,
    degree: float | list[float] = 1.0,
    coef0: float | list[float] = 1.0,
    kernel_params: dict[str, object] | list[dict[str, object]] | None = None,
    eps: float = 1e-3,
) -> None:
    """Initialise KCCA hyperparameters.

    Kernel-related arguments accept either a single value shared by all
    views or a list with one entry per view.
    """
    super().__init__(latent_dimensions=latent_dimensions, center=center)
    # sklearn convention: store constructor arguments verbatim; the
    # per-view expansion and validation happen in ``fit``.
    self.eps = eps
    self.c = c
    self.kernel = kernel
    self.kernel_params = kernel_params
    self.gamma = gamma
    self.degree = degree
    self.coef0 = coef0

fit

fit(views: list[ArrayLike], y: None = None) -> KCCA

Fit the KCCA model.

Parameters:

Name Type Description Default
views list[ArrayLike]

List of arrays, each (n_samples, n_features_i).

required
y None

Ignored.

None

Returns:

Name Type Description
self KCCA

Fitted estimator.

Raises:

Type Description
ValueError

If fewer than 2 views are provided.

ValueError

If views have inconsistent numbers of samples.

Source code in cca_zoo/nonparametric/_kcca.py
def fit(self, views: list[ArrayLike], y: None = None) -> KCCA:
    """Fit the KCCA model.

    Args:
        views: List of arrays, each (n_samples, n_features_i).
        y: Ignored; present for sklearn API compatibility.

    Returns:
        self: Fitted estimator.

    Raises:
        ValueError: If fewer than 2 views are provided.
        ValueError: If views have inconsistent numbers of samples.
    """
    data: list[np.ndarray] = self._setup_fit(views)
    n_views = self.n_views_
    # Broadcast each hyperparameter to exactly one value per view.
    regs = perview_parameter("c", self.c, 0.1, n_views)
    kernel_names = perview_parameter("kernel", self.kernel, "linear", n_views)
    gammas = perview_parameter("gamma", self.gamma, None, n_views)
    degrees = perview_parameter("degree", self.degree, 1.0, n_views)
    coef0s = perview_parameter("coef0", self.coef0, 1.0, n_views)
    extras = perview_parameter("kernel_params", self.kernel_params, {}, n_views)

    # Training views are retained so transform can evaluate test kernels.
    self.train_views_: list[np.ndarray] = data
    grams = self._compute_kernels(data, kernel_names, gammas, degrees, coef0s, extras)
    # Solve A alpha = lambda B alpha in the dual (kernel) space.
    _, dual_vecs = gevp(
        self._build_A(grams), self._build_B(grams, regs), self.latent_dimensions
    )
    boundaries = np.cumsum([g.shape[1] for g in grams])
    self.weights_: list[np.ndarray] = list(np.split(dual_vecs, boundaries[:-1], axis=0))
    # Remember the kernel configuration for transform.
    self._kernel: list[str] = kernel_names
    self._gamma: list[float | None] = gammas
    self._degree: list[float] = degrees
    self._coef0: list[float] = coef0s
    self._kp: list[dict[str, object]] = extras
    return self

transform

transform(views: list[ArrayLike]) -> list[np.ndarray]

Transform new views using the fitted kernel dual variables.

Parameters:

Name Type Description Default
views list[ArrayLike]

List of arrays, each (n_samples_test, n_features_i).

required

Returns:

Type Description
list[ndarray]

List of arrays, each (n_samples_test, latent_dimensions).

Raises:

Type Description
NotFittedError

If fit has not been called.

Source code in cca_zoo/nonparametric/_kcca.py
def transform(self, views: list[ArrayLike]) -> list[np.ndarray]:
    """Transform new views using the fitted kernel dual variables.

    Args:
        views: List of arrays, each (n_samples_test, n_features_i).

    Returns:
        List of arrays, each (n_samples_test, latent_dimensions).

    Raises:
        sklearn.exceptions.NotFittedError: If ``fit`` has not been called.
    """
    check_is_fitted(self)
    from cca_zoo._utils._validation import validate_views

    checked = validate_views(views)
    projections: list[np.ndarray] = []
    for idx, view in enumerate(checked):
        # Kernel between training samples (rows) and test samples (columns).
        gram = pairwise_kernels(
            self.train_views_[idx],
            Y=view,
            metric=self._kernel[idx],
            gamma=self._gamma[idx],
            degree=self._degree[idx],
            coef0=self._coef0[idx],
            filter_params=True,
            **(self._kp[idx] or {}),
        )
        projections.append(gram.T @ self.weights_[idx])
    return projections

KGCCA

KGCCA(latent_dimensions: int = 1, center: bool = True, c: float | list[float] = 0.1, kernel: str | list[str] = 'linear', gamma: float | list[float | None] | None = None, degree: float | list[float] = 1.0, coef0: float | list[float] = 1.0, kernel_params: dict[str, object] | list[dict[str, object]] | None = None, view_weights: list[float] | None = None, eps: float = 1e-06)

Bases: BaseModel

Kernel Generalised Canonical Correlation Analysis.

Kernelised version of GCCA. The shared latent vector is found by solving the eigenvalue problem on the weighted sum of kernel projection matrices:

.. math::

Q = \sum_{i=1}^M \mu_i K_i
    \bigl(c_i K_i + (1 - c_i) K_i^2\bigr)^{-1} K_i

and the dual variables (kernel coefficients) are recovered as :math:`\boldsymbol{\alpha}_i = K_i^+ T` where :math:`T` is the matrix of top-k eigenvectors of :math:`Q`.

References

Tenenhaus, A., Philippe, C., & Frouin, V. (2015). Kernel generalized canonical correlation analysis. Computational Statistics & Data Analysis, 90, 114–131.

Parameters:

Name Type Description Default
latent_dimensions int

Number of latent dimensions. Default is 1.

1
center bool

Whether to subtract column means before fitting. Default True.

True
c float | list[float]

Regularisation parameter(s). Default is 0.1.

0.1
kernel str | list[str]

Kernel name(s). Default is "linear".

'linear'
gamma float | list[float | None] | None

Gamma for RBF/polynomial kernel.

None
degree float | list[float]

Degree for polynomial kernel.

1.0
coef0 float | list[float]

coef0 for polynomial/sigmoid kernel.

1.0
kernel_params dict[str, object] | list[dict[str, object]] | None

Extra per-view kernel keyword arguments.

None
view_weights list[float] | None

Per-view weights. Default is equal weights.

None
eps float

Regularisation floor. Default is 1e-6.

1e-06
Example

>>> import numpy as np
>>> rng = np.random.default_rng(0)
>>> X1 = rng.standard_normal((30, 5))
>>> X2 = rng.standard_normal((30, 5))
>>> X3 = rng.standard_normal((30, 5))
>>> model = KGCCA(latent_dimensions=2).fit([X1, X2, X3])

Source code in cca_zoo/nonparametric/_kgcca.py
def __init__(
    self,
    latent_dimensions: int = 1,
    center: bool = True,
    c: float | list[float] = 0.1,
    kernel: str | list[str] = "linear",
    gamma: float | list[float | None] | None = None,
    degree: float | list[float] = 1.0,
    coef0: float | list[float] = 1.0,
    kernel_params: dict[str, object] | list[dict[str, object]] | None = None,
    view_weights: list[float] | None = None,
    eps: float = 1e-6,
) -> None:
    """Initialise KGCCA hyperparameters.

    Kernel-related arguments accept either a single shared value or a
    list with one entry per view.
    """
    super().__init__(latent_dimensions=latent_dimensions, center=center)
    # sklearn convention: keep raw constructor arguments; ``fit`` does
    # the per-view expansion.
    self.eps = eps
    self.view_weights = view_weights
    self.c = c
    self.kernel = kernel
    self.kernel_params = kernel_params
    self.gamma = gamma
    self.degree = degree
    self.coef0 = coef0

fit

fit(views: list[ArrayLike], y: None = None) -> KGCCA

Fit the KGCCA model.

Parameters:

Name Type Description Default
views list[ArrayLike]

List of arrays, each (n_samples, n_features_i).

required
y None

Ignored.

None

Returns:

Name Type Description
self KGCCA

Fitted estimator.

Raises:

Type Description
ValueError

If fewer than 2 views are provided.

ValueError

If views have inconsistent numbers of samples.

Source code in cca_zoo/nonparametric/_kgcca.py
def fit(self, views: list[ArrayLike], y: None = None) -> KGCCA:
    """Fit the KGCCA model.

    Args:
        views: List of arrays, each (n_samples, n_features_i).
        y: Ignored.

    Returns:
        self: Fitted estimator.

    Raises:
        ValueError: If fewer than 2 views are provided.
        ValueError: If views have inconsistent numbers of samples.
    """
    views_: list[np.ndarray] = self._setup_fit(views)
    # Broadcast each hyperparameter to one value per view.
    c_ = perview_parameter("c", self.c, 0.1, self.n_views_)
    mu = perview_parameter("view_weights", self.view_weights, 1.0, self.n_views_)
    kernel_ = perview_parameter("kernel", self.kernel, "linear", self.n_views_)
    gamma_ = perview_parameter("gamma", self.gamma, None, self.n_views_)
    degree_ = perview_parameter("degree", self.degree, 1.0, self.n_views_)
    coef0_ = perview_parameter("coef0", self.coef0, 1.0, self.n_views_)
    kp_ = perview_parameter("kernel_params", self.kernel_params, {}, self.n_views_)

    self.train_views_: list[np.ndarray] = views_
    kernels = [
        pairwise_kernels(
            v,
            metric=kernel_[i],
            gamma=gamma_[i],
            degree=degree_[i],
            coef0=coef0_[i],
            filter_params=True,
            **(kp_[i] if kp_[i] else {}),
        )
        for i, v in enumerate(views_)
    ]
    # Build Q (n x n): Q = sum_i mu_i K_i (c_i K_i + (1-c_i) K_i^2)^{-1} K_i
    Q = np.zeros((self.n_samples_, self.n_samples_))
    for K, ci, mi in zip(kernels, c_, mu):
        B_i = ci * K + (1.0 - ci) * K @ K
        # Shift the spectrum so B_i is safely positive definite.
        min_eig = np.linalg.eigvalsh(B_i).min()
        if min_eig < self.eps:
            B_i += (self.eps - min_eig) * np.eye(B_i.shape[0])
        # solve(B_i, K) == inv(B_i) @ K, but more numerically stable and
        # cheaper than forming the explicit inverse.
        Q += mi * K @ np.linalg.solve(B_i, K)

    _, eigvecs = gevp(Q, None, self.latent_dimensions)
    T = eigvecs[:, : self.latent_dimensions]
    # Recover per-view dual coefficients: alpha_i = K_i^+ T.
    self.weights_: list[np.ndarray] = [np.linalg.pinv(K) @ T for K in kernels]
    # Store kernel parameters for transform
    self._kernel = kernel_
    self._gamma = gamma_
    self._degree = degree_
    self._coef0 = coef0_
    self._kp = kp_
    return self

transform

transform(views: list[ArrayLike]) -> list[np.ndarray]

Transform new views using fitted kernel dual variables.

Parameters:

Name Type Description Default
views list[ArrayLike]

List of arrays, each (n_samples_test, n_features_i).

required

Returns:

Type Description
list[ndarray]

List of arrays, each (n_samples_test, latent_dimensions).

Raises:

Type Description
NotFittedError

If fit has not been called.

Source code in cca_zoo/nonparametric/_kgcca.py
def transform(self, views: list[ArrayLike]) -> list[np.ndarray]:
    """Transform new views using fitted kernel dual variables.

    Args:
        views: List of arrays, each (n_samples_test, n_features_i).

    Returns:
        List of arrays, each (n_samples_test, latent_dimensions).

    Raises:
        sklearn.exceptions.NotFittedError: If ``fit`` has not been called.
    """
    check_is_fitted(self)
    from cca_zoo._utils._validation import validate_views

    checked = validate_views(views)
    outputs: list[np.ndarray] = []
    for idx, view in enumerate(checked):
        # Evaluate the kernel between training samples and test samples.
        gram = pairwise_kernels(
            self.train_views_[idx],
            Y=view,
            metric=self._kernel[idx],
            gamma=self._gamma[idx],
            degree=self._degree[idx],
            coef0=self._coef0[idx],
            filter_params=True,
            **(self._kp[idx] or {}),
        )
        outputs.append(gram.T @ self.weights_[idx])
    return outputs

KTCCA

KTCCA(latent_dimensions: int = 1, center: bool = True, c: float | list[float] = 0.1, kernel: str | list[str] = 'linear', gamma: float | list[float | None] | None = None, degree: float | list[float] = 1.0, coef0: float | list[float] = 1.0, kernel_params: dict[str, object] | list[dict[str, object]] | None = None, eps: float = 0.001, random_state: int | None = None)

Bases: BaseModel

Kernel Tensor Canonical Correlation Analysis.

Extends TCCA to nonlinear relationships by computing the cross-moment tensor from whitened kernel matrices rather than from the raw views. Each kernel matrix :math:`K_i` is whitened using its regularised self-product, then PARAFAC is applied to the resulting cross-moment tensor.

References

Kim, T.-K., Wong, S.-F., & Cipolla, R. (2007). Tensor canonical correlation analysis for action classification. CVPR 2007. IEEE.

Parameters:

Name Type Description Default
latent_dimensions int

Number of latent dimensions. Default is 1.

1
center bool

Whether to subtract column means before fitting. Default True.

True
c float | list[float]

Regularisation parameter(s). Default is 0.1.

0.1
kernel str | list[str]

Kernel name(s). Default is "linear".

'linear'
gamma float | list[float | None] | None

Gamma for RBF/polynomial kernel.

None
degree float | list[float]

Degree for polynomial kernel.

1.0
coef0 float | list[float]

coef0 for polynomial/sigmoid kernel.

1.0
kernel_params dict[str, object] | list[dict[str, object]] | None

Extra per-view keyword arguments for the kernel.

None
eps float

Regularisation floor. Default is 1e-3.

0.001
random_state int | None

Seed for PARAFAC. Default is None.

None
Example

>>> import numpy as np
>>> rng = np.random.default_rng(0)
>>> X1 = rng.standard_normal((20, 5))
>>> X2 = rng.standard_normal((20, 5))
>>> X3 = rng.standard_normal((20, 5))
>>> model = KTCCA(latent_dimensions=1, random_state=0).fit([X1, X2, X3])

Source code in cca_zoo/nonparametric/_ktcca.py
def __init__(
    self,
    latent_dimensions: int = 1,
    center: bool = True,
    c: float | list[float] = 0.1,
    kernel: str | list[str] = "linear",
    gamma: float | list[float | None] | None = None,
    degree: float | list[float] = 1.0,
    coef0: float | list[float] = 1.0,
    kernel_params: dict[str, object] | list[dict[str, object]] | None = None,
    eps: float = 1e-3,
    random_state: int | None = None,
) -> None:
    """Initialise KTCCA hyperparameters.

    Kernel-related arguments accept either a single shared value or a
    list with one entry per view.
    """
    super().__init__(latent_dimensions=latent_dimensions, center=center)
    # sklearn convention: no validation here; ``fit`` expands these to
    # per-view lists.
    self.random_state = random_state
    self.eps = eps
    self.c = c
    self.kernel = kernel
    self.kernel_params = kernel_params
    self.gamma = gamma
    self.degree = degree
    self.coef0 = coef0

fit

fit(views: list[ArrayLike], y: None = None) -> KTCCA

Fit the KTCCA model.

Parameters:

Name Type Description Default
views list[ArrayLike]

List of arrays, each (n_samples, n_features_i).

required
y None

Ignored.

None

Returns:

Name Type Description
self KTCCA

Fitted estimator.

Raises:

Type Description
ValueError

If fewer than 2 views are provided.

ValueError

If views have inconsistent numbers of samples.

Source code in cca_zoo/nonparametric/_ktcca.py
def fit(self, views: list[ArrayLike], y: None = None) -> KTCCA:
    """Fit the KTCCA model.

    Args:
        views: List of arrays, each (n_samples, n_features_i).
        y: Ignored.

    Returns:
        self: Fitted estimator.

    Raises:
        ValueError: If fewer than 2 views are provided.
        ValueError: If views have inconsistent numbers of samples.
    """
    views_: list[np.ndarray] = self._setup_fit(views)
    # Broadcast each hyperparameter to one value per view.
    c_ = perview_parameter("c", self.c, 0.1, self.n_views_)
    kernel_ = perview_parameter("kernel", self.kernel, "linear", self.n_views_)
    gamma_ = perview_parameter("gamma", self.gamma, None, self.n_views_)
    degree_ = perview_parameter("degree", self.degree, 1.0, self.n_views_)
    coef0_ = perview_parameter("coef0", self.coef0, 1.0, self.n_views_)
    kp_ = perview_parameter("kernel_params", self.kernel_params, {}, self.n_views_)

    self.train_views_: list[np.ndarray] = views_
    # Store parameters for transform
    self._kernel = kernel_
    self._gamma = gamma_
    self._degree = degree_
    self._coef0 = coef0_
    self._kp = kp_

    kernels = [
        pairwise_kernels(
            v,
            metric=kernel_[i],
            gamma=gamma_[i],
            degree=degree_[i],
            coef0=coef0_[i],
            filter_params=True,
            **(kp_[i] if kp_[i] else {}),
        )
        for i, v in enumerate(views_)
    ]
    whitened, self._cov_invsqrt = self._whiten_kernels(kernels, c_)

    # Build the cross-moment tensor by successive outer products: after
    # absorbing view i the array has shape (n, d_1, ..., d_i).  ``fit``
    # guarantees at least 2 views, so seeding with the first whitened
    # kernel is safe (removes the original None/assert scaffolding).
    M = whitened[0]
    for wk in whitened[1:]:
        # Insert singleton axes so the matmul broadcasts as an outer
        # product over the existing tensor modes.
        for _ in range(M.ndim - 1):
            wk = np.expand_dims(wk, 1)
        M = np.expand_dims(M, -1) @ wk
    M = M.mean(axis=0)  # average the outer products over samples

    tl.set_backend("numpy")
    parafac_result = parafac(
        M,
        self.latent_dimensions,
        verbose=False,
        random_state=self.random_state,
    )
    # Undo the whitening to obtain dual coefficients in kernel space.
    self.weights_: list[np.ndarray] = [
        self._cov_invsqrt[i] @ fac for i, fac in enumerate(parafac_result.factors)
    ]
    return self

transform

transform(views: list[ArrayLike]) -> list[np.ndarray]

Transform new views using fitted kernel dual variables.

Parameters:

Name Type Description Default
views list[ArrayLike]

List of arrays, each (n_samples_test, n_features_i).

required

Returns:

Type Description
list[ndarray]

List of arrays, each (n_samples_test, latent_dimensions).

Raises:

Type Description
NotFittedError

If fit has not been called.

Source code in cca_zoo/nonparametric/_ktcca.py
def transform(self, views: list[ArrayLike]) -> list[np.ndarray]:
    """Transform new views using fitted kernel dual variables.

    Args:
        views: List of arrays, each (n_samples_test, n_features_i).

    Returns:
        List of arrays, each (n_samples_test, latent_dimensions).

    Raises:
        sklearn.exceptions.NotFittedError: If ``fit`` has not been called.
    """
    check_is_fitted(self)
    from cca_zoo._utils._validation import validate_views

    checked = validate_views(views)
    scores: list[np.ndarray] = []
    for idx, view in enumerate(checked):
        # Kernel between training samples (rows) and test samples (columns).
        gram = pairwise_kernels(
            self.train_views_[idx],
            Y=view,
            metric=self._kernel[idx],
            gamma=self._gamma[idx],
            degree=self._degree[idx],
            coef0=self._coef0[idx],
            filter_params=True,
            **(self._kp[idx] or {}),
        )
        scores.append(gram.T @ self.weights_[idx])
    return scores