Skip to content

cca_zoo.model_selection

Cross-validated hyperparameter search for multiview models.


GridSearchCV

GridSearchCV(estimator: BaseEstimator, param_grid: dict[str, list[Any]] | list[dict[str, list[Any]]], cv: int | Any = 5, scoring: str | None = None, n_jobs: int | None = None, refit: bool = True, verbose: int = 0)

Grid search with cross-validation for multiview CCA models.

Wraps :class:`sklearn.model_selection.GridSearchCV` to support the ``list[ArrayLike]`` interface of cca_zoo models. Views are horizontally stacked before being passed to sklearn and split back inside the wrapped estimator.

Parameters:

Name Type Description Default
estimator BaseEstimator

A multiview CCA estimator (e.g. :class:`~cca_zoo.linear.CCA`).

required
param_grid dict[str, list[Any]] | list[dict[str, list[Any]]]

Dictionary or list of dictionaries with parameter names as keys and lists of parameter settings as values.

required
cv int | Any

Number of cross-validation folds or a cross-validation splitter. Default is 5.

5
scoring str | None

Scoring strategy. When None, the estimator's default :meth:`score` method is used.

None
n_jobs int | None

Number of jobs to run in parallel. Default is None (sequential).

None
refit bool

Whether to refit the best estimator on the full dataset. Default is True.

True
verbose int

Verbosity level. Default is 0.

0
Example

>>> import numpy as np
>>> from cca_zoo.linear import CCA
>>> from cca_zoo.model_selection import GridSearchCV
>>> rng = np.random.default_rng(0)
>>> X1 = rng.standard_normal((50, 5))
>>> X2 = rng.standard_normal((50, 4))
>>> gs = GridSearchCV(
...     CCA(), param_grid={"latent_dimensions": [1, 2]}, cv=2
... )
>>> gs.fit([X1, X2])

Source code in cca_zoo/model_selection/_search.py
def __init__(
    self,
    estimator: BaseEstimator,
    param_grid: dict[str, list[Any]] | list[dict[str, list[Any]]],
    cv: int | Any = 5,
    scoring: str | None = None,
    n_jobs: int | None = None,
    refit: bool = True,
    verbose: int = 0,
) -> None:
    """Store the search configuration without touching any data.

    Parameters are saved verbatim and unvalidated (the usual
    sklearn-estimator convention, so ``get_params``/``clone`` can
    round-trip them); all real work happens in :meth:`fit`.

    Args:
        estimator: A multiview CCA estimator to tune.
        param_grid: Dict (or list of dicts) mapping parameter names to
            lists of candidate values.
        cv: Fold count or a cross-validation splitter. Default 5.
        scoring: Scoring strategy; ``None`` uses the estimator's own
            ``score`` method.
        n_jobs: Parallel job count; ``None`` means sequential.
        refit: Whether to refit the best estimator on the full data.
        verbose: Verbosity level passed through to sklearn.
    """
    self.estimator = estimator
    self.param_grid = param_grid
    self.cv = cv
    self.scoring = scoring
    self.n_jobs = n_jobs
    self.refit = refit
    self.verbose = verbose

fit

fit(views: list[ArrayLike], y: None = None, **fit_params: Any) -> GridSearchCV

Run grid search with cross-validation on multiview data.

Parameters:

Name Type Description Default
views list[ArrayLike]

List of arrays, each of shape (n_samples, n_features_i). All arrays must have the same number of rows.

required
y None

Ignored.

None
**fit_params Any

Additional keyword arguments forwarded to the estimator's fit method during each fold.

{}

Returns:

Name Type Description
self GridSearchCV

Fitted grid search object.

Source code in cca_zoo/model_selection/_search.py
def fit(
    self,
    views: list[ArrayLike],
    y: None = None,
    **fit_params: Any,
) -> GridSearchCV:
    """Run grid search with cross-validation on multiview data.

    Args:
        views: List of arrays, each of shape (n_samples, n_features_i).
            All arrays must have the same number of rows.
        y: Ignored.
        **fit_params: Additional keyword arguments forwarded to the
            estimator's ``fit`` method during each fold.

    Returns:
        self: Fitted grid search object.
    """
    view_arrays = [np.asarray(view) for view in views]
    widths = [view.shape[1] for view in view_arrays]
    stacked = np.hstack(view_arrays)

    # sklearn only accepts a single 2D X; the wrapper re-splits the
    # stacked columns back into per-view blocks at `widths` boundaries.
    self._inner_cv: skms.GridSearchCV = skms.GridSearchCV(
        estimator=_MultiviewWrapper(
            estimator=self.estimator,
            split_indices=widths,
        ),
        param_grid=self._make_wrapped_param_grid(widths),
        cv=self.cv,
        scoring=self.scoring,
        n_jobs=self.n_jobs,
        refit=self.refit,
        verbose=self.verbose,
    )
    self._inner_cv.fit(stacked, y, **fit_params)

    # Mirror the key sklearn result attributes on this object.
    self.cv_results_: dict[str, Any] = self._inner_cv.cv_results_
    self.best_score_: float = self._inner_cv.best_score_
    # Wrapped params come back as "estimator__<name>"; drop the prefix.
    prefix = "estimator__"
    self.best_params_: dict[str, Any] = {
        key[len(prefix):]: value
        for key, value in self._inner_cv.best_params_.items()
    }
    if self.refit:
        self.best_estimator_: BaseEstimator = (
            self._inner_cv.best_estimator_.estimator_
        )
    return self

score

score(views: list[ArrayLike], y: None = None) -> float

Score the best estimator on held-out multiview data.

Parameters:

Name Type Description Default
views list[ArrayLike]

List of arrays, each of shape (n_samples, n_features_i).

required
y None

Ignored.

None

Returns:

Name Type Description
score float

Scalar mean canonical correlation of the best estimator.

Source code in cca_zoo/model_selection/_search.py
def score(self, views: list[ArrayLike], y: None = None) -> float:
    """Score the best estimator on held-out multiview data.

    Args:
        views: List of arrays, each of shape (n_samples, n_features_i).
        y: Ignored.

    Returns:
        Scalar: mean canonical correlation of the best estimator.
    """
    # Stack views into the single 2D layout the inner sklearn CV expects.
    stacked = np.hstack([np.asarray(view) for view in views])
    return float(self._inner_cv.score(stacked, y))