Module `crossense.ensemble`

The :mod:crossense.ensemble module includes ensemble-based methods for classification, regression and anomaly detection.

Expand source code

"""
The :mod:`crossense.ensemble` module includes ensemble-based methods for
classification, regression and anomaly detection.
"""
from ._bagging import BaseCrossBagging, CrossBaggingClassifier, CrossBaggingRegressor

# Public API of the package: the names imported from ``._bagging`` above.
__all__ = [
    "BaseCrossBagging",
    "CrossBaggingClassifier",
    "CrossBaggingRegressor",
]

Sub-modules

crossense.ensemble.tests

Classes

class BaseCrossBagging (estimator=None, cv=5, *, n_jobs=None, verbose=0)

Base class for cross-fold Bagging meta-estimator.

Warning: This class should not be used directly. Use derived classes instead.

Expand source code

class BaseCrossBagging(BaseEnsemble, metaclass=ABCMeta):
    """Base class for cross-fold Bagging meta-estimator.

    The ensemble size is taken from the number of splits of ``cv`` and the
    training indices of every split are recorded in ``estimators_samples_``.

    Warning: This class should not be used directly. Use derived classes
    instead.
    """

    _parameter_constraints: dict = {
        "estimator": [HasMethods(["fit", "predict"]), None],
        "n_jobs": [None, Integral],
        "random_state": ["random_state"],
        "verbose": ["verbose"],
    }

    @abstractmethod
    def __init__(
        self,
        estimator=None,
        cv=5,
        *,
        n_jobs=None,
        verbose=0,
    ):
        """Store the hyper-parameters and normalise ``cv`` into a splitter.

        Parameters
        ----------
        estimator : object, default=None
            The base estimator handed to the parent ensemble class.

        cv : int, cross-validation generator or iterable, default=5
            Passed through ``check_cv``; the resulting splitter's
            ``n_splits`` becomes ``n_estimators``.

        n_jobs : int, default=None
            Number of parallel jobs used by the parallel loops.

        verbose : int, default=0
            Verbosity forwarded to the parallel loops.
        """
        self.cv: _BaseKFold = check_cv(cv, classifier=is_classifier(estimator))
        super().__init__(
            estimator=estimator,
            n_estimators=self.cv.n_splits,
        )
        self.n_jobs = n_jobs
        self.verbose = verbose
        self.estimators_ = []
        self.estimators_samples_ = []

    @_fit_context(
        # BaseBagging.estimator is not validated yet
        prefer_skip_nested_validation=False
    )
    def fit(self, X, y, sample_weight=None):
        """Build a Bagging ensemble of estimators from the training set (X, y).

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The training input samples. Sparse matrices are accepted only if
            they are supported by the base estimator.

        y : array-like of shape (n_samples,)
            The target values (class labels in classification, real numbers in
            regression).

        sample_weight : array-like of shape (n_samples,), default=None
            Sample weights. If None, then samples are equally weighted.
            Note that this is supported only if the base estimator supports
            sample weighting.

        Returns
        -------
        self : object
            Fitted estimator.
        """
        # Convert data (X is required to be 2d and indexable)
        X, y = self._validate_data(
            X,
            y,
            accept_sparse=["csr", "csc"],
            dtype=None,
            force_all_finite=False,
            multi_output=True,
        )
        return self._fit(X, y, sample_weight=sample_weight)

    # noinspection PyMethodMayBeStatic
    def _parallel_args(self):
        """Return extra keyword arguments for ``Parallel`` (none by default)."""
        return {}

    def _fit(
        self,
        X,
        y,
        max_depth=None,
        sample_weight=None,
        check_input=True,
    ):
        """Build a Bagging ensemble of estimators from the training
           set (X, y).

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The training input samples. Sparse matrices are accepted only if
            they are supported by the base estimator.

        y : array-like of shape (n_samples,)
            The target values (class labels in classification, real numbers in
            regression).

        max_depth : int, default=None
            Override value used when constructing base estimator. Only
            supported if the base estimator has a max_depth parameter.

        sample_weight : array-like of shape (n_samples,), default=None
            Sample weights. If None, then samples are equally weighted.
            Note that this is supported only if the base estimator supports
            sample weighting.

        check_input : bool, default=True
            Override value used when fitting base estimator. Only supported
            if the base estimator has a check_input parameter for fit function.

        Returns
        -------
        self : object
            Fitted estimator.
        """
        # Fold indices are rebuilt from scratch for the data of this call.
        self._generate_fold_indices(X, y, None)
        if sample_weight is not None:
            sample_weight = _check_sample_weight(sample_weight, X, dtype=None)

        # Remap output
        n_samples = X.shape[0]
        self._n_samples = n_samples
        y = self._validate_y(y)

        # Check parameters
        self._validate_estimator()

        if max_depth is not None:
            self.estimator_.max_depth = max_depth

        # Parallel loop
        n_jobs, n_estimators, starts = _partition_estimators(
            self.n_estimators, self.n_jobs
        )
        total_n_estimators = sum(n_estimators)

        all_results = Parallel(
            n_jobs=n_jobs, verbose=self.verbose, **self._parallel_args()
        )(
            delayed(_parallel_build_estimators)(
                n_estimators[i],
                self,
                self.estimators_samples_[starts[i] : starts[i + 1]],
                X,
                y,
                sample_weight,
                total_n_estimators,
                verbose=self.verbose,
                check_input=check_input,
            )
            for i in range(n_jobs)
        )

        # Reduce: the first item of every job result holds its estimators.
        self.estimators_ = list(
            itertools.chain.from_iterable(t[0] for t in all_results)
        )
        return self

    # noinspection PyMethodMayBeStatic
    def _validate_y(self, y):
        """Flatten ``y`` to 1d when it has a single column, else return it."""
        if len(y.shape) == 1 or y.shape[1] == 1:
            return column_or_1d(y, warn=True)
        return y

    def _generate_fold_indices(self, X, y, groups):
        """Recompute ``estimators_samples_`` from the splits of ``self.cv``.

        The list is replaced rather than extended, so fitting the same
        instance again does not keep (and reuse) the indices of an earlier
        fit.

        Parameters
        ----------
        X, y, groups
            Forwarded unchanged to ``self.cv.split``.
        """
        # The first element of every split is kept (the training indices).
        self.estimators_samples_ = [
            fold[0] for fold in self.cv.split(X, y, groups)
        ]

    def set_params(self, **params):
        """Set the parameters of this estimator.

        ``cv`` is handled here: it is normalised with ``check_cv`` and
        ``n_estimators`` is updated to the new number of splits so that both
        stay in sync. Every other parameter is forwarded to the parent
        implementation.

        Parameters
        ----------
        **params : dict
            Estimator parameters.

        Returns
        -------
        self : estimator instance
            Estimator instance.
        """
        cv = params.pop("cv", None)
        # Apply the remaining parameters first so that a simultaneously
        # supplied ``estimator`` is the one inspected by ``is_classifier``.
        super().set_params(**params)
        # Explicit None test: truthiness would silently drop falsy values and
        # is ambiguous for array-like iterables.
        if cv is not None:
            self.cv = check_cv(cv, classifier=is_classifier(self.estimator))
            self.n_estimators = self.cv.n_splits
        return self

Ancestors

sklearn.ensemble._base.BaseEnsemble
sklearn.base.MetaEstimatorMixin
sklearn.base.BaseEstimator
sklearn.utils._metadata_requests._MetadataRequester

Subclasses

crossense.ensemble._bagging.CrossBaggingClassifier
crossense.ensemble._bagging.CrossBaggingRegressor

Methods

def fit(self, X, y, sample_weight=None)

Build a Bagging ensemble of estimators from the training set (X, y).

Parameters

X : {array-like, sparse matrix} of shape (n_samples, n_features): The training input samples. Sparse matrices are accepted only if they are supported by the base estimator.
y : array-like of shape (n_samples,): The target values (class labels in classification, real numbers in regression).
sample_weight : array-like of shape (n_samples,), default=None: Sample weights. If None, then samples are equally weighted. Note that this is supported only if the base estimator supports sample weighting.

Expand source code

@_fit_context(
    # BaseBagging.estimator is not validated yet
    prefer_skip_nested_validation=False
)
def fit(self, X, y, sample_weight=None):
    """Fit the cross-fold ensemble on the training set (X, y).

    Parameters
    ----------
    X : {array-like, sparse matrix} of shape (n_samples, n_features)
        Training samples. Sparse input is only usable when the base
        estimator supports it.

    y : array-like of shape (n_samples,)
        Targets (class labels for classification, real values for
        regression).

    sample_weight : array-like of shape (n_samples,), default=None
        Per-sample weights; ``None`` means equal weights. Only supported
        when the base estimator supports sample weighting.

    Returns
    -------
    The value returned by ``self._fit``.
    """
    # X must end up 2d and indexable; the data is otherwise left as
    # permissive as possible (any dtype, non-finite values allowed).
    validation_options = {
        "accept_sparse": ["csr", "csc"],
        "dtype": None,
        "force_all_finite": False,
        "multi_output": True,
    }
    X, y = self._validate_data(X, y, **validation_options)
    return self._fit(X, y, sample_weight=sample_weight)

def set_fit_request(self: crossense.ensemble._bagging.BaseCrossBagging, *, sample_weight: Union[bool, ForwardRef(None), str] = '$UNCHANGED$') ‑> crossense.ensemble._bagging.BaseCrossBagging

Request metadata passed to the fit method.

Note that this method is only relevant if enable_metadata_routing=True (see :func:sklearn.set_config). Please see :ref:User Guide <metadata_routing> on how the routing mechanism works.

The options for each parameter are:

True: metadata is requested, and passed to fit if provided. The request is ignored if metadata is not provided.
False: metadata is not requested and the meta-estimator will not pass it to fit.
None: metadata is not requested, and the meta-estimator will raise an error if the user provides it.
str: metadata should be passed to the meta-estimator with this given alias instead of the original name.

The default (sklearn.utils.metadata_routing.UNCHANGED) retains the existing request. This allows you to change the request for some parameters and not others.

Added in version: 1.3

Note

This method is only relevant if this estimator is used as a sub-estimator of a meta-estimator, e.g. used inside a :class:~sklearn.pipeline.Pipeline. Otherwise it has no effect.

Parameters

sample_weight : str, True, False, or None, default=sklearn.utils.metadata_routing.UNCHANGED: Metadata routing for sample_weight parameter in fit.

Returns

self : object: The updated object.

Expand source code

def func(**kw):
    """Update the metadata request for the given parameters.

    This docstring is overwritten below.
    See REQUESTER_DOC for expected functionality
    """
    if not _routing_enabled():
        raise RuntimeError(
            "This method is only available when metadata routing is enabled."
            " You can enable it using"
            " sklearn.set_config(enable_metadata_routing=True)."
        )

    # Reject unknown keyword names only when key validation is switched on.
    if self.validate_keys:
        unexpected = set(kw) - set(self.keys)
        if unexpected:
            raise TypeError(
                f"Unexpected args: {unexpected}. Accepted arguments"
                f" are: {set(self.keys)}"
            )

    requests = instance._get_metadata_request()
    target_request = getattr(requests, self.name)

    # Entries left at UNCHANGED keep whatever request was already stored.
    explicit = {
        prop: alias for prop, alias in kw.items() if alias is not UNCHANGED
    }
    for prop, alias in explicit.items():
        target_request.add_request(param=prop, alias=alias)

    instance._metadata_request = requests
    return instance

def set_params(self, **params)

Set the parameters of this estimator.

The method works on simple estimators as well as on nested objects (such as :class:~sklearn.pipeline.Pipeline). The latter have parameters of the form <component>__<parameter> so that it's possible to update each component of a nested object.

Parameters

**params : dict: Estimator parameters.

Returns

self : estimator instance: Estimator instance.

Expand source code

def set_params(self, **params):
    """Set the parameters of this estimator.

    ``cv`` is handled here: it is normalised with ``check_cv`` and
    ``n_estimators`` is updated to the new number of splits so that both
    stay in sync. Every other parameter is forwarded to the parent
    implementation.

    Parameters
    ----------
    **params : dict
        Estimator parameters.

    Returns
    -------
    self : estimator instance
        Estimator instance.
    """
    cv = params.pop("cv", None)
    # Apply the remaining parameters first so that a simultaneously
    # supplied ``estimator`` is the one inspected by ``is_classifier``.
    super().set_params(**params)
    # Explicit None test: truthiness would silently drop falsy values and
    # is ambiguous for array-like iterables.
    if cv is not None:
        self.cv = check_cv(cv, classifier=is_classifier(self.estimator))
        self.n_estimators = self.cv.n_splits
    return self

class CrossBaggingClassifier (estimator: object = None, cv: Union[int, BaseCrossValidator, Iterable] = 5, *, n_jobs: Optional[int] = None, verbose=0)

A cross-validation Bagging classifier.

A cross-validation Bagging classifier is an ensemble meta-estimator that fits one base classifier on each fold produced by a cross-validation generator.

Attributes

estimator_ : estimator: The base estimator from which the ensemble is grown.
n_features_in_ : int: Number of features seen during :term:fit.
feature_names_in_ : ndarray of shape (n_features_in_,): Names of features seen during :term:fit. Defined only when X has feature names that are all strings.
estimators_ : list of estimators: The collection of fitted base estimators.
estimators_samples_ : list of arrays: The subset of drawn samples (i.e., the in-bag samples) for each base estimator. Each subset is defined by an array of the indices selected.
classes_ : ndarray of shape (n_classes,): The classes labels.
n_classes_ : int or list: The number of classes.

Examples

>>> from sklearn.svm import SVC
>>> from crossense.ensemble import CrossBaggingClassifier
>>> from sklearn.datasets import make_classification
>>> X, y = make_classification(n_samples=100, n_features=4,
...                            n_informative=2, n_redundant=0,
...                            random_state=0, shuffle=False)
>>> clf = CrossBaggingClassifier(estimator=SVC(), cv=5).fit(X, y)
>>> clf.predict([[0, 0, 0, 0]])
array([1])

Parameters

estimator: The base estimator to fit on each cross-validation fold of the dataset. If None, then the base estimator is a :class:~sklearn.tree.DecisionTreeClassifier.

cv: Determines the cross-validation splitting strategy. Possible inputs for cv are:

- <code>None</code>, to use the default 5-fold cross validation,
- int, to specify the number of folds in a <code>(Stratified)KFold</code>,
- :term:<code>CV splitter</code>,
- An iterable that generates (train, test) splits as arrays of indices.

For <code>int</code>/<code>None</code> inputs, if the estimator is a classifier and <code>y</code> is
either binary or multiclass, :class:<code>StratifiedKFold</code> is used. In all
other cases, :class:<code>KFold</code> is used. These splitters are instantiated
with `shuffle=False` so the splits will be the same across calls.

Refer to the :ref:`User Guide <cross_validation>` for the various
cross-validation strategies that can be used here.

n_jobs: The number of jobs to run in parallel for both :meth:fit and :meth:predict. None means 1 unless in a :obj:joblib.parallel_backend context. -1 means using all processors. See :term:Glossary <n_jobs> for more details.

verbose: Controls the verbosity when fitting and predicting.

Expand source code

class CrossBaggingClassifier(ClassifierMixin, BaseCrossBagging):
    """A cross-validation Bagging classifier.

    An ensemble meta-estimator whose base classifiers are each fitted on one
    fold produced by a cross-validation generator.

    Attributes
    ----------
    estimator_ : estimator
        The base estimator from which the ensemble is grown.

    n_features_in_ : int
        Number of features seen during :term:`fit`.

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

    estimators_ : list of estimators
        The collection of fitted base estimators.

    estimators_samples_ : list of arrays
        The subset of samples (i.e., the in-bag samples) for each base
        estimator. Each subset is defined by an array of the indices selected.

    classes_ : ndarray of shape (n_classes,)
        The classes labels.

    n_classes_ : int or list
        The number of classes.

    Examples
    --------
    >>> from sklearn.svm import SVC
    >>> from crossense.ensemble import CrossBaggingClassifier
    >>> from sklearn.datasets import make_classification
    >>> X, y = make_classification(n_samples=100, n_features=4,
    ...                            n_informative=2, n_redundant=0,
    ...                            random_state=0, shuffle=False)
    >>> clf = CrossBaggingClassifier(estimator=SVC(), cv=5).fit(X, y)
    >>> clf.predict([[0, 0, 0, 0]])
    array([1])
    """

    def __init__(
        self,
        estimator: object = None,
        cv: Union[int, BaseCrossValidator, Iterable] = 5,
        *,
        n_jobs: Optional[int] = None,
        verbose=0,
    ):
        """
        Parameters
        ----------
        estimator:
            The base estimator to fit on each cross-validation fold.
            If None, then the base estimator is a
            :class:`~sklearn.tree.DecisionTreeClassifier`.

        cv:
            Determines the cross-validation splitting strategy.
            Possible inputs for cv are:

            - `None`, to use the default 5-fold cross validation,
            - int, to specify the number of folds in a `(Stratified)KFold`,
            - :term:`CV splitter`,
            - An iterable that generates (train, test) splits as arrays of indices.

            For `int`/`None` inputs, if the estimator is a classifier and `y` is
            either binary or multiclass, :class:`StratifiedKFold` is used. In all
            other cases, :class:`KFold` is used. These splitters are instantiated
            with `shuffle=False` so the splits will be the same across calls.

            Refer to the :ref:`User Guide <cross_validation>` for the various
            cross-validation strategies that can be used here.

        n_jobs:
            The number of jobs to run in parallel for both :meth:`fit` and
            :meth:`predict`. ``None`` means 1 unless in a
            :obj:`joblib.parallel_backend` context. ``-1`` means using all
            processors. See :term:`Glossary <n_jobs>` for more details.

        verbose:
            Controls the verbosity when fitting and predicting.
        """
        super().__init__(
            estimator=estimator,
            cv=cv,
            n_jobs=n_jobs,
            verbose=verbose,
        )

    def _validate_estimator(self, default=None):
        """Set ``estimator_``, falling back to a decision tree classifier.

        The ``default`` argument is accepted for signature compatibility but
        is not used.
        """
        fallback = DecisionTreeClassifier()
        super()._validate_estimator(default=fallback)

    def _validate_y(self, y):
        """Check classification targets and encode them as class indices.

        Stores the distinct labels in ``classes_`` and their count in
        ``n_classes_``; returns ``y`` expressed as indices into ``classes_``.
        """
        flat_y = column_or_1d(y, warn=True)
        check_classification_targets(flat_y)
        labels, encoded = np.unique(flat_y, return_inverse=True)
        self.classes_ = labels
        self.n_classes_ = len(labels)
        return encoded

    def predict_all_proba(self, X):
        """Predict class probabilities of every model in the ensemble for X.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The input samples. Sparse matrices are accepted only if
            they are supported by the base estimator.

        Returns
        -------
        p : ndarray of shape (n_estimators, n_samples, n_classes)
            The class probabilities of the input samples. The order of the
            classes corresponds to that in the attribute :term:`classes_`.
        """
        check_is_fitted(self)
        # Validate against the features seen during fit (reset=False).
        X = self._validate_data(
            X,
            accept_sparse=["csr", "csc"],
            dtype=None,
            force_all_finite=False,
            reset=False,
        )

        # Distribute the fitted estimators over the available jobs.
        n_jobs, _, starts = _partition_estimators(self.n_estimators, self.n_jobs)
        runner = Parallel(
            n_jobs=n_jobs, verbose=self.verbose, **self._parallel_args()
        )
        per_job = runner(
            delayed(_parallel_predict_proba)(
                self.estimators_[starts[job] : starts[job + 1]],
                X,
                self.n_classes_,
            )
            for job in range(n_jobs)
        )

        # Flatten the per-job results and stack them along a new leading axis.
        per_estimator = itertools.chain.from_iterable(per_job)
        return np.concatenate(
            [proba[np.newaxis, :, :] for proba in per_estimator], axis=0
        )

    def predict(self, X):
        """Predict class for X.

        The predicted class of an input sample is the class with the highest
        mean predicted probability, as returned by :meth:`predict_proba`.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The input samples. Sparse matrices are accepted only if
            they are supported by the base estimator.

        Returns
        -------
        y : ndarray of shape (n_samples,)
            The predicted classes.
        """
        mean_proba = self.predict_proba(X)
        best_class_index = np.argmax(mean_proba, axis=1)
        return self.classes_.take(best_class_index, axis=0)

    def predict_proba(self, X):
        """Predict class probabilities for X.

        The predicted class probabilities of an input sample are the mean of
        the per-estimator probabilities returned by
        :meth:`predict_all_proba`.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The input samples. Sparse matrices are accepted only if
            they are supported by the base estimator.

        Returns
        -------
        p : ndarray of shape (n_samples, n_classes)
            The class probabilities of the input samples. The order of the
            classes corresponds to that in the attribute :term:`classes_`.
        """
        # Average over the leading (estimator) axis.
        return self.predict_all_proba(X).mean(axis=0)

    def predict_log_proba(self, X):
        """Predict class log-probabilities for X.

        The predicted class log-probabilities of an input sample is computed as
        the log of the mean predicted class probabilities of the base
        estimators in the ensemble.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The input samples. Sparse matrices are accepted only if
            they are supported by the base estimator.

        Returns
        -------
        p : ndarray of shape (n_samples, n_classes)
            The class log-probabilities of the input samples. The order of the
            classes corresponds to that in the attribute :term:`classes_`.
        """
        check_is_fitted(self)
        if not hasattr(self.estimator_, "predict_log_proba"):
            # No native log-probabilities: take the log of the mean instead.
            return np.log(self.predict_proba(X))

        X = self._validate_data(
            X,
            accept_sparse=["csr", "csc"],
            dtype=None,
            force_all_finite=False,
            reset=False,
        )

        n_jobs, _, starts = _partition_estimators(self.n_estimators, self.n_jobs)
        runner = Parallel(n_jobs=n_jobs, verbose=self.verbose)
        all_log_proba = runner(
            delayed(_parallel_predict_log_proba)(
                self.estimators_[starts[job] : starts[job + 1]],
                X,
                self.n_classes_,
            )
            for job in range(n_jobs)
        )

        # Combine the per-job results in log space, then divide by the
        # ensemble size (a subtraction in log space).
        log_proba = all_log_proba[0]
        for job_log_proba in all_log_proba[1:]:
            log_proba = np.logaddexp(log_proba, job_log_proba)
        log_proba -= np.log(self.n_estimators)
        return log_proba

    @available_if(_estimator_has("decision_function"))
    def decision_function(self, X):
        """Average of the decision functions of the base classifiers.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The input samples. Sparse matrices are accepted only if
            they are supported by the base estimator.

        Returns
        -------
        score : ndarray of shape (n_samples, k)
            The decision function of the input samples. The columns correspond
            to the classes in sorted order, as they appear in the attribute
            ``classes_``. Regression and binary classification are special
            cases with ``k == 1``, otherwise ``k==n_classes``.
        """
        # noinspection DuplicatedCode
        check_is_fitted(self)

        X = self._validate_data(
            X,
            accept_sparse=["csr", "csc"],
            dtype=None,
            force_all_finite=False,
            reset=False,
        )

        n_jobs, _, starts = _partition_estimators(self.n_estimators, self.n_jobs)
        runner = Parallel(n_jobs=n_jobs, verbose=self.verbose)
        all_decisions = runner(
            delayed(_parallel_decision_function)(
                self.estimators_[starts[job] : starts[job + 1]],
                X,
            )
            for job in range(n_jobs)
        )

        # Sum the per-job results and normalise by the ensemble size.
        return sum(all_decisions) / self.n_estimators

    def _more_tags(self):
        """Report the ``allow_nan`` tag of the (default) base estimator."""
        estimator = (
            DecisionTreeClassifier() if self.estimator is None else self.estimator
        )
        return {"allow_nan": _safe_tags(estimator, "allow_nan")}

Ancestors

sklearn.base.ClassifierMixin
crossense.ensemble._bagging.BaseCrossBagging
sklearn.ensemble._base.BaseEnsemble
sklearn.base.MetaEstimatorMixin
sklearn.base.BaseEstimator
sklearn.utils._metadata_requests._MetadataRequester

Methods

def decision_function(self, X)

Average of the decision functions of the base classifiers.

Parameters

X : {array-like, sparse matrix} of shape (n_samples, n_features): The training input samples. Sparse matrices are accepted only if they are supported by the base estimator.

Returns

score : ndarray of shape (n_samples, k): The decision function of the input samples. The columns correspond to the classes in sorted order, as they appear in the attribute classes_. Regression and binary classification are special cases with k == 1, otherwise k==n_classes.

Expand source code

@available_if(_estimator_has("decision_function"))
def decision_function(self, X):
    """Average of the decision functions of the base classifiers.

    Parameters
    ----------
    X : {array-like, sparse matrix} of shape (n_samples, n_features)
        The input samples. Sparse matrices are accepted only if
        they are supported by the base estimator.

    Returns
    -------
    score : ndarray of shape (n_samples, k)
        The decision function of the input samples. The columns correspond
        to the classes in sorted order, as they appear in the attribute
        ``classes_``. Regression and binary classification are special
        cases with ``k == 1``, otherwise ``k==n_classes``.
    """
    # noinspection DuplicatedCode
    check_is_fitted(self)

    # Validate against the features seen during fit (reset=False).
    X = self._validate_data(
        X,
        accept_sparse=["csr", "csc"],
        dtype=None,
        force_all_finite=False,
        reset=False,
    )

    # Distribute the fitted estimators over the available jobs.
    n_jobs, _, starts = _partition_estimators(self.n_estimators, self.n_jobs)
    runner = Parallel(n_jobs=n_jobs, verbose=self.verbose)
    all_decisions = runner(
        delayed(_parallel_decision_function)(
            self.estimators_[starts[job] : starts[job + 1]],
            X,
        )
        for job in range(n_jobs)
    )

    # Sum the per-job results and normalise by the ensemble size.
    return sum(all_decisions) / self.n_estimators

def predict(self, X)

Predict class for X.

The predicted class of an input sample is computed as the class with the highest mean predicted probability. If base estimators do not implement a predict_proba method, then it resorts to voting.

Parameters

X : {array-like, sparse matrix} of shape (n_samples, n_features): The training input samples. Sparse matrices are accepted only if they are supported by the base estimator.

Returns

y : ndarray of shape (n_samples,): The predicted classes.

Expand source code

def predict(self, X):
    """Predict class for X.

    The predicted class of an input sample is the class with the highest
    mean predicted probability, as returned by ``predict_proba``.

    Parameters
    ----------
    X : {array-like, sparse matrix} of shape (n_samples, n_features)
        The input samples. Sparse matrices are accepted only if
        they are supported by the base estimator.

    Returns
    -------
    y : ndarray of shape (n_samples,)
        The predicted classes.
    """
    mean_proba = self.predict_proba(X)
    # Column index of the largest probability in every row.
    best_class_index = np.argmax(mean_proba, axis=1)
    return self.classes_.take(best_class_index, axis=0)

def predict_all_proba(self, X)

Predict class probabilities of all models for X.

Parameters

X : {array-like, sparse matrix} of shape (n_samples, n_features): The training input samples. Sparse matrices are accepted only if they are supported by the base estimator.

Returns

p : ndarray of shape (n_estimators, n_samples, n_classes): The class probabilities of the input samples. The order of the classes corresponds to that in the attribute :term:classes_.

Expand source code

def predict_all_proba(self, X):
    """Predict class probabilities of every model in the ensemble for X.

    Parameters
    ----------
    X : {array-like, sparse matrix} of shape (n_samples, n_features)
        The input samples. Sparse matrices are accepted only if
        they are supported by the base estimator.

    Returns
    -------
    p : ndarray of shape (n_estimators, n_samples, n_classes)
        The class probabilities of the input samples. The order of the
        classes corresponds to that in the attribute :term:`classes_`.
    """
    check_is_fitted(self)
    # Validate against the features seen during fit (reset=False).
    X = self._validate_data(
        X,
        accept_sparse=["csr", "csc"],
        dtype=None,
        force_all_finite=False,
        reset=False,
    )

    # Distribute the fitted estimators over the available jobs.
    n_jobs, _, starts = _partition_estimators(self.n_estimators, self.n_jobs)
    runner = Parallel(
        n_jobs=n_jobs, verbose=self.verbose, **self._parallel_args()
    )
    per_job = runner(
        delayed(_parallel_predict_proba)(
            self.estimators_[starts[job] : starts[job + 1]],
            X,
            self.n_classes_,
        )
        for job in range(n_jobs)
    )

    # Flatten the per-job results and stack them along a new leading axis.
    per_estimator = itertools.chain.from_iterable(per_job)
    return np.concatenate(
        [proba[np.newaxis, :, :] for proba in per_estimator], axis=0
    )

def predict_log_proba(self, X)

Predict class log-probabilities for X.

The predicted class log-probabilities of an input sample is computed as the log of the mean predicted class probabilities of the base estimators in the ensemble.

Parameters

X : {array-like, sparse matrix} of shape (n_samples, n_features): The training input samples. Sparse matrices are accepted only if they are supported by the base estimator.

Returns

p : ndarray of shape (n_samples, n_classes): The class log-probabilities of the input samples. The order of the classes corresponds to that in the attribute :term:classes_.

Expand source code

def predict_log_proba(self, X):
    """Predict class log-probabilities for X.

    The predicted class log-probabilities of an input sample is computed as
    the log of the mean predicted class probabilities of the base
    estimators in the ensemble.

    Parameters
    ----------
    X : {array-like, sparse matrix} of shape (n_samples, n_features)
        The input samples. Sparse matrices are accepted only if
        they are supported by the base estimator.

    Returns
    -------
    p : ndarray of shape (n_samples, n_classes)
        The class log-probabilities of the input samples. The order of the
        classes corresponds to that in the attribute :term:`classes_`.
    """
    check_is_fitted(self)
    if not hasattr(self.estimator_, "predict_log_proba"):
        # No native log-probabilities: take the log of the mean instead.
        return np.log(self.predict_proba(X))

    # Validate against the features seen during fit (reset=False).
    X = self._validate_data(
        X,
        accept_sparse=["csr", "csc"],
        dtype=None,
        force_all_finite=False,
        reset=False,
    )

    # Distribute the fitted estimators over the available jobs.
    n_jobs, _, starts = _partition_estimators(self.n_estimators, self.n_jobs)
    runner = Parallel(n_jobs=n_jobs, verbose=self.verbose)
    all_log_proba = runner(
        delayed(_parallel_predict_log_proba)(
            self.estimators_[starts[job] : starts[job + 1]],
            X,
            self.n_classes_,
        )
        for job in range(n_jobs)
    )

    # Combine the per-job results in log space, then divide by the
    # ensemble size (a subtraction in log space).
    log_proba = all_log_proba[0]
    for job_log_proba in all_log_proba[1:]:
        log_proba = np.logaddexp(log_proba, job_log_proba)
    log_proba -= np.log(self.n_estimators)
    return log_proba

def predict_proba(self, X)

Predict class probabilities for X.

The predicted class probabilities of an input sample is computed as the mean predicted class probabilities of the base estimators in the ensemble. If base estimators do not implement a predict_proba method, then it resorts to voting and the predicted class probabilities of an input sample represents the proportion of estimators predicting each class.

Parameters

X : {array-like, sparse matrix} of shape (n_samples, n_features): The input samples. Sparse matrices are accepted only if they are supported by the base estimator.

Returns

p : ndarray of shape (n_samples, n_classes): The class probabilities of the input samples. The order of the classes corresponds to that in the attribute :term:classes_.

Expand source code

def predict_proba(self, X):
    """Predict class probabilities for X.

    The class probabilities of a sample are the mean of the class
    probabilities predicted by the base estimators in the ensemble. If
    base estimators do not implement a ``predict_proba`` method, then it
    resorts to voting and the predicted class probabilities of an input
    sample represents the proportion of estimators predicting each class.

    Parameters
    ----------
    X : {array-like, sparse matrix} of shape (n_samples, n_features)
        The input samples. Sparse matrices are accepted only if they are
        supported by the base estimator.

    Returns
    -------
    p : ndarray of shape (n_samples, n_classes)
        The class probabilities of the input samples. The order of the
        classes corresponds to that in the attribute :term:`classes_`.
    """
    # Reduce over axis 0 of the stacked predictions.
    # NOTE(review): presumably axis 0 of ``predict_all_proba`` indexes the
    # estimators - confirm against its definition.
    return self.predict_all_proba(X).mean(axis=0)

def set_fit_request(self: crossense.ensemble._bagging.CrossBaggingClassifier, *, sample_weight: Union[bool, ForwardRef(None), str] = '$UNCHANGED$') ‑> crossense.ensemble._bagging.CrossBaggingClassifier

Request metadata passed to the fit method.

Note that this method is only relevant if enable_metadata_routing=True (see :func:sklearn.set_config). Please see :ref:User Guide <metadata_routing> on how the routing mechanism works.

The options for each parameter are:

True: metadata is requested, and passed to fit if provided. The request is ignored if metadata is not provided.
False: metadata is not requested and the meta-estimator will not pass it to fit.
None: metadata is not requested, and the meta-estimator will raise an error if the user provides it.
str: metadata should be passed to the meta-estimator with this given alias instead of the original name.

The default (sklearn.utils.metadata_routing.UNCHANGED) retains the existing request. This allows you to change the request for some parameters and not others.

Added in version: 1.3

Note

This method is only relevant if this estimator is used as a sub-estimator of a meta-estimator, e.g. used inside a :class:~sklearn.pipeline.Pipeline. Otherwise it has no effect.

Parameters

sample_weight : str, True, False, or None, default=sklearn.utils.metadata_routing.UNCHANGED: Metadata routing for sample_weight parameter in fit.

Returns

self : object: The updated object.

Expand source code

def func(**kw):
    """Record the metadata request given for each keyword argument.

    This docstring is overwritten below.
    See REQUESTER_DOC for expected functionality
    """
    if not _routing_enabled():
        raise RuntimeError(
            "This method is only available when metadata routing is enabled."
            " You can enable it using"
            " sklearn.set_config(enable_metadata_routing=True)."
        )

    # Key validation is optional; only build the key sets when it is on.
    if self.validate_keys:
        unexpected = set(kw) - set(self.keys)
        if unexpected:
            raise TypeError(
                f"Unexpected args: {unexpected}. Accepted arguments"
                f" are: {set(self.keys)}"
            )

    requests = instance._get_metadata_request()
    method_request = getattr(requests, self.name)

    for prop, alias in kw.items():
        # UNCHANGED leaves whatever request is already stored untouched.
        if alias is UNCHANGED:
            continue
        method_request.add_request(param=prop, alias=alias)

    instance._metadata_request = requests
    return instance

def set_score_request(self: crossense.ensemble._bagging.CrossBaggingClassifier, *, sample_weight: Union[bool, ForwardRef(None), str] = '$UNCHANGED$') ‑> crossense.ensemble._bagging.CrossBaggingClassifier

Request metadata passed to the score method.

Note that this method is only relevant if enable_metadata_routing=True (see :func:sklearn.set_config). Please see :ref:User Guide <metadata_routing> on how the routing mechanism works.

The options for each parameter are:

True: metadata is requested, and passed to score if provided. The request is ignored if metadata is not provided.
False: metadata is not requested and the meta-estimator will not pass it to score.
None: metadata is not requested, and the meta-estimator will raise an error if the user provides it.
str: metadata should be passed to the meta-estimator with this given alias instead of the original name.

The default (sklearn.utils.metadata_routing.UNCHANGED) retains the existing request. This allows you to change the request for some parameters and not others.

Added in version: 1.3

Note

This method is only relevant if this estimator is used as a sub-estimator of a meta-estimator, e.g. used inside a :class:~sklearn.pipeline.Pipeline. Otherwise it has no effect.

Parameters

sample_weight : str, True, False, or None, default=sklearn.utils.metadata_routing.UNCHANGED: Metadata routing for sample_weight parameter in score.

Returns

self : object: The updated object.

Expand source code

def func(**kw):
    """Record the metadata request given for each keyword argument.

    This docstring is overwritten below.
    See REQUESTER_DOC for expected functionality
    """
    if not _routing_enabled():
        raise RuntimeError(
            "This method is only available when metadata routing is enabled."
            " You can enable it using"
            " sklearn.set_config(enable_metadata_routing=True)."
        )

    # Key validation is optional; only build the key sets when it is on.
    if self.validate_keys:
        unexpected = set(kw) - set(self.keys)
        if unexpected:
            raise TypeError(
                f"Unexpected args: {unexpected}. Accepted arguments"
                f" are: {set(self.keys)}"
            )

    requests = instance._get_metadata_request()
    method_request = getattr(requests, self.name)

    for prop, alias in kw.items():
        # UNCHANGED leaves whatever request is already stored untouched.
        if alias is UNCHANGED:
            continue
        method_request.add_request(param=prop, alias=alias)

    instance._metadata_request = requests
    return instance

class CrossBaggingRegressor (estimator: object = None, cv: Union[int, BaseCrossValidator, Iterable] = 5, *, n_jobs: Optional[int] = None, verbose=0)

A cross-validation Bagging regressor.

A cross-validation Bagging regressor is an ensemble meta-estimator that fits one base regressor on each fold of a cross-validation generator.

Attributes

estimator_ : estimator: The base estimator from which the ensemble is grown.
n_features_in_ : int: Number of features seen during :term:fit.
feature_names_in_ : ndarray of shape (n_features_in_,): Names of features seen during :term:fit. Defined only when X has feature names that are all strings.
estimators_ : list of estimators: The collection of fitted sub-estimators.
estimators_samples_ : list of arrays: The subset of drawn samples (i.e., the in-bag samples) for each base estimator. Each subset is defined by an array of the indices selected.

Examples

>>> from sklearn.svm import SVR
>>> from crossense.ensemble import CrossBaggingRegressor
>>> from sklearn.datasets import make_regression
>>> X, y = make_regression(n_samples=100, n_features=4,
...                        n_informative=2, n_targets=1,
...                        random_state=0, shuffle=False)
>>> regr = CrossBaggingRegressor(estimator=SVR(), cv=5).fit(X, y)
>>> regr.predict([[0, 0, 0, 0]])
array([-2.8720...])

Parameters

estimator: The base estimator to fit on each cross-validation fold. If None, then the base estimator is a :class:~sklearn.tree.DecisionTreeRegressor.

cv: Determines the cross-validation splitting strategy. Possible inputs for cv are:

- `None`, to use the default 5-fold cross validation,
- int, to specify the number of folds in a `(Stratified)KFold`,
- :term:`CV splitter`,
- An iterable that generates (train, test) splits as arrays of indices.

For `int`/`None` inputs, if the estimator is a classifier and `y` is
either binary or multiclass, :class:`StratifiedKFold` is used. In all
other cases, :class:`KFold` is used. These splitters are instantiated
with `shuffle=False` so the splits will be the same across calls.

Refer to the :ref:`User Guide <cross_validation>` for the various
cross-validation strategies that can be used here.

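n_jobs: The number of jobs to run in parallel for both :meth:fit and :meth:predict. None means 1 unless in a :obj:joblib.parallel_backend context. -1 means using all processors. See :term:Glossary <n_jobs> for more details.

verbose: Controls the verbosity when fitting and predicting.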

Expand source code

class CrossBaggingRegressor(RegressorMixin, BaseCrossBagging):
    """A cross-validation Bagging regressor.

    A cross-validation Bagging regressor is an ensemble meta-estimator that
    fits one base regressor on each fold of a cross-validation generator and
    predicts with the mean of the base regressors' predictions.

    Attributes
    ----------
    estimator_ : estimator
        The base estimator from which the ensemble is grown.

    n_features_in_ : int
        Number of features seen during :term:`fit`.

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

    estimators_ : list of estimators
        The collection of fitted sub-estimators.

    estimators_samples_ : list of arrays
        The subset of drawn samples (i.e., the in-bag samples) for each base
        estimator. Each subset is defined by an array of the indices selected.

    Examples
    --------
    >>> from sklearn.svm import SVR
    >>> from crossense.ensemble import CrossBaggingRegressor
    >>> from sklearn.datasets import make_regression
    >>> X, y = make_regression(n_samples=100, n_features=4,
    ...                        n_informative=2, n_targets=1,
    ...                        random_state=0, shuffle=False)
    >>> regr = CrossBaggingRegressor(estimator=SVR(), cv=5).fit(X, y)
    >>> regr.predict([[0, 0, 0, 0]])
    array([-2.8720...])
    """

    def __init__(
        self,
        estimator: object = None,
        cv: Union[int, BaseCrossValidator, Iterable] = 5,
        *,
        n_jobs: Optional[int] = None,
        verbose: int = 0,
    ):
        """
        Parameters
        ----------
        estimator:
            The base estimator to fit on each cross-validation fold.
            If None, then the base estimator is a
            :class:`~sklearn.tree.DecisionTreeRegressor`.

        cv:
            Determines the cross-validation splitting strategy.
            Possible inputs for cv are:

            - `None`, to use the default 5-fold cross validation,
            - int, to specify the number of folds in a `(Stratified)KFold`,
            - :term:`CV splitter`,
            - An iterable that generates (train, test) splits as arrays of indices.

            For `int`/`None` inputs, if the estimator is a classifier and `y` is
            either binary or multiclass, :class:`StratifiedKFold` is used. In all
            other cases, :class:`KFold` is used. These splitters are instantiated
            with `shuffle=False` so the splits will be the same across calls.

            Refer to the :ref:`User Guide <cross_validation>` for the various
            cross-validation strategies that can be used here.

        n_jobs:
            The number of jobs to run in parallel for both :meth:`fit` and
            :meth:`predict`. ``None`` means 1 unless in a
            :obj:`joblib.parallel_backend` context. ``-1`` means using all
            processors. See :term:`Glossary <n_jobs>` for more details.

        verbose:
            Controls the verbosity when fitting and predicting.
        """
        super().__init__(
            estimator=estimator,
            cv=cv,
            n_jobs=n_jobs,
            verbose=verbose,
        )

    def predict_all(self, X):
        """Predict regression target of all models for X.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The input samples. Sparse matrices are accepted only if
            they are supported by the base estimator.

        Returns
        -------
        p : ndarray of shape (n_estimators, n_samples, )
            The predicted values, one row per fitted sub-estimator.

        Raises
        ------
        NotFittedError
            If the ensemble has not been fitted yet.
        """
        # ``BaseCrossBagging.__init__`` already creates ``estimators_`` and
        # ``estimators_samples_``, so the attribute-free
        # ``check_is_fitted(self)`` would accept an unfitted ensemble.
        # Checking ``estimator_`` explicitly raises NotFittedError instead.
        # NOTE(review): assumes ``estimator_`` is only set while fitting (via
        # ``_validate_estimator``) - confirm against ``fit``.
        # noinspection DuplicatedCode
        check_is_fitted(self, "estimator_")
        # Check data
        X = self._validate_data(
            X,
            accept_sparse=["csr", "csc"],
            dtype=None,
            force_all_finite=False,
            reset=False,
        )

        # Parallel loop: each job handles a contiguous slice of estimators.
        n_jobs, _, starts = _partition_estimators(self.n_estimators, self.n_jobs)

        all_y_hat = Parallel(n_jobs=n_jobs, verbose=self.verbose)(
            delayed(_parallel_predict_regression)(
                self.estimators_[starts[i] : starts[i + 1]],
                X,
            )
            for i in range(n_jobs)
        )
        # Flatten the per-job results, then stack the predictions along a
        # new leading axis.
        all_y_hat = list(itertools.chain.from_iterable(all_y_hat))
        return np.concatenate([x[np.newaxis, :] for x in all_y_hat], axis=0)

    def predict(self, X):
        """Predict regression target for X.

        The predicted regression target of an input sample is computed as the
        mean predicted regression targets of the estimators in the ensemble.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The input samples. Sparse matrices are accepted only if
            they are supported by the base estimator.

        Returns
        -------
        y : ndarray of shape (n_samples,)
            The predicted values.
        """
        all_y_hat = self.predict_all(X)
        # Reduce: average the per-estimator predictions.
        y_hat = sum(all_y_hat) / self.n_estimators

        return y_hat

    # noinspection PyMethodOverriding
    def _validate_estimator(self):
        """Check the estimator and set the estimator_ attribute."""
        super()._validate_estimator(default=DecisionTreeRegressor())

    def _more_tags(self):
        """Return estimator tags; ``allow_nan`` mirrors the base estimator's tag."""
        # Fall back to the same default that ``_validate_estimator`` uses.
        if self.estimator is None:
            estimator = DecisionTreeRegressor()
        else:
            estimator = self.estimator
        return {"allow_nan": _safe_tags(estimator, "allow_nan")}

Ancestors

sklearn.base.RegressorMixin
crossense.ensemble._bagging.BaseCrossBagging
sklearn.ensemble._base.BaseEnsemble
sklearn.base.MetaEstimatorMixin
sklearn.base.BaseEstimator
sklearn.utils._metadata_requests._MetadataRequester

Methods

def predict(self, X)

Predict regression target for X.

The predicted regression target of an input sample is computed as the mean predicted regression targets of the estimators in the ensemble.

Parameters

X : {array-like, sparse matrix} of shape (n_samples, n_features): The input samples. Sparse matrices are accepted only if they are supported by the base estimator.

Returns

y : ndarray of shape (n_samples,): The predicted values.

Expand source code

def predict(self, X):
    """Predict regression target for X.

    Each sample's prediction is the mean of the regression targets
    predicted for it by the estimators in the ensemble.

    Parameters
    ----------
    X : {array-like, sparse matrix} of shape (n_samples, n_features)
        The input samples. Sparse matrices are accepted only if they are
        supported by the base estimator.

    Returns
    -------
    y : ndarray of shape (n_samples,)
        The predicted values.
    """
    # Reduce: add up the per-estimator predictions and divide by the
    # ensemble size.
    per_estimator = self.predict_all(X)
    return sum(per_estimator) / self.n_estimators

def predict_all(self, X)

Predict regression target of all models for X.

Parameters

X : {array-like, sparse matrix} of shape (n_samples, n_features): The input samples. Sparse matrices are accepted only if they are supported by the base estimator.

Returns

p : ndarray of shape (n_estimators, n_samples, ): The predicted values.

Expand source code

def predict_all(self, X):
    """Predict regression target of all models for X.

    Parameters
    ----------
    X : {array-like, sparse matrix} of shape (n_samples, n_features)
        The input samples. Sparse matrices are accepted only if
        they are supported by the base estimator.

    Returns
    -------
    p : ndarray of shape (n_estimators, n_samples, )
        The predicted values, one row per fitted sub-estimator.

    Raises
    ------
    NotFittedError
        If the ensemble has not been fitted yet.
    """
    # ``BaseCrossBagging.__init__`` already creates ``estimators_`` and
    # ``estimators_samples_``, so the attribute-free ``check_is_fitted(self)``
    # would accept an unfitted ensemble. Checking ``estimator_`` explicitly
    # raises NotFittedError instead.
    # NOTE(review): assumes ``estimator_`` is only set while fitting (via
    # ``_validate_estimator``) - confirm against ``fit``.
    # noinspection DuplicatedCode
    check_is_fitted(self, "estimator_")
    # Check data
    X = self._validate_data(
        X,
        accept_sparse=["csr", "csc"],
        dtype=None,
        force_all_finite=False,
        reset=False,
    )

    # Parallel loop: each job handles a contiguous slice of estimators.
    n_jobs, _, starts = _partition_estimators(self.n_estimators, self.n_jobs)

    all_y_hat = Parallel(n_jobs=n_jobs, verbose=self.verbose)(
        delayed(_parallel_predict_regression)(
            self.estimators_[starts[i] : starts[i + 1]],
            X,
        )
        for i in range(n_jobs)
    )
    # Flatten the per-job results, then stack the predictions along a new
    # leading axis.
    all_y_hat = list(itertools.chain.from_iterable(all_y_hat))
    return np.concatenate([x[np.newaxis, :] for x in all_y_hat], axis=0)

def set_fit_request(self: crossense.ensemble._bagging.CrossBaggingRegressor, *, sample_weight: Union[bool, ForwardRef(None), str] = '$UNCHANGED$') ‑> crossense.ensemble._bagging.CrossBaggingRegressor

Request metadata passed to the fit method.

Note that this method is only relevant if enable_metadata_routing=True (see :func:sklearn.set_config). Please see :ref:User Guide <metadata_routing> on how the routing mechanism works.

The options for each parameter are:

True: metadata is requested, and passed to fit if provided. The request is ignored if metadata is not provided.
False: metadata is not requested and the meta-estimator will not pass it to fit.
None: metadata is not requested, and the meta-estimator will raise an error if the user provides it.
str: metadata should be passed to the meta-estimator with this given alias instead of the original name.

The default (sklearn.utils.metadata_routing.UNCHANGED) retains the existing request. This allows you to change the request for some parameters and not others.

Added in version: 1.3

Note

This method is only relevant if this estimator is used as a sub-estimator of a meta-estimator, e.g. used inside a :class:~sklearn.pipeline.Pipeline. Otherwise it has no effect.

Parameters

sample_weight : str, True, False, or None, default=sklearn.utils.metadata_routing.UNCHANGED: Metadata routing for sample_weight parameter in fit.

Returns

self : object: The updated object.

Expand source code

def func(**kw):
    """Record the metadata request given for each keyword argument.

    This docstring is overwritten below.
    See REQUESTER_DOC for expected functionality
    """
    if not _routing_enabled():
        raise RuntimeError(
            "This method is only available when metadata routing is enabled."
            " You can enable it using"
            " sklearn.set_config(enable_metadata_routing=True)."
        )

    # Key validation is optional; only build the key sets when it is on.
    if self.validate_keys:
        unexpected = set(kw) - set(self.keys)
        if unexpected:
            raise TypeError(
                f"Unexpected args: {unexpected}. Accepted arguments"
                f" are: {set(self.keys)}"
            )

    requests = instance._get_metadata_request()
    method_request = getattr(requests, self.name)

    for prop, alias in kw.items():
        # UNCHANGED leaves whatever request is already stored untouched.
        if alias is UNCHANGED:
            continue
        method_request.add_request(param=prop, alias=alias)

    instance._metadata_request = requests
    return instance

def set_score_request(self: crossense.ensemble._bagging.CrossBaggingRegressor, *, sample_weight: Union[bool, ForwardRef(None), str] = '$UNCHANGED$') ‑> crossense.ensemble._bagging.CrossBaggingRegressor

Request metadata passed to the score method.

Note that this method is only relevant if enable_metadata_routing=True (see :func:sklearn.set_config). Please see :ref:User Guide <metadata_routing> on how the routing mechanism works.

The options for each parameter are:

True: metadata is requested, and passed to score if provided. The request is ignored if metadata is not provided.
False: metadata is not requested and the meta-estimator will not pass it to score.
None: metadata is not requested, and the meta-estimator will raise an error if the user provides it.
str: metadata should be passed to the meta-estimator with this given alias instead of the original name.

The default (sklearn.utils.metadata_routing.UNCHANGED) retains the existing request. This allows you to change the request for some parameters and not others.

Added in version: 1.3

Note

This method is only relevant if this estimator is used as a sub-estimator of a meta-estimator, e.g. used inside a :class:~sklearn.pipeline.Pipeline. Otherwise it has no effect.

Parameters

sample_weight : str, True, False, or None, default=sklearn.utils.metadata_routing.UNCHANGED: Metadata routing for sample_weight parameter in score.

Returns

self : object: The updated object.

Expand source code

def func(**kw):
    """Record the metadata request given for each keyword argument.

    This docstring is overwritten below.
    See REQUESTER_DOC for expected functionality
    """
    if not _routing_enabled():
        raise RuntimeError(
            "This method is only available when metadata routing is enabled."
            " You can enable it using"
            " sklearn.set_config(enable_metadata_routing=True)."
        )

    # Key validation is optional; only build the key sets when it is on.
    if self.validate_keys:
        unexpected = set(kw) - set(self.keys)
        if unexpected:
            raise TypeError(
                f"Unexpected args: {unexpected}. Accepted arguments"
                f" are: {set(self.keys)}"
            )

    requests = instance._get_metadata_request()
    method_request = getattr(requests, self.name)

    for prop, alias in kw.items():
        # UNCHANGED leaves whatever request is already stored untouched.
        if alias is UNCHANGED:
            continue
        method_request.add_request(param=prop, alias=alias)

    instance._metadata_request = requests
    return instance