Module crossense.ensemble
The :mod:crossense.ensemble module includes ensemble-based methods for
classification, regression and anomaly detection.
Expand source code
"""
The :mod:`crossense.ensemble` module includes ensemble-based methods for
classification, regression and anomaly detection.
"""
from ._bagging import BaseCrossBagging, CrossBaggingClassifier, CrossBaggingRegressor
# Public names of this package; all three are imported from ``._bagging`` above.
__all__ = [
"BaseCrossBagging",
"CrossBaggingClassifier",
"CrossBaggingRegressor",
]
Sub-modules
crossense.ensemble.tests
Classes
class BaseCrossBagging (estimator=None, cv=5, *, n_jobs=None, verbose=0)-
Base class for cross-fold Bagging meta-estimator.
Warning: This class should not be used directly. Use derived classes instead.
Expand source code
class BaseCrossBagging(BaseEnsemble, metaclass=ABCMeta):
    """Base class for cross-fold Bagging meta-estimator.

    The ensemble size is taken from the number of splits of the
    cross-validation generator, and each ensemble member is assigned the
    first index array (the training indices) of one split.

    Warning: This class should not be used directly. Use derived classes
    instead.
    """

    _parameter_constraints: dict = {
        "estimator": [HasMethods(["fit", "predict"]), None],
        "n_jobs": [None, Integral],
        "random_state": ["random_state"],
        "verbose": ["verbose"],
    }

    @abstractmethod
    def __init__(
        self,
        estimator=None,
        cv=5,
        *,
        n_jobs=None,
        verbose=0,
    ):
        # The splitter is resolved eagerly because the ensemble size is
        # derived from its number of splits.
        self.cv: _BaseKFold = check_cv(cv, classifier=is_classifier(estimator))
        super().__init__(
            estimator=estimator,
            n_estimators=self.cv.n_splits,
        )
        self.n_jobs = n_jobs
        self.verbose = verbose
        # NOTE(review): these fitted-style attributes exist before fit();
        # presumably that is why check_is_fitted() in
        # _generate_fold_indices() does not raise - confirm before changing.
        self.estimators_ = []
        self.estimators_samples_ = []

    @_fit_context(
        # BaseBagging.estimator is not validated yet
        prefer_skip_nested_validation=False
    )
    def fit(self, X, y, sample_weight=None):
        """Build a Bagging ensemble of estimators from the training set (X, y).

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The training input samples. Sparse matrices are accepted only if
            they are supported by the base estimator.

        y : array-like of shape (n_samples,)
            The target values (class labels in classification, real numbers in
            regression).

        sample_weight : array-like of shape (n_samples,), default=None
            Sample weights. If None, then samples are equally weighted.
            Note that this is supported only if the base estimator supports
            sample weighting.

        Returns
        -------
        self : object
            The value returned by :meth:`_fit`, i.e. this estimator.
        """
        # Convert data (X is required to be 2d and indexable)
        X, y = self._validate_data(
            X,
            y,
            accept_sparse=["csr", "csc"],
            dtype=None,
            force_all_finite=False,
            multi_output=True,
        )
        return self._fit(X, y, sample_weight=sample_weight)

    # noinspection PyMethodMayBeStatic
    def _parallel_args(self):
        """Return extra keyword arguments forwarded to ``Parallel``."""
        return {}

    def _fit(
        self,
        X,
        y,
        max_depth=None,
        sample_weight=None,
        check_input=True,
    ):
        """Build a Bagging ensemble of estimators from the training set (X, y).

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The training input samples. Sparse matrices are accepted only if
            they are supported by the base estimator.

        y : array-like of shape (n_samples,)
            The target values (class labels in classification, real numbers in
            regression).

        max_depth : int, default=None
            Override value used when constructing base estimator. Only
            supported if the base estimator has a max_depth parameter.

        sample_weight : array-like of shape (n_samples,), default=None
            Sample weights. If None, then samples are equally weighted.
            Note that this is supported only if the base estimator supports
            sample weighting.

        check_input : bool, default=True
            Override value used when fitting base estimator. Only supported
            if the base estimator has a check_input parameter for fit function.

        Returns
        -------
        self : object
            This estimator, with ``estimators_`` and ``estimators_samples_``
            refreshed.
        """
        self._generate_fold_indices(X, y, None)

        if sample_weight is not None:
            sample_weight = _check_sample_weight(sample_weight, X, dtype=None)

        # Remap output
        self._n_samples = X.shape[0]
        y = self._validate_y(y)

        # Check parameters
        self._validate_estimator()

        if max_depth is not None:
            self.estimator_.max_depth = max_depth

        # Parallel loop
        n_jobs, n_estimators, starts = _partition_estimators(
            self.n_estimators, self.n_jobs
        )
        total_n_estimators = sum(n_estimators)

        all_results = Parallel(
            n_jobs=n_jobs, verbose=self.verbose, **self._parallel_args()
        )(
            delayed(_parallel_build_estimators)(
                n_estimators[i],
                self,
                self.estimators_samples_[starts[i] : starts[i + 1]],
                X,
                y,
                sample_weight,
                total_n_estimators,
                verbose=self.verbose,
                check_input=check_input,
            )
            for i in range(n_jobs)
        )

        # Reduce
        self.estimators_ = list(
            itertools.chain.from_iterable(t[0] for t in all_results)
        )

        return self

    # noinspection PyMethodMayBeStatic
    def _validate_y(self, y):
        """Flatten a single-column ``y`` to 1d; return other shapes unchanged."""
        if len(y.shape) == 1 or y.shape[1] == 1:
            return column_or_1d(y, warn=True)
        return y

    def _generate_fold_indices(self, X, y, groups):
        """Store the first index array of every split in ``estimators_samples_``."""
        check_is_fitted(self)
        # Rebuild the list on every call. Appending to the previous list made
        # a second fit() slice out the indices recorded by the first fit(),
        # because _fit() only reads the first ``n_estimators`` entries.
        self.estimators_samples_ = [
            fold[0] for fold in self.cv.split(X, y, groups)
        ]

    def set_params(self, **params):
        """Set the parameters of this estimator, resolving ``cv`` first-class.

        ``cv`` is passed through ``check_cv`` and stored on ``self.cv``; all
        remaining parameters are delegated to the parent ``set_params``.

        Parameters
        ----------
        **params : dict
            Estimator parameters.

        Returns
        -------
        self : estimator instance
            Estimator instance.
        """
        # Membership test instead of truthiness, so an explicit ``cv=None``
        # (documented as "default 5-fold") is honoured rather than ignored.
        cv_given = "cv" in params
        cv = params.pop("cv", None)
        # Apply the other parameters first so that a new ``estimator`` passed
        # in the same call is the one inspected by is_classifier() below.
        super().set_params(**params)
        if cv_given:
            self.cv = check_cv(cv, classifier=is_classifier(self.estimator))
            # Keep the ensemble size in step with the new splitter; it was
            # previously left at the split count chosen in __init__.
            n_splits = getattr(self.cv, "n_splits", None)
            if n_splits is not None:
                self.n_estimators = n_splits
        return self
# Ancestors
- sklearn.ensemble._base.BaseEnsemble
- sklearn.base.MetaEstimatorMixin
- sklearn.base.BaseEstimator
- sklearn.utils._metadata_requests._MetadataRequester
Subclasses
- crossense.ensemble._bagging.CrossBaggingClassifier
- crossense.ensemble._bagging.CrossBaggingRegressor
Methods
def fit(self, X, y, sample_weight=None)-
Build a Bagging ensemble of estimators from the training set (X, y).
Parameters
X : {array-like, sparse matrix} of shape (n_samples, n_features) - The training input samples. Sparse matrices are accepted only if they are supported by the base estimator.
y : array-like of shape (n_samples,) - The target values (class labels in classification, real numbers in regression).
sample_weight : array-like of shape (n_samples,), default=None - Sample weights. If None, then samples are equally weighted. Note that this is supported only if the base estimator supports sample weighting.
Expand source code
@_fit_context(
    # BaseBagging.estimator is not validated yet
    prefer_skip_nested_validation=False
)
def fit(self, X, y, sample_weight=None):
    """Fit the ensemble on the training set (X, y).

    Parameters
    ----------
    X : {array-like, sparse matrix} of shape (n_samples, n_features)
        The training input samples. Sparse matrices are accepted only if
        they are supported by the base estimator.

    y : array-like of shape (n_samples,)
        The target values (class labels in classification, real numbers in
        regression).

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights. If None, then samples are equally weighted.
        Note that this is supported only if the base estimator supports
        sample weighting.

    Returns
    -------
    self : object
        Whatever :meth:`_fit` returns.
    """
    # X has to come out of validation 2d and indexable.
    validation_options = {
        "accept_sparse": ["csr", "csc"],
        "dtype": None,
        "force_all_finite": False,
        "multi_output": True,
    }
    X, y = self._validate_data(X, y, **validation_options)
    return self._fit(X, y, sample_weight=sample_weight)
# def set_fit_request(self: crossense.ensemble._bagging.BaseCrossBagging, *, sample_weight: Union[bool, ForwardRef(None), str] = '$UNCHANGED$') ‑> crossense.ensemble._bagging.BaseCrossBagging-
Request metadata passed to the `fit` method.
Note that this method is only relevant if `enable_metadata_routing=True` (see :func:`sklearn.set_config`). Please see the :ref:`User Guide <metadata_routing>` on how the routing mechanism works.
The options for each parameter are:
- `True`: metadata is requested, and passed to `fit` if provided. The request is ignored if metadata is not provided.
- `False`: metadata is not requested and the meta-estimator will not pass it to `fit`.
- `None`: metadata is not requested, and the meta-estimator will raise an error if the user provides it.
- `str`: metadata should be passed to the meta-estimator with this given alias instead of the original name.
The default (`sklearn.utils.metadata_routing.UNCHANGED`) retains the existing request. This allows you to change the request for some parameters and not others.
Added in version: 1.3
Note
This method is only relevant if this estimator is used as a sub-estimator of a meta-estimator, e.g. used inside a :class:`~sklearn.pipeline.Pipeline`. Otherwise it has no effect.
Parameters
sample_weight : str, True, False, or None, default=sklearn.utils.metadata_routing.UNCHANGED - Metadata routing for the `sample_weight` parameter in `fit`.
Returns
self:object- The updated object.
Expand source code
def func(**kw):
    """Record the metadata requests passed as keyword arguments.

    This docstring is overwritten below.
    See REQUESTER_DOC for expected functionality
    """
    routing_is_on = _routing_enabled()
    if not routing_is_on:
        raise RuntimeError(
            "This method is only available when metadata routing is enabled."
            " You can enable it using"
            " sklearn.set_config(enable_metadata_routing=True)."
        )

    # Key validation is optional; only compute the difference when asked to.
    if self.validate_keys:
        unexpected = set(kw) - set(self.keys)
        if unexpected:
            raise TypeError(
                f"Unexpected args: {set(kw) - set(self.keys)}. Accepted arguments"
                f" are: {set(self.keys)}"
            )

    metadata_requests = instance._get_metadata_request()
    per_method_request = getattr(metadata_requests, self.name)

    # UNCHANGED entries keep whatever request was already stored.
    to_apply = ((name, alias) for name, alias in kw.items() if alias is not UNCHANGED)
    for name, alias in to_apply:
        per_method_request.add_request(param=name, alias=alias)

    instance._metadata_request = metadata_requests
    return instance
def set_params(self, **params)-
Set the parameters of this estimator.
The method works on simple estimators as well as on nested objects (such as :class:`~sklearn.pipeline.Pipeline`). The latter have parameters of the form `<component>__<parameter>` so that it's possible to update each component of a nested object.
Parameters
**params : dict - Estimator parameters.
Returns
self:estimator instance- Estimator instance.
Expand source code
def set_params(self, **params):
    """Set the parameters of this estimator, resolving ``cv`` first-class.

    ``cv`` is passed through ``check_cv`` and stored on ``self.cv``; all
    remaining parameters are delegated to the parent ``set_params``.

    Parameters
    ----------
    **params : dict
        Estimator parameters.

    Returns
    -------
    self : estimator instance
        Estimator instance.
    """
    # Membership test instead of truthiness, so an explicit ``cv=None``
    # (documented as "default 5-fold") is honoured rather than ignored.
    cv_given = "cv" in params
    cv = params.pop("cv", None)
    # Apply the other parameters first so that a new ``estimator`` passed in
    # the same call is the one inspected by is_classifier() below.
    super().set_params(**params)
    if cv_given:
        self.cv = check_cv(cv, classifier=is_classifier(self.estimator))
        # Keep the ensemble size in step with the new splitter; it was
        # previously left at the split count chosen at construction time.
        n_splits = getattr(self.cv, "n_splits", None)
        if n_splits is not None:
            self.n_estimators = n_splits
    return self
class CrossBaggingClassifier (estimator: object = None, cv: Union[int, BaseCrossValidator, Iterable] = 5, *, n_jobs: Optional[int] = None, verbose=0)-
A cross-validation Bagging classifier.
A cross-validation Bagging classifier is an ensemble meta-estimator that fits one base classifier per split of a cross-validation generator, each on the training indices of its split.
Attributes
estimator_ : estimator - The base estimator from which the ensemble is grown.
n_features_in_ : int - Number of features seen during :term:`fit`.
feature_names_in_ : ndarray of shape (n_features_in_,) - Names of features seen during :term:`fit`. Defined only when `X` has feature names that are all strings.
estimators_ : list of estimators - The collection of fitted base estimators.
estimators_samples_ : list of arrays - The subset of drawn samples (i.e., the in-bag samples) for each base estimator. Each subset is defined by an array of the indices selected.
classes_ : ndarray of shape (n_classes,) - The class labels.
n_classes_ : int or list - The number of classes.
Examples
>>> from sklearn.svm import SVC
>>> from crossense.ensemble import CrossBaggingClassifier
>>> from sklearn.datasets import make_classification
>>> X, y = make_classification(n_samples=100, n_features=4,
...                            n_informative=2, n_redundant=0,
...                            random_state=0, shuffle=False)
>>> clf = CrossBaggingClassifier(estimator=SVC(), cv=5).fit(X, y)
>>> clf.predict([[0, 0, 0, 0]])
array([1])
Parameters
estimator: The base estimator to fit on the training portion of each cross-validation split. If None, then the base estimator is a :class:`~sklearn.tree.DecisionTreeClassifier`.
cv: Determines the cross-validation splitting strategy. Possible inputs for cv are:
- `None`, to use the default 5-fold cross validation,
- int, to specify the number of folds in a `(Stratified)KFold`,
- :term:`CV splitter`,
- An iterable that generates (train, test) splits as arrays of indices.
For `int`/`None` inputs, if the estimator is a classifier and `y` is either binary or multiclass, :class:`StratifiedKFold` is used. In all other cases, :class:`KFold` is used. These splitters are instantiated with `shuffle=False` so the splits will be the same across calls.
Refer to the :ref:`User Guide <cross_validation>` for the various cross-validation strategies that can be used here.
n_jobs: The number of jobs to run in parallel for both :meth:`fit` and :meth:`predict`. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary <n_jobs>` for more details.
verbose: Controls the verbosity when fitting and predicting.
Expand source code
class CrossBaggingClassifier(ClassifierMixin, BaseCrossBagging):
    """A cross-validation Bagging classifier.

    A cross-validation Bagging classifier is an ensemble meta-estimator that
    fits one base classifier per split of a cross-validation generator, each
    on the training indices of its split. Class probabilities are the mean of
    the per-estimator probabilities.

    Attributes
    ----------
    estimator_ : estimator
        The base estimator from which the ensemble is grown.

    n_features_in_ : int
        Number of features seen during :term:`fit`.

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

    estimators_ : list of estimators
        The collection of fitted base estimators.

    estimators_samples_ : list of arrays
        The subset of drawn samples (i.e., the in-bag samples) for each base
        estimator. Each subset is defined by an array of the indices selected.

    classes_ : ndarray of shape (n_classes,)
        The classes labels.

    n_classes_ : int or list
        The number of classes.

    Examples
    --------
    >>> from sklearn.svm import SVC
    >>> from crossense.ensemble import CrossBaggingClassifier
    >>> from sklearn.datasets import make_classification
    >>> X, y = make_classification(n_samples=100, n_features=4,
    ...                            n_informative=2, n_redundant=0,
    ...                            random_state=0, shuffle=False)
    >>> clf = CrossBaggingClassifier(estimator=SVC(), cv=5).fit(X, y)
    >>> clf.predict([[0, 0, 0, 0]])
    array([1])
    """

    def __init__(
        self,
        estimator: object = None,
        cv: Union[int, BaseCrossValidator, Iterable] = 5,
        *,
        n_jobs: Optional[int] = None,
        verbose=0,
    ):
        """
        Parameters
        ----------
        estimator: The base estimator to fit on the training portion of each
            cross-validation split. If None, then the base estimator is a
            :class:`~sklearn.tree.DecisionTreeClassifier`.

        cv: Determines the cross-validation splitting strategy.
            Possible inputs for cv are:

            - `None`, to use the default 5-fold cross validation,
            - int, to specify the number of folds in a `(Stratified)KFold`,
            - :term:`CV splitter`,
            - An iterable that generates (train, test) splits as arrays of indices.

            For `int`/`None` inputs, if the estimator is a classifier and `y` is
            either binary or multiclass, :class:`StratifiedKFold` is used. In all
            other cases, :class:`KFold` is used. These splitters are instantiated
            with `shuffle=False` so the splits will be the same across calls.

            Refer :ref:`User Guide <cross_validation>` for the various
            cross-validation strategies that can be used here.

        n_jobs: The number of jobs to run in parallel for both :meth:`fit` and
            :meth:`predict`. ``None`` means 1 unless in a
            :obj:`joblib.parallel_backend` context. ``-1`` means using all
            processors. See :term:`Glossary <n_jobs>` for more details.

        verbose: Controls the verbosity when fitting and predicting.
        """
        super().__init__(
            estimator=estimator,
            cv=cv,
            n_jobs=n_jobs,
            verbose=verbose,
        )

    def _validate_estimator(self, default=None):
        """Check the estimator and set the estimator_ attribute."""
        # ``default`` is accepted for signature compatibility only; the
        # fallback estimator is always a fresh DecisionTreeClassifier.
        super()._validate_estimator(default=DecisionTreeClassifier())

    def _validate_y(self, y):
        """Encode ``y`` as integer class indices and record the classes.

        Sets ``classes_`` and ``n_classes_`` as a side effect.
        """
        y = column_or_1d(y, warn=True)
        check_classification_targets(y)
        self.classes_, y = np.unique(y, return_inverse=True)
        self.n_classes_ = len(self.classes_)

        return y

    def predict_all_proba(self, X):
        """Predict class probabilities of all models for X.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The input samples. Sparse matrices are accepted only if
            they are supported by the base estimator.

        Returns
        -------
        p : ndarray of shape (n_estimators, n_samples, n_classes)
            The class probabilities of the input samples. The order of the
            classes corresponds to that in the attribute :term:`classes_`.
        """
        check_is_fitted(self)
        # Check data
        X = self._validate_data(
            X,
            accept_sparse=["csr", "csc"],
            dtype=None,
            force_all_finite=False,
            reset=False,
        )

        # Parallel loop
        n_jobs, _, starts = _partition_estimators(self.n_estimators, self.n_jobs)

        all_proba = Parallel(
            n_jobs=n_jobs, verbose=self.verbose, **self._parallel_args()
        )(
            delayed(_parallel_predict_proba)(
                self.estimators_[starts[i] : starts[i + 1]],
                X,
                self.n_classes_,
            )
            for i in range(n_jobs)
        )
        # Flatten the per-job results, then stack them along a new leading
        # axis.
        all_proba = list(itertools.chain.from_iterable(all_proba))
        return np.concatenate([x[np.newaxis, :, :] for x in all_proba], axis=0)

    def predict(self, X):
        """Predict class for X.

        The predicted class of an input sample is computed as the class with
        the highest mean predicted probability. If base estimators do not
        implement a ``predict_proba`` method, then it resorts to voting.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The input samples. Sparse matrices are accepted only if
            they are supported by the base estimator.

        Returns
        -------
        y : ndarray of shape (n_samples,)
            The predicted classes.
        """
        predicted_probability = self.predict_proba(X)
        # Map the winning column index back to the original class label.
        return self.classes_.take(np.argmax(predicted_probability, axis=1), axis=0)

    def predict_proba(self, X):
        """Predict class probabilities for X.

        The predicted class probabilities of an input sample is computed as
        the mean predicted class probabilities of the base estimators in the
        ensemble. If base estimators do not implement a ``predict_proba``
        method, then it resorts to voting and the predicted class probabilities
        of an input sample represents the proportion of estimators predicting
        each class.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The input samples. Sparse matrices are accepted only if
            they are supported by the base estimator.

        Returns
        -------
        p : ndarray of shape (n_samples, n_classes)
            The class probabilities of the input samples. The order of the
            classes corresponds to that in the attribute :term:`classes_`.
        """
        all_proba = self.predict_all_proba(X)

        # Reduce
        proba = all_proba.mean(axis=0)

        return proba

    def predict_log_proba(self, X):
        """Predict class log-probabilities for X.

        The predicted class log-probabilities of an input sample is computed as
        the log of the mean predicted class probabilities of the base
        estimators in the ensemble.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The input samples. Sparse matrices are accepted only if
            they are supported by the base estimator.

        Returns
        -------
        p : ndarray of shape (n_samples, n_classes)
            The class log-probabilities of the input samples. The order of the
            classes corresponds to that in the attribute :term:`classes_`.
        """
        check_is_fitted(self)
        if hasattr(self.estimator_, "predict_log_proba"):
            # Check data
            X = self._validate_data(
                X,
                accept_sparse=["csr", "csc"],
                dtype=None,
                force_all_finite=False,
                reset=False,
            )

            # Parallel loop
            n_jobs, _, starts = _partition_estimators(self.n_estimators, self.n_jobs)

            # Forward _parallel_args() like fit and predict_all_proba do, so a
            # subclass override applies to every parallel section.
            all_log_proba = Parallel(
                n_jobs=n_jobs, verbose=self.verbose, **self._parallel_args()
            )(
                delayed(_parallel_predict_log_proba)(
                    self.estimators_[starts[i] : starts[i + 1]],
                    X,
                    self.n_classes_,
                )
                for i in range(n_jobs)
            )

            # Reduce
            log_proba = all_log_proba[0]

            for j in range(1, len(all_log_proba)):
                log_proba = np.logaddexp(log_proba, all_log_proba[j])

            log_proba -= np.log(self.n_estimators)

        else:
            log_proba = np.log(self.predict_proba(X))

        return log_proba

    @available_if(_estimator_has("decision_function"))
    def decision_function(self, X):
        """Average of the decision functions of the base classifiers.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The input samples. Sparse matrices are accepted only if
            they are supported by the base estimator.

        Returns
        -------
        score : ndarray of shape (n_samples, k)
            The decision function of the input samples. The columns correspond
            to the classes in sorted order, as they appear in the attribute
            ``classes_``. Regression and binary classification are special
            cases with ``k == 1``, otherwise ``k==n_classes``.
        """
        # noinspection DuplicatedCode
        check_is_fitted(self)

        # Check data
        X = self._validate_data(
            X,
            accept_sparse=["csr", "csc"],
            dtype=None,
            force_all_finite=False,
            reset=False,
        )

        # Parallel loop
        n_jobs, _, starts = _partition_estimators(self.n_estimators, self.n_jobs)

        # Forward _parallel_args() like fit and predict_all_proba do.
        all_decisions = Parallel(
            n_jobs=n_jobs, verbose=self.verbose, **self._parallel_args()
        )(
            delayed(_parallel_decision_function)(
                self.estimators_[starts[i] : starts[i + 1]],
                X,
            )
            for i in range(n_jobs)
        )

        # Reduce
        decisions = sum(all_decisions) / self.n_estimators

        return decisions

    def _more_tags(self):
        """Report the ``allow_nan`` tag of the (possibly default) base estimator."""
        if self.estimator is None:
            estimator = DecisionTreeClassifier()
        else:
            estimator = self.estimator

        return {"allow_nan": _safe_tags(estimator, "allow_nan")}
# Ancestors
- sklearn.base.ClassifierMixin
- crossense.ensemble._bagging.BaseCrossBagging
- sklearn.ensemble._base.BaseEnsemble
- sklearn.base.MetaEstimatorMixin
- sklearn.base.BaseEstimator
- sklearn.utils._metadata_requests._MetadataRequester
Methods
def decision_function(self, X)-
Average of the decision functions of the base classifiers.
Parameters
X:{array-like, sparse matrix}ofshape (n_samples, n_features)- The training input samples. Sparse matrices are accepted only if they are supported by the base estimator.
Returns
score : ndarray of shape (n_samples, k) - The decision function of the input samples. The columns correspond to the classes in sorted order, as they appear in the attribute ``classes_``. Regression and binary classification are special cases with ``k == 1``, otherwise ``k == n_classes``.
Expand source code
@available_if(_estimator_has("decision_function"))
def decision_function(self, X):
    """Average of the decision functions of the base classifiers.

    Parameters
    ----------
    X : {array-like, sparse matrix} of shape (n_samples, n_features)
        The input samples. Sparse matrices are accepted only if
        they are supported by the base estimator.

    Returns
    -------
    score : ndarray of shape (n_samples, k)
        The decision function of the input samples. The columns correspond
        to the classes in sorted order, as they appear in the attribute
        ``classes_``. Regression and binary classification are special
        cases with ``k == 1``, otherwise ``k==n_classes``.
    """
    # noinspection DuplicatedCode
    check_is_fitted(self)

    # Check data
    X = self._validate_data(
        X,
        accept_sparse=["csr", "csc"],
        dtype=None,
        force_all_finite=False,
        reset=False,
    )

    # Parallel loop
    n_jobs, _, starts = _partition_estimators(self.n_estimators, self.n_jobs)

    # Forward _parallel_args() so this parallel section is configured the
    # same way as the ones in fit and predict_all_proba.
    all_decisions = Parallel(
        n_jobs=n_jobs, verbose=self.verbose, **self._parallel_args()
    )(
        delayed(_parallel_decision_function)(
            self.estimators_[starts[i] : starts[i + 1]],
            X,
        )
        for i in range(n_jobs)
    )

    # Reduce
    decisions = sum(all_decisions) / self.n_estimators

    return decisions
# def predict(self, X)-
Predict class for X.
The predicted class of an input sample is computed as the class with the highest mean predicted probability. If base estimators do not implement a ``predict_proba`` method, then it resorts to voting.
Parameters
X : {array-like, sparse matrix} of shape (n_samples, n_features) - The input samples. Sparse matrices are accepted only if they are supported by the base estimator.
Returns
y:ndarrayofshape (n_samples,)- The predicted classes.
Expand source code
def predict(self, X):
    """Return the most probable class label for every row of X.

    The label chosen for a sample is the class whose mean predicted
    probability across the ensemble is highest. If base estimators do not
    implement a ``predict_proba`` method, then it resorts to voting.

    Parameters
    ----------
    X : {array-like, sparse matrix} of shape (n_samples, n_features)
        The input samples. Sparse matrices are accepted only if
        they are supported by the base estimator.

    Returns
    -------
    y : ndarray of shape (n_samples,)
        The predicted classes.
    """
    mean_proba = self.predict_proba(X)
    winning_columns = np.argmax(mean_proba, axis=1)
    # Column indices are positions in ``classes_``; translate them to labels.
    return self.classes_.take(winning_columns, axis=0)
# def predict_all_proba(self, X)-
Predict class probabilities of all models for X.
Parameters
X:{array-like, sparse matrix}ofshape (n_samples, n_features)- The training input samples. Sparse matrices are accepted only if they are supported by the base estimator.
Returns
p : ndarray of shape (n_estimators, n_samples, n_classes) - The class probabilities of the input samples. The order of the classes corresponds to that in the attribute :term:`classes_`.
Expand source code
def predict_all_proba(self, X):
    """Return the class probabilities predicted by each ensemble member.

    Parameters
    ----------
    X : {array-like, sparse matrix} of shape (n_samples, n_features)
        The input samples. Sparse matrices are accepted only if
        they are supported by the base estimator.

    Returns
    -------
    p : ndarray of shape (n_estimators, n_samples, n_classes)
        The class probabilities of the input samples. The order of the
        classes corresponds to that in the attribute :term:`classes_`.
    """
    check_is_fitted(self)

    # Validate X against what was seen during fit (reset=False).
    validation_options = {
        "accept_sparse": ["csr", "csc"],
        "dtype": None,
        "force_all_finite": False,
        "reset": False,
    }
    X = self._validate_data(X, **validation_options)

    # Split the fitted estimators into one contiguous chunk per job.
    n_jobs, _, starts = _partition_estimators(self.n_estimators, self.n_jobs)
    runner = Parallel(n_jobs=n_jobs, verbose=self.verbose, **self._parallel_args())
    per_job = runner(
        delayed(_parallel_predict_proba)(
            self.estimators_[starts[job] : starts[job + 1]],
            X,
            self.n_classes_,
        )
        for job in range(n_jobs)
    )

    # Flatten the per-job results, then stack along a new leading axis.
    per_estimator = list(itertools.chain.from_iterable(per_job))
    expanded = [proba[np.newaxis, :, :] for proba in per_estimator]
    return np.concatenate(expanded, axis=0)
# def predict_log_proba(self, X)-
Predict class log-probabilities for X.
The predicted class log-probabilities of an input sample is computed as the log of the mean predicted class probabilities of the base estimators in the ensemble.
Parameters
X:{array-like, sparse matrix}ofshape (n_samples, n_features)- The training input samples. Sparse matrices are accepted only if they are supported by the base estimator.
Returns
p : ndarray of shape (n_samples, n_classes) - The class log-probabilities of the input samples. The order of the classes corresponds to that in the attribute :term:`classes_`.
Expand source code
def predict_log_proba(self, X):
    """Predict class log-probabilities for X.

    The predicted class log-probabilities of an input sample is computed as
    the log of the mean predicted class probabilities of the base
    estimators in the ensemble.

    Parameters
    ----------
    X : {array-like, sparse matrix} of shape (n_samples, n_features)
        The input samples. Sparse matrices are accepted only if
        they are supported by the base estimator.

    Returns
    -------
    p : ndarray of shape (n_samples, n_classes)
        The class log-probabilities of the input samples. The order of the
        classes corresponds to that in the attribute :term:`classes_`.
    """
    check_is_fitted(self)
    if hasattr(self.estimator_, "predict_log_proba"):
        # Check data
        X = self._validate_data(
            X,
            accept_sparse=["csr", "csc"],
            dtype=None,
            force_all_finite=False,
            reset=False,
        )

        # Parallel loop
        n_jobs, _, starts = _partition_estimators(self.n_estimators, self.n_jobs)

        # Forward _parallel_args() so this parallel section is configured
        # the same way as the ones in fit and predict_all_proba.
        all_log_proba = Parallel(
            n_jobs=n_jobs, verbose=self.verbose, **self._parallel_args()
        )(
            delayed(_parallel_predict_log_proba)(
                self.estimators_[starts[i] : starts[i + 1]],
                X,
                self.n_classes_,
            )
            for i in range(n_jobs)
        )

        # Reduce: log-sum-exp across jobs, then divide by the ensemble size
        # in log space to obtain the log of the mean.
        log_proba = all_log_proba[0]

        for j in range(1, len(all_log_proba)):
            log_proba = np.logaddexp(log_proba, all_log_proba[j])

        log_proba -= np.log(self.n_estimators)

    else:
        # Base estimator has no predict_log_proba: take the log of the
        # averaged probabilities instead.
        log_proba = np.log(self.predict_proba(X))

    return log_proba
# def predict_proba(self, X)-
Predict class probabilities for X.
The predicted class probabilities of an input sample is computed as the mean predicted class probabilities of the base estimators in the ensemble. If base estimators do not implement a ``predict_proba`` method, then it resorts to voting and the predicted class probabilities of an input sample represents the proportion of estimators predicting each class.
Parameters
X : {array-like, sparse matrix} of shape (n_samples, n_features) - The input samples. Sparse matrices are accepted only if they are supported by the base estimator.
Returns
p : ndarray of shape (n_samples, n_classes) - The class probabilities of the input samples. The order of the classes corresponds to that in the attribute :term:`classes_`.
Expand source code
def predict_proba(self, X):
    """Return the ensemble-averaged class probabilities for X.

    The probabilities of a sample are the mean, over all base estimators,
    of the per-estimator class probabilities. If base estimators do not
    implement a ``predict_proba`` method, then it resorts to voting and the
    predicted class probabilities of an input sample represents the
    proportion of estimators predicting each class.

    Parameters
    ----------
    X : {array-like, sparse matrix} of shape (n_samples, n_features)
        The input samples. Sparse matrices are accepted only if
        they are supported by the base estimator.

    Returns
    -------
    p : ndarray of shape (n_samples, n_classes)
        The class probabilities of the input samples. The order of the
        classes corresponds to that in the attribute :term:`classes_`.
    """
    # Axis 0 of predict_all_proba's result indexes the estimators.
    return self.predict_all_proba(X).mean(axis=0)
# def set_fit_request(self: crossense.ensemble._bagging.CrossBaggingClassifier, *, sample_weight: Union[bool, ForwardRef(None), str] = '$UNCHANGED$') ‑> crossense.ensemble._bagging.CrossBaggingClassifier-
Request metadata passed to the `fit` method.
Note that this method is only relevant if `enable_metadata_routing=True` (see :func:`sklearn.set_config`). Please see the :ref:`User Guide <metadata_routing>` on how the routing mechanism works.
The options for each parameter are:
- `True`: metadata is requested, and passed to `fit` if provided. The request is ignored if metadata is not provided.
- `False`: metadata is not requested and the meta-estimator will not pass it to `fit`.
- `None`: metadata is not requested, and the meta-estimator will raise an error if the user provides it.
- `str`: metadata should be passed to the meta-estimator with this given alias instead of the original name.
The default (`sklearn.utils.metadata_routing.UNCHANGED`) retains the existing request. This allows you to change the request for some parameters and not others.
Added in version: 1.3
Note
This method is only relevant if this estimator is used as a sub-estimator of a meta-estimator, e.g. used inside a :class:`~sklearn.pipeline.Pipeline`. Otherwise it has no effect.
Parameters
sample_weight : str, True, False, or None, default=sklearn.utils.metadata_routing.UNCHANGED - Metadata routing for the `sample_weight` parameter in `fit`.
Returns
self:object- The updated object.
Expand source code
def func(**kw):
    """Record the metadata requests passed as keyword arguments.

    This docstring is overwritten below.
    See REQUESTER_DOC for expected functionality
    """
    routing_is_on = _routing_enabled()
    if not routing_is_on:
        raise RuntimeError(
            "This method is only available when metadata routing is enabled."
            " You can enable it using"
            " sklearn.set_config(enable_metadata_routing=True)."
        )

    # Key validation is optional; only compute the difference when asked to.
    if self.validate_keys:
        unexpected = set(kw) - set(self.keys)
        if unexpected:
            raise TypeError(
                f"Unexpected args: {set(kw) - set(self.keys)}. Accepted arguments"
                f" are: {set(self.keys)}"
            )

    metadata_requests = instance._get_metadata_request()
    per_method_request = getattr(metadata_requests, self.name)

    # UNCHANGED entries keep whatever request was already stored.
    to_apply = ((name, alias) for name, alias in kw.items() if alias is not UNCHANGED)
    for name, alias in to_apply:
        per_method_request.add_request(param=name, alias=alias)

    instance._metadata_request = metadata_requests
    return instance
def set_score_request(self: crossense.ensemble._bagging.CrossBaggingClassifier, *, sample_weight: Union[bool, ForwardRef(None), str] = '$UNCHANGED$') ‑> crossense.ensemble._bagging.CrossBaggingClassifier-
Request metadata passed to the `score` method.
Note that this method is only relevant if `enable_metadata_routing=True` (see :func:`sklearn.set_config`). Please see the :ref:`User Guide <metadata_routing>` on how the routing mechanism works.
The options for each parameter are:
- `True`: metadata is requested, and passed to `score` if provided. The request is ignored if metadata is not provided.
- `False`: metadata is not requested and the meta-estimator will not pass it to `score`.
- `None`: metadata is not requested, and the meta-estimator will raise an error if the user provides it.
- `str`: metadata should be passed to the meta-estimator with this given alias instead of the original name.
The default (`sklearn.utils.metadata_routing.UNCHANGED`) retains the existing request. This allows you to change the request for some parameters and not others.
Added in version: 1.3
Note
This method is only relevant if this estimator is used as a sub-estimator of a meta-estimator, e.g. used inside a :class:`~sklearn.pipeline.Pipeline`. Otherwise it has no effect.
Parameters
sample_weight : str, True, False, or None, default=sklearn.utils.metadata_routing.UNCHANGED - Metadata routing for the `sample_weight` parameter in `score`.
Returns
self:object- The updated object.
Expand source code
def func(**kw):
    """Apply the request values passed as keyword arguments and return ``instance``.

    This docstring is overwritten below.
    See REQUESTER_DOC for expected functionality.

    ``self`` and ``instance`` are free variables supplied by the enclosing
    scope, which is not part of this listing.

    Raises
    ------
    RuntimeError
        If metadata routing is not enabled.
    TypeError
        If ``self.validate_keys`` is truthy and ``kw`` holds a key that is
        not in ``self.keys``.
    """
    # Guard 1: the setter is unusable unless routing is switched on.
    if not _routing_enabled():
        raise RuntimeError(
            "This method is only available when metadata routing is enabled."
            " You can enable it using"
            " sklearn.set_config(enable_metadata_routing=True)."
        )

    # Guard 2: reject keyword names outside the accepted key set.
    if self.validate_keys:
        if set(kw) - set(self.keys):
            raise TypeError(
                f"Unexpected args: {set(kw) - set(self.keys)}. Accepted arguments"
                f" are: {set(self.keys)}"
            )

    request_set = instance._get_metadata_request()
    per_method_request = getattr(request_set, self.name)
    for param_name, requested in kw.items():
        # UNCHANGED is the sentinel for "leave the existing request alone".
        if requested is UNCHANGED:
            continue
        per_method_request.add_request(param=param_name, alias=requested)

    instance._metadata_request = request_set
    return instance
class CrossBaggingRegressor (estimator: object = None, cv: Union[int, BaseCrossValidator, Iterable] = 5, *, n_jobs: Optional[int] = None, verbose=0)-
A cross-validation Bagging regressor.
A cross-validation Bagging regressor is an ensemble meta-estimator that fits each base regressor on one fold of a cross-validation generator.
Attributes
estimator_:estimator- The base estimator from which the ensemble is grown.
n_features_in_:int- Number of features seen during :term:
fit. feature_names_in_:ndarrayofshape (n_features_in_,)- Names of features seen during :term:
fit. Defined only whenXhas feature names that are all strings. estimators_:listofestimators- The collection of fitted sub-estimators.
estimators_samples_:listofarrays- The subset of drawn samples (i.e., the in-bag samples) for each base estimator. Each subset is defined by an array of the indices selected.
Examples
>>> from sklearn.svm import SVR >>> from crossense.ensemble import CrossBaggingRegressor >>> from sklearn.datasets import make_regression >>> X, y = make_regression(n_samples=100, n_features=4, ... n_informative=2, n_targets=1, ... random_state=0, shuffle=False) >>> regr = CrossBaggingRegressor(estimator=SVR(), cv=5).fit(X, y) >>> regr.predict([[0, 0, 0, 0]]) array([-2.8720...])Parameters
estimator: The base estimator to fit on the cross-validation folds of the dataset. If None, then the base estimator is a :class:`~sklearn.tree.DecisionTreeRegressor`.
cv: Determines the cross-validation splitting strategy. Possible inputs for cv are:
- `None`, to use the default 5-fold cross validation, - int, to specify the number of folds in a `(Stratified)KFold`, - :term:`CV splitter`, - An iterable that generates (train, test) splits as arrays of indices. For `int`/`None` inputs, if the estimator is a classifier and `y` is either binary or multiclass, :class:`StratifiedKFold` is used. In all other cases, :class:`KFold` is used. These splitters are instantiated with `shuffle=False` so the splits will be the same across calls. Refer to the :ref:`User Guide <cross_validation>` for the various cross-validation strategies that can be used here.
n_jobs: The number of jobs to run in parallel for both :meth:`fit` and :meth:`predict`. `None` means 1 unless in a :obj:`joblib.parallel_backend` context. `-1` means using all processors. See :term:`Glossary <n_jobs>` for more details.
verbose: Controls the verbosity when fitting and predicting.
Expand source code
class CrossBaggingRegressor(RegressorMixin, BaseCrossBagging):
    """A cross-validation Bagging regressor.

    A cross-validation Bagging regressor is an ensemble meta-estimator that
    fits each base regressor on one fold of a cross-validation generator.
    :meth:`predict` divides the sum of the per-estimator predictions returned
    by :meth:`predict_all` by ``n_estimators``.

    Attributes
    ----------
    estimator_ : estimator
        The base estimator from which the ensemble is grown.

    n_features_in_ : int
        Number of features seen during :term:`fit`.

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

    estimators_ : list of estimators
        The collection of fitted sub-estimators.

    estimators_samples_ : list of arrays
        The subset of drawn samples (i.e., the in-bag samples) for each base
        estimator. Each subset is defined by an array of the indices selected.

    Examples
    --------
    >>> from sklearn.svm import SVR
    >>> from crossense.ensemble import CrossBaggingRegressor
    >>> from sklearn.datasets import make_regression
    >>> X, y = make_regression(n_samples=100, n_features=4,
    ...                        n_informative=2, n_targets=1,
    ...                        random_state=0, shuffle=False)
    >>> regr = CrossBaggingRegressor(estimator=SVR(), cv=5).fit(X, y)
    >>> regr.predict([[0, 0, 0, 0]])
    array([-2.8720...])
    """

    def __init__(
        self,
        estimator: object = None,
        cv: Union[int, BaseCrossValidator, Iterable] = 5,
        *,
        n_jobs: Optional[int] = None,
        verbose: int = 0,
    ):
        """
        Parameters
        ----------
        estimator:
            The base estimator to fit on the cross-validation folds of the
            dataset. If None, then the base estimator is a
            :class:`~sklearn.tree.DecisionTreeRegressor`.

        cv:
            Determines the cross-validation splitting strategy.
            Possible inputs for cv are:

            - `None`, to use the default 5-fold cross validation,
            - int, to specify the number of folds in a `(Stratified)KFold`,
            - :term:`CV splitter`,
            - An iterable that generates (train, test) splits as arrays of
              indices.

            For `int`/`None` inputs, if the estimator is a classifier and `y`
            is either binary or multiclass, :class:`StratifiedKFold` is used.
            In all other cases, :class:`KFold` is used. These splitters are
            instantiated with `shuffle=False` so the splits will be the same
            across calls.

            Refer to the :ref:`User Guide <cross_validation>` for the various
            cross-validation strategies that can be used here.

        n_jobs:
            The number of jobs to run in parallel for both :meth:`fit` and
            :meth:`predict`. ``None`` means 1 unless in a
            :obj:`joblib.parallel_backend` context. ``-1`` means using all
            processors. See :term:`Glossary <n_jobs>` for more details.

        verbose:
            Controls the verbosity when fitting and predicting.
        """
        super().__init__(
            estimator=estimator,
            cv=cv,
            n_jobs=n_jobs,
            verbose=verbose,
        )

    def predict_all(self, X):
        """Predict regression target of all models for X.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The input samples. Sparse matrices are accepted only if
            they are supported by the base estimator.

        Returns
        -------
        p : ndarray of shape (n_estimators, n_samples)
            The predicted values, one row per fitted sub-estimator.
        """
        # noinspection DuplicatedCode
        check_is_fitted(self)
        # Check data; reset=False keeps the feature metadata recorded at fit time.
        X = self._validate_data(
            X,
            accept_sparse=["csr", "csc"],
            dtype=None,
            force_all_finite=False,
            reset=False,
        )

        # Parallel loop: each job predicts with one contiguous slice of estimators.
        n_jobs, _, starts = _partition_estimators(self.n_estimators, self.n_jobs)

        all_y_hat = Parallel(n_jobs=n_jobs, verbose=self.verbose)(
            delayed(_parallel_predict_regression)(
                self.estimators_[starts[i] : starts[i + 1]],
                X,
            )
            for i in range(n_jobs)
        )
        # Flatten the per-job results into one prediction per estimator, then
        # stack them along a new leading axis.
        all_y_hat = list(itertools.chain.from_iterable(all_y_hat))
        return np.stack(all_y_hat, axis=0)

    def predict(self, X):
        """Predict regression target for X.

        The predicted regression target of an input sample is computed as the
        mean predicted regression targets of the estimators in the ensemble.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The input samples. Sparse matrices are accepted only if
            they are supported by the base estimator.

        Returns
        -------
        y : ndarray of shape (n_samples,)
            The predicted values.
        """
        all_y_hat = self.predict_all(X)

        # Reduce: sum over the estimator axis and divide by the ensemble size.
        y_hat = sum(all_y_hat) / self.n_estimators

        return y_hat

    # noinspection PyMethodOverriding
    def _validate_estimator(self):
        """Check the estimator and set the estimator_ attribute."""
        super()._validate_estimator(default=DecisionTreeRegressor())

    def _more_tags(self):
        """Report the ``allow_nan`` tag of the effective base estimator."""
        if self.estimator is None:
            # Same fallback as _validate_estimator.
            estimator = DecisionTreeRegressor()
        else:
            estimator = self.estimator
        return {"allow_nan": _safe_tags(estimator, "allow_nan")}
- sklearn.base.RegressorMixin
- crossense.ensemble._bagging.BaseCrossBagging
- sklearn.ensemble._base.BaseEnsemble
- sklearn.base.MetaEstimatorMixin
- sklearn.base.BaseEstimator
- sklearn.utils._metadata_requests._MetadataRequester
Methods
def predict(self, X)-
Predict regression target for X.
The predicted regression target of an input sample is computed as the mean predicted regression targets of the estimators in the ensemble.
Parameters
X : {array-like, sparse matrix} of shape (n_samples, n_features) - The input samples. Sparse matrices are accepted only if they are supported by the base estimator.
Returns
y:ndarrayofshape (n_samples,)- The predicted values.
Expand source code
def predict(self, X):
    """Return the ensemble's regression prediction for X.

    The per-estimator predictions from ``predict_all`` are summed and the
    total is divided by ``self.n_estimators``.

    Parameters
    ----------
    X : {array-like, sparse matrix} of shape (n_samples, n_features)
        The input samples. Sparse matrices are accepted only if
        they are supported by the base estimator.

    Returns
    -------
    y : ndarray of shape (n_samples,)
        The predicted values.
    """
    # Reduce over the estimator axis in a single expression.
    return sum(self.predict_all(X)) / self.n_estimators
Predict regression target of all models for X.
Parameters
X : {array-like, sparse matrix} of shape (n_samples, n_features) - The input samples. Sparse matrices are accepted only if they are supported by the base estimator.
Returns
p:ndarrayofshape (n_estimators, n_samples, )- The predicted values.
Expand source code
def predict_all(self, X):
    """Predict regression target of all models for X.

    Parameters
    ----------
    X : {array-like, sparse matrix} of shape (n_samples, n_features)
        The input samples. Sparse matrices are accepted only if
        they are supported by the base estimator.

    Returns
    -------
    p : ndarray of shape (n_estimators, n_samples)
        The predicted values, one row per fitted sub-estimator.
    """
    # noinspection DuplicatedCode
    check_is_fitted(self)
    # Check data; reset=False keeps the feature metadata recorded at fit time.
    X = self._validate_data(
        X,
        accept_sparse=["csr", "csc"],
        dtype=None,
        force_all_finite=False,
        reset=False,
    )

    # Parallel loop: each job predicts with one contiguous slice of estimators.
    n_jobs, _, starts = _partition_estimators(self.n_estimators, self.n_jobs)

    all_y_hat = Parallel(n_jobs=n_jobs, verbose=self.verbose)(
        delayed(_parallel_predict_regression)(
            self.estimators_[starts[i] : starts[i + 1]],
            X,
        )
        for i in range(n_jobs)
    )
    # Flatten the per-job results into one prediction per estimator, then
    # stack them along a new leading axis.
    all_y_hat = list(itertools.chain.from_iterable(all_y_hat))
    return np.stack(all_y_hat, axis=0)
Request metadata passed to the `fit` method.
Note that this method is only relevant if `enable_metadata_routing=True` (see :func:`sklearn.set_config`).
Please see :ref:`User Guide <metadata_routing>` on how the routing mechanism works. The options for each parameter are:
-
`True`: metadata is requested, and passed to `fit` if provided. The request is ignored if metadata is not provided. -
`False`: metadata is not requested and the meta-estimator will not pass it to `fit`. -
None: metadata is not requested, and the meta-estimator will raise an error if the user provides it. -
str: metadata should be passed to the meta-estimator with this given alias instead of the original name.
The default (
sklearn.utils.metadata_routing.UNCHANGED) retains the existing request. This allows you to change the request for some parameters and not others.Added in version: 1.3
Note
This method is only relevant if this estimator is used as a sub-estimator of a meta-estimator, e.g. used inside a :class:`~sklearn.pipeline.Pipeline`. Otherwise it has no effect.
Parameters
sample_weight : str, True, False, or None, default=sklearn.utils.metadata_routing.UNCHANGED - Metadata routing for the `sample_weight` parameter in `fit`.
Returns
self:object- The updated object.
Expand source code
def func(**kw):
    """Apply the request values passed as keyword arguments and return ``instance``.

    This docstring is overwritten below.
    See REQUESTER_DOC for expected functionality.

    ``self`` and ``instance`` are free variables supplied by the enclosing
    scope, which is not part of this listing.

    Raises
    ------
    RuntimeError
        If metadata routing is not enabled.
    TypeError
        If ``self.validate_keys`` is truthy and ``kw`` holds a key that is
        not in ``self.keys``.
    """
    # Guard 1: the setter is unusable unless routing is switched on.
    if not _routing_enabled():
        raise RuntimeError(
            "This method is only available when metadata routing is enabled."
            " You can enable it using"
            " sklearn.set_config(enable_metadata_routing=True)."
        )

    # Guard 2: reject keyword names outside the accepted key set.
    if self.validate_keys:
        if set(kw) - set(self.keys):
            raise TypeError(
                f"Unexpected args: {set(kw) - set(self.keys)}. Accepted arguments"
                f" are: {set(self.keys)}"
            )

    request_set = instance._get_metadata_request()
    per_method_request = getattr(request_set, self.name)
    for param_name, requested in kw.items():
        # UNCHANGED is the sentinel for "leave the existing request alone".
        if requested is UNCHANGED:
            continue
        per_method_request.add_request(param=param_name, alias=requested)

    instance._metadata_request = request_set
    return instance
def set_score_request(self: crossense.ensemble._bagging.CrossBaggingRegressor, *, sample_weight: Union[bool, ForwardRef(None), str] = '$UNCHANGED$') ‑> crossense.ensemble._bagging.CrossBaggingRegressor-
Request metadata passed to the `score` method.
Note that this method is only relevant if `enable_metadata_routing=True` (see :func:`sklearn.set_config`).
Please see :ref:`User Guide <metadata_routing>` on how the routing mechanism works. The options for each parameter are:
-
`True`: metadata is requested, and passed to `score` if provided. The request is ignored if metadata is not provided. -
`False`: metadata is not requested and the meta-estimator will not pass it to `score`. -
None: metadata is not requested, and the meta-estimator will raise an error if the user provides it. -
str: metadata should be passed to the meta-estimator with this given alias instead of the original name.
The default (
sklearn.utils.metadata_routing.UNCHANGED) retains the existing request. This allows you to change the request for some parameters and not others.Added in version: 1.3
Note
This method is only relevant if this estimator is used as a sub-estimator of a meta-estimator, e.g. used inside a :class:`~sklearn.pipeline.Pipeline`. Otherwise it has no effect.
Parameters
sample_weight : str, True, False, or None, default=sklearn.utils.metadata_routing.UNCHANGED - Metadata routing for the `sample_weight` parameter in `score`.
Returns
self:object- The updated object.
Expand source code
def func(**kw):
    """Apply the request values passed as keyword arguments and return ``instance``.

    This docstring is overwritten below.
    See REQUESTER_DOC for expected functionality.

    ``self`` and ``instance`` are free variables supplied by the enclosing
    scope, which is not part of this listing.

    Raises
    ------
    RuntimeError
        If metadata routing is not enabled.
    TypeError
        If ``self.validate_keys`` is truthy and ``kw`` holds a key that is
        not in ``self.keys``.
    """
    # Guard 1: the setter is unusable unless routing is switched on.
    if not _routing_enabled():
        raise RuntimeError(
            "This method is only available when metadata routing is enabled."
            " You can enable it using"
            " sklearn.set_config(enable_metadata_routing=True)."
        )

    # Guard 2: reject keyword names outside the accepted key set.
    if self.validate_keys:
        if set(kw) - set(self.keys):
            raise TypeError(
                f"Unexpected args: {set(kw) - set(self.keys)}. Accepted arguments"
                f" are: {set(self.keys)}"
            )

    request_set = instance._get_metadata_request()
    per_method_request = getattr(request_set, self.name)
    for param_name, requested in kw.items():
        # UNCHANGED is the sentinel for "leave the existing request alone".
        if requested is UNCHANGED:
            continue
        per_method_request.add_request(param=param_name, alias=requested)

    instance._metadata_request = request_set
    return instance