
Commit 13b20df

chore: class inheritance order (#725)
* chore: class inheritance order
* skip tests, bump min narwhals
1 parent a67ca76 commit 13b20df

23 files changed: +45 -43 lines changed
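
The reorder across the library puts the scikit-learn mixins before `BaseEstimator`, matching the convention in scikit-learn's developer guide (mixins on the left, `BaseEstimator` last) so that mixin-provided behaviour and estimator tags resolve first in the MRO. A minimal sketch of the convention, illustrative only and not code from this commit:

    from sklearn.base import BaseEstimator, RegressorMixin

    class MeanRegressor(RegressorMixin, BaseEstimator):
        """Toy estimator: mixins first, BaseEstimator last."""

        def fit(self, X, y):
            self.mean_ = sum(y) / len(y)  # plain Python to keep the sketch dependency-free
            return self

        def predict(self, X):
            return [self.mean_] * len(X)

    # RegressorMixin precedes BaseEstimator in the MRO, so e.g. its R^2 `score` method applies.
    print([cls.__name__ for cls in MeanRegressor.__mro__])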

pyproject.toml

+1 -1

@@ -20,7 +20,7 @@ maintainers = [
 ]

 dependencies = [
-    "narwhals>=1.2.0",
+    "narwhals>=1.5.0",
     "pandas>=1.1.5",
     "scikit-learn>=1.0",
     "importlib-metadata >= 1.0; python_version < '3.8'",

sklego/common.py

+2 -2

@@ -4,11 +4,11 @@

 import numpy as np
 import pandas as pd
-from sklearn.base import TransformerMixin
+from sklearn.base import BaseEstimator, TransformerMixin
 from sklearn.utils.validation import check_array, check_is_fitted, check_X_y


-class TrainOnlyTransformerMixin(TransformerMixin):
+class TrainOnlyTransformerMixin(TransformerMixin, BaseEstimator):
     """Mixin class for transformers that can handle training and test data differently.

     This mixin allows using a separate function for transforming training and test data.

sklego/decomposition/pca_reconstruction.py

+1 -1

@@ -4,7 +4,7 @@
 from sklearn.utils.validation import FLOAT_DTYPES, check_array, check_is_fitted


-class PCAOutlierDetection(BaseEstimator, OutlierMixin):
+class PCAOutlierDetection(OutlierMixin, BaseEstimator):
     """`PCAOutlierDetection` is an outlier detector based on the reconstruction error from PCA.

     If the difference between original and reconstructed data is larger than the `threshold`, the point is

sklego/decomposition/umap_reconstruction.py

+1 -1

@@ -11,7 +11,7 @@
 from sklearn.utils.validation import FLOAT_DTYPES, check_array, check_is_fitted


-class UMAPOutlierDetection(BaseEstimator, OutlierMixin):
+class UMAPOutlierDetection(OutlierMixin, BaseEstimator):
     """`UMAPOutlierDetection` is an outlier detector based on the reconstruction error from UMAP.

     If the difference between original and reconstructed data is larger than the `threshold`, the point is

sklego/dummy.py

+1 -1

@@ -11,7 +11,7 @@
 )


-class RandomRegressor(BaseEstimator, RegressorMixin):
+class RandomRegressor(RegressorMixin, BaseEstimator):
     """A `RandomRegressor` makes random predictions only based on the `y` value that is seen.

     The goal is that such a regressor can be used for benchmarking. It _should be_ easily beatable.

sklego/linear_model.py

+7 -7

@@ -27,7 +27,7 @@
 )


-class LowessRegression(BaseEstimator, RegressorMixin):
+class LowessRegression(RegressorMixin, BaseEstimator):
     """`LowessRegression` estimator: LOWESS (Locally Weighted Scatterplot Smoothing) is a type of
     [local regression](https://en.wikipedia.org/wiki/Local_regression).
@@ -155,7 +155,7 @@ def predict(self, X):
         return results


-class ProbWeightRegression(BaseEstimator, RegressorMixin):
+class ProbWeightRegression(RegressorMixin, BaseEstimator):
     """`ProbWeightRegression` assumes that all input signals in `X` need to be reweighted with weights that sum up to
     one in order to predict `y`.
@@ -276,7 +276,7 @@ def coefs_(self):
         return self.coef_


-class DeadZoneRegressor(BaseEstimator, RegressorMixin):
+class DeadZoneRegressor(RegressorMixin, BaseEstimator):
     r"""The `DeadZoneRegressor` estimator implements a regression model that incorporates a _dead zone effect_ for
     improving the robustness of regression predictions.
@@ -480,7 +480,7 @@ def allowed_effects(self):
         return self._ALLOWED_EFFECTS


-class _FairClassifier(BaseEstimator, LinearClassifierMixin):
+class _FairClassifier(LinearClassifierMixin, BaseEstimator):
     """Base class for fair classifiers that address sensitive attribute fairness.

     This base class provides a foundation for fair classifiers that aim to mitigate bias and discrimination by taking
@@ -682,7 +682,7 @@ def _more_tags(self):
         return {"poor_score": True}


-class DemographicParityClassifier(BaseEstimator, LinearClassifierMixin):
+class DemographicParityClassifier(LinearClassifierMixin, BaseEstimator):
     r"""`DemographicParityClassifier` is a logistic regression classifier which can be constrained on demographic
     parity (p% score).
@@ -800,7 +800,7 @@ def constraints(self, y_hat, y_true, sensitive, n_obs):
         return []


-class EqualOpportunityClassifier(BaseEstimator, LinearClassifierMixin):
+class EqualOpportunityClassifier(LinearClassifierMixin, BaseEstimator):
     r"""`EqualOpportunityClassifier` is a logistic regression classifier which can be constrained on equal opportunity
     score.
@@ -914,7 +914,7 @@ def constraints(self, y_hat, y_true, sensitive, n_obs):
         return []


-class BaseScipyMinimizeRegressor(BaseEstimator, RegressorMixin, ABC):
+class BaseScipyMinimizeRegressor(RegressorMixin, BaseEstimator, ABC):
     """Abstract base class for regressors relying on Scipy's
     [minimize method](https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.minimize.html) to minimize a
     (custom) loss function.

sklego/meta/_grouped_utils.py

+3 -3

@@ -33,9 +33,9 @@ def parse_X_y(X, y, groups, check_X=True, **kwargs) -> nw.DataFrame:

     # Convert y and assign it to the frame
     n_samples = X.shape[0]
-    y_series = nw.from_dict(
-        data={"tmp": [None] * n_samples if y is None else y}, native_namespace=nw.get_native_namespace(X)
-    )["tmp"]
+    y_series = nw.new_series(
+        name="tmp", values=[None] * n_samples if y is None else y, native_namespace=nw.get_native_namespace(X)
+    )

     return X.with_columns(__sklego_target__=y_series)
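
The same `nw.from_dict(...)[col]` to `nw.new_series(...)` swap recurs in the hierarchical predictor and model-selection diffs below, and presumably motivates the narwhals>=1.5.0 bump in pyproject.toml. A standalone sketch of the two patterns, assuming a pandas-backed frame (not code from the repo):

    import narwhals as nw
    import pandas as pd

    # In the library code the namespace comes from nw.get_native_namespace(X);
    # here we hardcode pandas purely for illustration.
    native_namespace = pd

    # Old pattern: build a one-column frame, then pull the column out as a Series.
    old = nw.from_dict(data={"tmp": [1.0, 2.0, 3.0]}, native_namespace=native_namespace)["tmp"]

    # New pattern (narwhals >= 1.5): construct the Series directly.
    new = nw.new_series(name="tmp", values=[1.0, 2.0, 3.0], native_namespace=native_namespace)

    print(nw.to_native(old).equals(nw.to_native(new)))  # both are pandas Series named "tmp"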

sklego/meta/confusion_balancer.py

+1 -1

@@ -7,7 +7,7 @@
 from sklego.base import ProbabilisticClassifier


-class ConfusionBalancer(BaseEstimator, MetaEstimatorMixin, ClassifierMixin):
+class ConfusionBalancer(ClassifierMixin, MetaEstimatorMixin, BaseEstimator):
     r"""The `ConfusionBalancer` estimator attempts to give it's child estimator a more balanced output by learning from
     the confusion matrix during training.
1313

sklego/meta/grouped_predictor.py

+2 -2

@@ -402,7 +402,7 @@ def _more_tags(self):
         return {"allow_nan": True}


-class GroupedRegressor(GroupedPredictor, RegressorMixin):
+class GroupedRegressor(RegressorMixin, GroupedPredictor):
     """`GroupedRegressor` is a meta-estimator that fits a separate regressor for each group in the input data.

     Its spec is the same as [`GroupedPredictor`][sklego.meta.grouped_predictor.GroupedPredictor] but it is available
@@ -439,7 +439,7 @@ def fit(self, X, y):
         return super().fit(X, y)


-class GroupedClassifier(GroupedPredictor, ClassifierMixin):
+class GroupedClassifier(ClassifierMixin, GroupedPredictor):
     """`GroupedClassifier` is a meta-estimator that fits a separate classifier for each group in the input data.

     Its equivalent to [`GroupedPredictor`][sklego.meta.grouped_predictor.GroupedPredictor] with `shrinkage=None`

sklego/meta/hierarchical_predictor.py

+9 -9

@@ -282,10 +282,10 @@ def fit(self, X, y=None):
             raise ValueError(msg)

         native_namespace = nw.get_native_namespace(X)
-        target_series = nw.from_dict({self._TARGET_NAME: y}, native_namespace=native_namespace)[self._TARGET_NAME]
-        global_series = nw.from_dict({self._GLOBAL_NAME: np.ones(n_samples)}, native_namespace=native_namespace)[
-            self._GLOBAL_NAME
-        ]
+        target_series = nw.new_series(name=self._TARGET_NAME, values=y, native_namespace=native_namespace)
+        global_series = nw.new_series(
+            name=self._GLOBAL_NAME, values=np.ones(n_samples), native_namespace=native_namespace
+        )
         frame = X.with_columns(
             **{
                 self._TARGET_NAME: target_series,
@@ -322,9 +322,9 @@ def _predict_estimators(self, X, method_name):

         n_samples = X.shape[0]
         native_namespace = nw.get_native_namespace(X)
-        global_series = nw.from_dict({self._GLOBAL_NAME: np.ones(n_samples)}, native_namespace=native_namespace)[
-            self._GLOBAL_NAME
-        ]
+        global_series = nw.new_series(
+            name=self._GLOBAL_NAME, values=np.ones(n_samples), native_namespace=native_namespace
+        )

         frame = X.with_columns(
             **{
@@ -424,7 +424,7 @@ def _more_tags(self):
         return {"allow_nan": True}


-class HierarchicalRegressor(HierarchicalPredictor, RegressorMixin):
+class HierarchicalRegressor(RegressorMixin, HierarchicalPredictor):
     """A hierarchical regressor that predicts values using hierarchical grouping.

     This class extends [`HierarchicalPredictor`][sklego.meta.hierarchical_predictor.HierarchicalPredictor] and adds
@@ -537,7 +537,7 @@ def predict(self, X):
         return self._predict_estimators(X, "predict")


-class HierarchicalClassifier(HierarchicalPredictor, ClassifierMixin):
+class HierarchicalClassifier(ClassifierMixin, HierarchicalPredictor):
     """A hierarchical classifier that predicts labels using hierarchical grouping.

     This class extends [`HierarchicalPredictor`][sklego.meta.hierarchical_predictor.HierarchicalPredictor] and adds

sklego/meta/outlier_classifier.py

+1 -1

@@ -7,7 +7,7 @@
 from sklego.base import OutlierModel


-class OutlierClassifier(BaseEstimator, ClassifierMixin, MetaEstimatorMixin):
+class OutlierClassifier(ClassifierMixin, MetaEstimatorMixin, BaseEstimator):
     """Morphs an outlier detection model into a classifier.

     When an outlier is detected it will output 1 and 0 otherwise. This way you can use familiar metrics again and this

sklego/meta/regression_outlier_detector.py

+1 -1

@@ -5,7 +5,7 @@
 from sklearn.utils.validation import check_array, check_is_fitted


-class RegressionOutlierDetector(BaseEstimator, OutlierMixin):
+class RegressionOutlierDetector(OutlierMixin, BaseEstimator):
     """Morphs a regression estimator into one that can detect outliers. We will try to predict `column` in X.

     Parameters

sklego/meta/subjective_classifier.py

+1 -1

@@ -6,7 +6,7 @@
 from sklearn.utils.validation import FLOAT_DTYPES, check_array, check_is_fitted, check_X_y


-class SubjectiveClassifier(BaseEstimator, ClassifierMixin, MetaEstimatorMixin):
+class SubjectiveClassifier(ClassifierMixin, MetaEstimatorMixin, BaseEstimator):
     """Corrects predictions of the inner classifier by taking into account a (subjective) prior distribution of the
     classes.

sklego/meta/thresholder.py

+1 -1

@@ -10,7 +10,7 @@
 from sklego.base import ProbabilisticClassifier


-class Thresholder(BaseEstimator, ClassifierMixin):
+class Thresholder(ClassifierMixin, BaseEstimator):
     """Takes a binary classifier and moves the threshold. This way you might design the algorithm to only accept a
     certain class if the probability for it is larger than, say, 90% instead of 50%.

sklego/meta/zero_inflated_regressor.py

+1 -1

@@ -8,7 +8,7 @@
 from sklearn.utils.validation import _check_sample_weight, check_array, check_is_fitted, check_X_y


-class ZeroInflatedRegressor(BaseEstimator, RegressorMixin, MetaEstimatorMixin):
+class ZeroInflatedRegressor(RegressorMixin, BaseEstimator, MetaEstimatorMixin):
     """A meta regressor for zero-inflated datasets, i.e. the targets contain a lot of zeroes.

     `ZeroInflatedRegressor` consists of a classifier and a regressor.

sklego/mixture/bayesian_gmm_classifier.py

+1 -1

@@ -7,7 +7,7 @@
 from sklearn.utils.validation import FLOAT_DTYPES, check_array, check_is_fitted


-class BayesianGMMClassifier(BaseEstimator, ClassifierMixin):
+class BayesianGMMClassifier(ClassifierMixin, BaseEstimator):
     """The `BayesianGMMClassifier` trains a Gaussian Mixture Model for each class in `y` on a dataset `X`.
     Once a density is trained for each class we can evaluate the likelihood scores to see which class is more likely.

sklego/mixture/gmm_classifier.py

+1 -1

@@ -7,7 +7,7 @@
 from sklearn.utils.validation import FLOAT_DTYPES, check_array, check_is_fitted


-class GMMClassifier(BaseEstimator, ClassifierMixin):
+class GMMClassifier(ClassifierMixin, BaseEstimator):
     """The `GMMClassifier` trains a Gaussian Mixture Model for each class in `y` on a dataset `X`. Once a density is
     trained for each class we can evaluate the likelihood scores to see which class is more likely.

sklego/model_selection.py

+2 -2

@@ -263,8 +263,8 @@ def update_split_info(indices, j, part, summary):

         j = 0
         for i in self.split(nw.to_native(X)):
-            train_info = nw.to_native(nw.from_dict({"tmp": i[0]}, native_namespace=native_namespace)["tmp"])
-            valid_info = nw.to_native(nw.from_dict({"tmp": i[1]}, native_namespace=native_namespace)["tmp"])
+            train_info = nw.to_native(nw.new_series(name="tmp", values=i[0], native_namespace=native_namespace))
+            valid_info = nw.to_native(nw.new_series(name="tmp", values=i[1], native_namespace=native_namespace))
             update_split_info(train_info, j, "train", summary)
             update_split_info(valid_info, j, "valid", summary)
             j = j + 1

sklego/naive_bayes.py

+2 -2

@@ -8,7 +8,7 @@
 from sklearn.utils.validation import FLOAT_DTYPES, check_array, check_is_fitted


-class GaussianMixtureNB(BaseEstimator, ClassifierMixin):
+class GaussianMixtureNB(ClassifierMixin, BaseEstimator):
     """The `GaussianMixtureNB` estimator is a naive bayes classifier that uses a mixture of gaussians instead of
     merely a single one. In particular it trains a `GaussianMixture` model for each class in the target and for each
     feature in the data, on the subset of `X` where `y == class`.
@@ -158,7 +158,7 @@ def num_fit_cols_(self):
         return self.n_features_in_


-class BayesianGaussianMixtureNB(BaseEstimator, ClassifierMixin):
+class BayesianGaussianMixtureNB(ClassifierMixin, BaseEstimator):
     """The `BayesianGaussianMixtureNB` estimator is a naive bayes classifier that uses a bayesian mixture of gaussians
     instead of merely a single one. In particular it trains a `BayesianGaussianMixture` model for each class in the
     target and for each feature in the data, on the subset of `X` where `y == class`.

sklego/neighbors.py

+1 -1

@@ -6,7 +6,7 @@
 from sklearn.utils.validation import FLOAT_DTYPES, check_array, check_is_fitted


-class BayesianKernelDensityClassifier(BaseEstimator, ClassifierMixin):
+class BayesianKernelDensityClassifier(ClassifierMixin, BaseEstimator):
     """The `BayesianKernelDensityClassifier` estimator trains using Kernel Density estimations to generate the joint
     distribution.

sklego/preprocessing/pandastransformers.py

+3 -3

@@ -60,7 +60,7 @@ def _nw_select_dtypes(include: str | list[str], exclude: str | list[str], schema
     return feature_names


-class ColumnDropper(BaseEstimator, TransformerMixin):
+class ColumnDropper(TransformerMixin, BaseEstimator):
     """The `ColumnDropper` transformer allows dropping specific columns from a DataFrame by name.
     Can be useful in a sklearn Pipeline.
@@ -226,7 +226,7 @@ def _check_column_names(self, X):
         raise KeyError(f"{list(non_existent_columns)} column(s) not in DataFrame")


-class TypeSelector(BaseEstimator, TransformerMixin):
+class TypeSelector(TransformerMixin, BaseEstimator):
     """The `TypeSelector` transformer allows to select columns in a DataFrame based on their type.
     Can be useful in a sklearn Pipeline.
@@ -412,7 +412,7 @@ def __init__(self, include=None, exclude=None):
         super().__init__(include=include, exclude=exclude)


-class ColumnSelector(BaseEstimator, TransformerMixin):
+class ColumnSelector(TransformerMixin, BaseEstimator):
     """The `ColumnSelector` transformer allows selecting specific columns from a DataFrame by name.
     Can be useful in a sklearn Pipeline.

tests/test_meta/test_grouped_predictor.py

+1

@@ -32,6 +32,7 @@ def test_sklearn_compatible_estimator(estimator, check):
         "check_fit2d_predict1d",  # custom message
         "check_estimators_empty_data_messages",  # custom message
         "check_supervised_y_2d",  # TODO: Is it possible to support multioutput?
+        "check_requires_y_none",
     }:
         pytest.skip()

tests/test_meta/test_hierarchical_predictor.py

+1

@@ -31,6 +31,7 @@ def test_sklearn_compatible_estimator(estimator, check):
         "check_fit2d_1feature",  # custom message
         "check_supervised_y_2d",  # TODO: Is it possible to support multioutput?
         "check_estimators_empty_data_messages",  # custom message
+        "check_requires_y_none",
     }:
         pytest.skip()
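
Both test files skip the new `check_requires_y_none` scikit-learn estimator check. A rough sketch of the skip pattern, assuming the checks arrive as `functools.partial` objects from scikit-learn's `parametrize_with_checks` (the surrounding fixture code is not part of this diff, so the estimator choice and name lookup here are illustrative assumptions):

    import pytest
    from sklearn.utils.estimator_checks import parametrize_with_checks

    from sklego.dummy import RandomRegressor


    @parametrize_with_checks([RandomRegressor()])
    def test_sklearn_compatible_estimator(estimator, check):
        # `check` is a functools.partial around a check function, so its name can
        # be read from the wrapped function (an assumption about how the repo
        # matches names; the diff only shows the set of skipped names).
        if check.func.__name__ in {"check_requires_y_none"}:
            pytest.skip()
        check(estimator)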
