repeat all params
jameslamb committed Feb 11, 2025
1 parent 98eb476 commit 3d351a4
Showing 3 changed files with 174 additions and 7 deletions.
2 changes: 1 addition & 1 deletion docs/FAQ.rst
@@ -413,6 +413,6 @@ This pattern will work with ``lightgbm > 4.5.0``.
print(f"mean: {preds.mean():.2f}, max: {preds.max():.2f}")
# mean: -6.81, max: 345.10
-preds_trunc = reg_trunc.predict(X, max_score = preds.mean())
+preds_trunc = reg_trunc.predict(X, max_score=preds.mean())
print(f"mean: {preds_trunc.mean():.2f}, max: {preds_trunc.max():.2f}")
# mean: -56.50, max: -6.81
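
[Editor's note] For context, the hunk above shows only the prediction lines; the FAQ's surrounding example subclasses LGBMRegressor and accepts an extra keyword in predict(). A minimal reconstruction under assumptions (the class name and the training code are not shown in this diff):

import lightgbm as lgb
import numpy as np
from sklearn.datasets import make_regression

# Assumed shape of the FAQ's subclass: cap predictions at max_score.
class TruncatedRegressor(lgb.LGBMRegressor):
    def predict(self, X, max_score: float = np.inf, **kwargs):
        preds = super().predict(X, **kwargs)
        return np.minimum(preds, max_score)

X, y = make_regression(n_samples=1_000, n_features=4, random_state=42)
reg_trunc = TruncatedRegressor(n_estimators=10, verbose=-1).fit(X, y)
preds = reg_trunc.predict(X)
preds_trunc = reg_trunc.predict(X, max_score=preds.mean())
assert preds_trunc.max() <= preds.mean()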
150 changes: 144 additions & 6 deletions python-package/lightgbm/sklearn.py
@@ -1314,8 +1314,54 @@ def feature_names_in_(self) -> None:
class LGBMRegressor(_LGBMRegressorBase, LGBMModel):
"""LightGBM regressor."""

-    def __init__(self, **kwargs: Any) -> None:
-        super().__init__(**kwargs)
+    # NOTE: all args from LGBMModel.__init__() are intentionally repeated here for
+    # docs, help(), and tab completion.
+    def __init__(
+        self,
+        *,
+        boosting_type: str = "gbdt",
+        num_leaves: int = 31,
+        max_depth: int = -1,
+        learning_rate: float = 0.1,
+        n_estimators: int = 100,
+        subsample_for_bin: int = 200000,
+        objective: Optional[Union[str, _LGBM_ScikitCustomObjectiveFunction]] = None,
+        class_weight: Optional[Union[Dict, str]] = None,
+        min_split_gain: float = 0.0,
+        min_child_weight: float = 1e-3,
+        min_child_samples: int = 20,
+        subsample: float = 1.0,
+        subsample_freq: int = 0,
+        colsample_bytree: float = 1.0,
+        reg_alpha: float = 0.0,
+        reg_lambda: float = 0.0,
+        random_state: Optional[Union[int, np.random.RandomState, np.random.Generator]] = None,
+        n_jobs: Optional[int] = None,
+        importance_type: str = "split",
+        **kwargs: Any,
+    ) -> None:
+        super().__init__(
+            boosting_type=boosting_type,
+            num_leaves=num_leaves,
+            max_depth=max_depth,
+            learning_rate=learning_rate,
+            n_estimators=n_estimators,
+            subsample_for_bin=subsample_for_bin,
+            objective=objective,
+            class_weight=class_weight,
+            min_split_gain=min_split_gain,
+            min_child_weight=min_child_weight,
+            min_child_samples=min_child_samples,
+            subsample=subsample,
+            subsample_freq=subsample_freq,
+            colsample_bytree=colsample_bytree,
+            reg_alpha=reg_alpha,
+            reg_lambda=reg_lambda,
+            random_state=random_state,
+            n_jobs=n_jobs,
+            importance_type=importance_type,
+            **kwargs,
+        )

__init__.__doc__ = LGBMModel.__init__.__doc__
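
[Editor's note] A quick sanity check of what repeating the parameters buys, per the NOTE comment above: introspection now sees the full constructor signature instead of an opaque (**kwargs), which is what help(), IDE tab completion, and the generated docs read from. The printed output below is abbreviated and assumed, not taken from the commit:

import inspect

import lightgbm as lgb

# With explicit parameters, the signature lists every argument by name.
params = inspect.signature(lgb.LGBMRegressor.__init__).parameters
print(list(params))
# ['self', 'boosting_type', 'num_leaves', ..., 'importance_type', 'kwargs']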

@@ -1378,8 +1424,54 @@ def fit(  # type: ignore[override]
class LGBMClassifier(_LGBMClassifierBase, LGBMModel):
"""LightGBM classifier."""

-    def __init__(self, **kwargs: Any) -> None:
-        super().__init__(**kwargs)
+    # NOTE: all args from LGBMModel.__init__() are intentionally repeated here for
+    # docs, help(), and tab completion.
+    def __init__(
+        self,
+        *,
+        boosting_type: str = "gbdt",
+        num_leaves: int = 31,
+        max_depth: int = -1,
+        learning_rate: float = 0.1,
+        n_estimators: int = 100,
+        subsample_for_bin: int = 200000,
+        objective: Optional[Union[str, _LGBM_ScikitCustomObjectiveFunction]] = None,
+        class_weight: Optional[Union[Dict, str]] = None,
+        min_split_gain: float = 0.0,
+        min_child_weight: float = 1e-3,
+        min_child_samples: int = 20,
+        subsample: float = 1.0,
+        subsample_freq: int = 0,
+        colsample_bytree: float = 1.0,
+        reg_alpha: float = 0.0,
+        reg_lambda: float = 0.0,
+        random_state: Optional[Union[int, np.random.RandomState, np.random.Generator]] = None,
+        n_jobs: Optional[int] = None,
+        importance_type: str = "split",
+        **kwargs: Any,
+    ) -> None:
+        super().__init__(
+            boosting_type=boosting_type,
+            num_leaves=num_leaves,
+            max_depth=max_depth,
+            learning_rate=learning_rate,
+            n_estimators=n_estimators,
+            subsample_for_bin=subsample_for_bin,
+            objective=objective,
+            class_weight=class_weight,
+            min_split_gain=min_split_gain,
+            min_child_weight=min_child_weight,
+            min_child_samples=min_child_samples,
+            subsample=subsample,
+            subsample_freq=subsample_freq,
+            colsample_bytree=colsample_bytree,
+            reg_alpha=reg_alpha,
+            reg_lambda=reg_lambda,
+            random_state=random_state,
+            n_jobs=n_jobs,
+            importance_type=importance_type,
+            **kwargs,
+        )

__init__.__doc__ = LGBMModel.__init__.__doc__
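
[Editor's note] A related reason for keeping all three subclasses byte-for-byte identical to LGBMModel's signature: scikit-learn derives get_params() from the explicit __init__ parameters, and clone() round-trips estimators through it. A minimal sketch (illustrative, not from the commit) of the behavior the new test at the bottom of this diff locks in:

import lightgbm as lgb
from sklearn.base import clone

clf = lgb.LGBMClassifier(num_leaves=63, learning_rate=0.05)

# clone() rebuilds the estimator from get_params(); this works cleanly
# because every parameter is an explicit keyword in __init__.
clf2 = clone(clf)
assert clf2.get_params()["num_leaves"] == 63
assert clf2.get_params() == clf.get_params()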

@@ -1593,8 +1685,54 @@ class LGBMRanker(LGBMModel):
Please use this class mainly for training and applying ranking models in common sklearnish way.
"""

-    def __init__(self, **kwargs: Any) -> None:
-        super().__init__(**kwargs)
+    # NOTE: all args from LGBMModel.__init__() are intentionally repeated here for
+    # docs, help(), and tab completion.
+    def __init__(
+        self,
+        *,
+        boosting_type: str = "gbdt",
+        num_leaves: int = 31,
+        max_depth: int = -1,
+        learning_rate: float = 0.1,
+        n_estimators: int = 100,
+        subsample_for_bin: int = 200000,
+        objective: Optional[Union[str, _LGBM_ScikitCustomObjectiveFunction]] = None,
+        class_weight: Optional[Union[Dict, str]] = None,
+        min_split_gain: float = 0.0,
+        min_child_weight: float = 1e-3,
+        min_child_samples: int = 20,
+        subsample: float = 1.0,
+        subsample_freq: int = 0,
+        colsample_bytree: float = 1.0,
+        reg_alpha: float = 0.0,
+        reg_lambda: float = 0.0,
+        random_state: Optional[Union[int, np.random.RandomState, np.random.Generator]] = None,
+        n_jobs: Optional[int] = None,
+        importance_type: str = "split",
+        **kwargs: Any,
+    ) -> None:
+        super().__init__(
+            boosting_type=boosting_type,
+            num_leaves=num_leaves,
+            max_depth=max_depth,
+            learning_rate=learning_rate,
+            n_estimators=n_estimators,
+            subsample_for_bin=subsample_for_bin,
+            objective=objective,
+            class_weight=class_weight,
+            min_split_gain=min_split_gain,
+            min_child_weight=min_child_weight,
+            min_child_samples=min_child_samples,
+            subsample=subsample,
+            subsample_freq=subsample_freq,
+            colsample_bytree=colsample_bytree,
+            reg_alpha=reg_alpha,
+            reg_lambda=reg_lambda,
+            random_state=random_state,
+            n_jobs=n_jobs,
+            importance_type=importance_type,
+            **kwargs,
+        )

__init__.__doc__ = LGBMModel.__init__.__doc__
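
[Editor's note] The `__init__.__doc__ = LGBMModel.__init__.__doc__` line, used after each of the three constructors, is a plain attribute assignment executed in the class body: it shares the base class's parameter documentation so help() stays accurate without duplicating the text three times. A standalone sketch of the idiom, with toy names rather than LightGBM code:

class Base:
    def __init__(self, n: int = 1) -> None:
        """Construct a Base.

        Parameters
        ----------
        n : int, optional (default=1)
            A toy parameter.
        """
        self.n = n


class Child(Base):
    def __init__(self, n: int = 1) -> None:
        super().__init__(n=n)

    # reuse the base class's parameter docs so help(Child) stays accurate
    __init__.__doc__ = Base.__init__.__doc__


assert Child.__init__.__doc__ == Base.__init__.__doc__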

29 changes: 29 additions & 0 deletions tests/python_package_test/test_sklearn.py
@@ -1,4 +1,5 @@
# coding: utf-8
+import inspect
import itertools
import math
import re
@@ -500,6 +501,34 @@ def test_clone_and_property():
assert isinstance(clf.feature_importances_, np.ndarray)


+@pytest.mark.parametrize("estimator", (lgb.LGBMClassifier, lgb.LGBMRegressor, lgb.LGBMRanker))
+def test_estimators_all_have_the_same_kwargs_and_defaults(estimator):
+    base_spec = inspect.getfullargspec(lgb.LGBMModel)
+    subclass_spec = inspect.getfullargspec(estimator)
+
+    # should not allow for any varargs
+    assert subclass_spec.varargs == base_spec.varargs
+    assert subclass_spec.varargs is None
+
+    # the only varkw should be **kwargs
+    assert subclass_spec.varkw == base_spec.varkw
+    assert subclass_spec.varkw == "kwargs"
+
+    # default values for all constructor arguments should be identical
+    #
+    # NOTE: if LGBMClassifier / LGBMRanker / LGBMRegressor ever override
+    # any of LGBMModel's constructor arguments, this will need to be updated
+    assert subclass_spec.kwonlydefaults == base_spec.kwonlydefaults
+
+    # the only positional argument should be 'self'
+    assert subclass_spec.args == base_spec.args
+    assert subclass_spec.args == ["self"]
+    assert subclass_spec.defaults is None
+
+    # get_params() should be identical
+    assert estimator().get_params() == lgb.LGBMModel().get_params()

def test_subclassing_get_params_works():
expected_params = {
"boosting_type": "gbdt",
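[Editor's note] For readers unfamiliar with inspect.getfullargspec, the fields the new test compares look like this on a toy function with the same shape as the estimator constructors (toy code, not from the commit):

import inspect

def ctor(self, *, num_leaves: int = 31, learning_rate: float = 0.1, **kwargs):
    pass

spec = inspect.getfullargspec(ctor)
print(spec.args)            # ['self']           -> the only positional arg
print(spec.varargs)         # None               -> no *args
print(spec.varkw)           # 'kwargs'           -> the **kwargs catch-all
print(spec.kwonlydefaults)  # {'num_leaves': 31, 'learning_rate': 0.1}
print(spec.defaults)        # None               -> no positional defaults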
