Skip to content

Commit

Permalink
Update schemas for xgboost 2.0.
Browse files Browse the repository at this point in the history
preprocess test data as needed
Loosen version constraint on xgboost accordingly

Signed-off-by: Avi Shinnar <[email protected]>
  • Loading branch information
shinnar committed Jan 29, 2024
1 parent c7a2e3c commit 72d1063
Show file tree
Hide file tree
Showing 4 changed files with 88 additions and 3 deletions.
43 changes: 43 additions & 0 deletions lale/lib/xgboost/xgb_classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -916,5 +916,48 @@ def score(self, X, y):
set_as_available=True,
)

# Schema overrides applied only when an xgboost version was detected
# (xgboost_version is not None) and that version is at least 2.0.
# The customized operator is rebound to the same XGBClassifier name.
if xgboost_version is not None and xgboost_version >= version.Version("2.0"):
# https://xgboost.readthedocs.io/en/latest/python/python_api.html#module-xgboost.sklearn
XGBClassifier = XGBClassifier.customize_schema(
# n_estimators: either an integer or None.
# NOTE(review): "default" is nested inside the integer branch here, whereas
# device and multi_strategy below declare "default" at the top level of the
# property schema -- confirm this placement is intended.
n_estimators={
"description": "Number of trees to fit.",
"anyOf": [
{
"type": "integer",
"default": 200,
# Range hints for the optimizer: 50 to 1000.
"minimumForOptimizer": 50,
"maximumForOptimizer": 1000,
},
{"enum": [None]},
],
},
# device: one of "cpu", "cuda", "gpu", or None; defaults to None.
device={
"description": """Device ordinal""",
"anyOf": [
{"enum": ["cpu", "cuda", "gpu"]},
{"enum": [None]},
],
"default": None,
},
# multi_strategy: "one_output_per_tree", "multi_output_tree", or None;
# defaults to None.
multi_strategy={
"description": """The strategy used for training multi-target models,
including multi-target regression and multi-class classification.
See Multiple Outputs for more information.""",
"anyOf": [
{
"description": "One model for each target.",
"enum": ["one_output_per_tree"],
},
{
"description": "Use multi-target trees.",
"enum": ["multi_output_tree"],
},
{"enum": [None]},
],
"default": None,
},
# NOTE(review): set_as_available semantics are defined by customize_schema,
# which is not visible here -- confirm against its documentation.
set_as_available=True,
)


# Runs unconditionally, after any version-specific schema customization above.
lale.docstrings.set_docstrings(XGBClassifier)
42 changes: 42 additions & 0 deletions lale/lib/xgboost/xgb_regressor.py
Original file line number Diff line number Diff line change
Expand Up @@ -860,5 +860,47 @@ def score(self, X, y):
set_as_available=True,
)

# Schema overrides applied only when an xgboost version was detected
# (xgboost_version is not None) and that version is at least 2.0.
# The customized operator is rebound to the same XGBRegressor name.
if xgboost_version is not None and xgboost_version >= version.Version("2.0"):
# https://xgboost.readthedocs.io/en/latest/python/python_api.html#module-xgboost.sklearn
XGBRegressor = XGBRegressor.customize_schema(
# n_estimators: either an integer or None.
# NOTE(review): "default" is nested inside the integer branch here, whereas
# device and multi_strategy below declare "default" at the top level of the
# property schema -- confirm this placement is intended.
n_estimators={
"description": "Number of trees to fit.",
"anyOf": [
{
"type": "integer",
"default": 200,
# Range hints for the optimizer: 50 to 1000.
"minimumForOptimizer": 50,
"maximumForOptimizer": 1000,
},
{"enum": [None]},
],
},
# device: one of "cpu", "cuda", "gpu", or None; defaults to None.
device={
"description": """Device ordinal""",
"anyOf": [
{"enum": ["cpu", "cuda", "gpu"]},
{"enum": [None]},
],
"default": None,
},
# multi_strategy: "one_output_per_tree", "multi_output_tree", or None;
# defaults to None.
multi_strategy={
"description": """The strategy used for training multi-target models,
including multi-target regression and multi-class classification.
See Multiple Outputs for more information.""",
"anyOf": [
{
"description": "One model for each target.",
"enum": ["one_output_per_tree"],
},
{
"description": "Use multi-target trees.",
"enum": ["multi_output_tree"],
},
{"enum": [None]},
],
"default": None,
},
# NOTE(review): set_as_available semantics are defined by customize_schema,
# which is not visible here -- confirm against its documentation.
set_as_available=True,
)

# Runs unconditionally, after any version-specific schema customization above.
lale.docstrings.set_docstrings(XGBRegressor)
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@
extras_require = {
"full": [
"mystic",
"xgboost<=1.5.1",
"xgboost<2.1.0",
"lightgbm<4.0.0",
"snapml>=1.7.0rc3,<1.12.0",
"liac-arff>=2.4.0",
Expand Down
4 changes: 2 additions & 2 deletions test/test_core_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -829,7 +829,7 @@ def test_sklearn_diabetes(self):
def test_openml_creditg(self):
# classification, categoricals+numbers incl. string, no missing values
(orig_train_X, orig_train_y), _ = lale.datasets.openml.fetch(
"credit-g", "classification", preprocess=False
"credit-g", "classification", preprocess=True
)
subsample_X, _, subsample_y, _ = train_test_split(
orig_train_X, orig_train_y, train_size=0.05
Expand Down Expand Up @@ -859,7 +859,7 @@ def test_missing_boston(self):
def test_missing_creditg(self):
# classification, categoricals+numbers incl. string, synth. missing
(orig_train_X, orig_train_y), _ = lale.datasets.openml.fetch(
"credit-g", "classification", preprocess=False
"credit-g", "classification", preprocess=True
)
subsample_X, _, subsample_y, _ = train_test_split(
orig_train_X, orig_train_y, train_size=0.05
Expand Down

0 comments on commit 72d1063

Please sign in to comment.