From 4aa14542e0546f94b0c110a5c6eab3b2bd5d6ef3 Mon Sep 17 00:00:00 2001
From: Ryan Soley
Date: Tue, 27 Feb 2024 14:39:31 -0500
Subject: [PATCH] add `H2OTargetEncoderEstimator` schema

---
Review notes (this section is not part of the commit message):
  * The new schema's `compatibility` block is keyed by `h2o`, matching the
    schema name, the `docs_url`, and the `min_version` value in the same file;
    it previously read `lightgbm`.
  * Because that one line changed, the abbreviated blob id on the YAML
    `index` line below no longer matches the new file's content.

 rubicon_ml/schema/registry.py                 |  3 ++
 .../h2o__H2OTargetEncoderEstimator.yaml       | 32 +++++++++++++++++++
 tests/integration/test_schema.py              |  2 ++
 3 files changed, 37 insertions(+)
 create mode 100644 rubicon_ml/schema/schema/h2o__H2OTargetEncoderEstimator.yaml

diff --git a/rubicon_ml/schema/registry.py b/rubicon_ml/schema/registry.py
index 2dc69955..468da5d3 100644
--- a/rubicon_ml/schema/registry.py
+++ b/rubicon_ml/schema/registry.py
@@ -15,6 +15,9 @@
     "h2o__H2ORandomForestEstimator": lambda: _load_schema(
         os.path.join("schema", "h2o__H2ORandomForestEstimator.yaml")
     ),
+    "h2o__H2OTargetEncoderEstimator": lambda: _load_schema(
+        os.path.join("schema", "h2o__H2OTargetEncoderEstimator.yaml")
+    ),
     "lightgbm__LGBMModel": lambda: _load_schema(os.path.join("schema", "lightgbm__LGBMModel.yaml")),
     "lightgbm__LGBMClassifier": lambda: _load_schema(
         os.path.join("schema", "lightgbm__LGBMClassifier.yaml")
diff --git a/rubicon_ml/schema/schema/h2o__H2OTargetEncoderEstimator.yaml b/rubicon_ml/schema/schema/h2o__H2OTargetEncoderEstimator.yaml
new file mode 100644
index 00000000..1703cd05
--- /dev/null
+++ b/rubicon_ml/schema/schema/h2o__H2OTargetEncoderEstimator.yaml
@@ -0,0 +1,32 @@
+name: h2o__H2OTargetEncoderEstimator
+version: 1.0.0
+
+compatibility:
+  h2o:
+    max_version:
+    min_version: 3.44.0.1
+docs_url: https://docs.h2o.ai/h2o/latest-stable/h2o-py/docs/modeling.html#h2otargetencoderestimator
+
+parameters:
+  - name: blending
+    value_attr: blending
+  - name: columns_to_encode
+    value_attr: columns_to_encode
+  - name: data_leakage_handling
+    value_attr: data_leakage_handling
+  - name: fold_column
+    value_attr: fold_column
+  - name: ignored_columns
+    value_attr: ignored_columns
+  - name: inflection_point
+    value_attr: inflection_point
+  - name: keep_original_categorical_columns
+    value_attr: keep_original_categorical_columns
+  - name: noise
+    value_attr: noise
+  - name: response_column
+    value_attr: response_column
+  - name: seed
+    value_attr: seed
+  - name: smoothing
+    value_attr: smoothing
diff --git a/tests/integration/test_schema.py b/tests/integration/test_schema.py
index c6051b6f..1e95ee11 100644
--- a/tests/integration/test_schema.py
+++ b/tests/integration/test_schema.py
@@ -5,6 +5,7 @@
 from h2o.estimators.gbm import H2OGradientBoostingEstimator
 from h2o.estimators.glm import H2OGeneralizedLinearEstimator
 from h2o.estimators.random_forest import H2ORandomForestEstimator
+from h2o.estimators.targetencoder import H2OTargetEncoderEstimator
 from lightgbm import LGBMClassifier, LGBMRegressor
 from sklearn.ensemble import RandomForestClassifier
 from xgboost import XGBClassifier, XGBRegressor
@@ -14,6 +15,7 @@
     H2OGeneralizedLinearEstimator,
     H2OGradientBoostingEstimator,
     H2ORandomForestEstimator,
+    H2OTargetEncoderEstimator,
 ]
 PANDAS_SCHEMA_CLS = [
     LGBMClassifier,