Skip to content

Commit

Permalink
[PYDF] Add support for hyperparameter templates
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 580133998
  • Loading branch information
rstz authored and copybara-github committed Nov 7, 2023
1 parent 56a51cc commit 0de44e7
Show file tree
Hide file tree
Showing 7 changed files with 356 additions and 56 deletions.
1 change: 1 addition & 0 deletions yggdrasil_decision_forests/port/python/ydf/learner/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ py_library(
srcs = ["specialized_learners_pre_generated.py"],
deps = [
":generic_learner",
":hyperparameters",
":tuner",
"//ydf/dataset",
"//ydf/dataset:dataspec",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
# limitations under the License.

"""Utility functions for YDF Hyperparameters."""
from collections.abc import Mapping
import dataclasses
from typing import Dict, Union

from yggdrasil_decision_forests.model import hyperparameter_pb2
Expand Down Expand Up @@ -69,3 +71,39 @@ def dict_to_generic_hyperparameter(
else:
raise ValueError(f"Invalid value {value} for parameter {key}")
return generic_hps


@dataclasses.dataclass
class HyperparameterTemplate(Mapping):
  """A named and versioned set of hyper-parameters.

  List of hyper-parameter sets that outperforms the default hyper-parameters
  (either generally or in specific scenarios). A template is also a mapping of
  hyperparameters and may be used with the double star operator.

  Usage example:

  ```python
  templates = ydf.GradientBoostedTreesLearner.hyperparameter_templates()
  better_default = templates["better_defaultv1"]
  # Apply the parameters of the template on the learner.
  learner = ydf.GradientBoostedTreesLearner(label, **better_default)
  ```
  """

  # Short identifier of the template (without the version suffix).
  name: str
  # Version of the template, bumped whenever its parameter set changes.
  version: int
  # The hyperparameters the template applies. The annotation is kept as a
  # string so the class can be defined independently of `HyperParameters`.
  parameters: "HyperParameters"
  # Human-readable description of what the template is good for.
  description: str

  def __iter__(self):
    """Iterates over the hyperparameter names of the template."""
    return iter(self.parameters)

  def __len__(self):
    """Returns the number of hyperparameters in the template."""
    return len(self.parameters)

  def __getitem__(self, item):
    """Returns the value of hyperparameter `item`.

    Raises:
      KeyError: If `item` is not a hyperparameter of the template. Raising
        KeyError (instead of returning None, as a previous revision did) is
        required by the `Mapping` contract: `in`, `.get()` and `**` unpacking
        all rely on it.
    """
    # `parameters` is a plain dict, so indexing raises KeyError naturally.
    return self.parameters[item]
16 changes: 16 additions & 0 deletions yggdrasil_decision_forests/port/python/ydf/learner/learner_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -620,6 +620,22 @@ def test_predict_iris(self):
row_sums, np.ones(predictions.shape[0]), decimal=5
)

def test_better_default_template(self):
  """Checks that the "better_defaultv1" GBT template exists and trains."""
  dataset = toy_dataset()
  all_templates = (
      specialized_learners.GradientBoostedTreesLearner.hyperparameter_templates()
  )
  self.assertIn("better_defaultv1", all_templates)
  template = all_templates["better_defaultv1"]
  # A template is a Mapping, so it can be applied with "**".
  learner = specialized_learners.GradientBoostedTreesLearner(
      label="label", **template
  )
  self.assertEqual(
      learner.hyperparameters["growing_strategy"], "BEST_FIRST_GLOBAL"
  )
  _ = learner.train(dataset)


class LoggingTest(parameterized.TestCase):

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,14 +30,15 @@
compilation.
"""

from typing import Optional
from typing import Dict, Optional

from yggdrasil_decision_forests.dataset import data_spec_pb2
from yggdrasil_decision_forests.learner import abstract_learner_pb2
from yggdrasil_decision_forests.model import abstract_model_pb2 # pylint: disable=unused-import
from ydf.dataset import dataspec
from ydf.dataset import dataset
from ydf.learner import generic_learner
from ydf.learner import hyperparameters
from ydf.learner import tuner as tuner_lib


Expand Down Expand Up @@ -68,6 +69,13 @@ class RandomForestLearner(generic_learner.GenericLearner):
print(model.summary())
```
Hyperparameters are configured to give reasonable results for typical
datasets. Hyperparameters can also be modified manually (see descriptions
below) or by applying the hyperparameter templates available with
`RandomForestLearner.hyperparameter_templates()` (see this function's
documentation for
details).
Attributes:
label: Label of the dataset. The label column should not be identified as a
feature in the `features` parameter.
Expand Down Expand Up @@ -450,6 +458,7 @@ def __init__(
"uplift_split_score": uplift_split_score,
"winner_take_all": winner_take_all,
}

data_spec_args = dataspec.DataSpecInferenceArgs(
columns=dataspec.normalize_column_defs(features),
include_all_columns=include_all_columns,
Expand Down Expand Up @@ -491,6 +500,57 @@ def capabilities(cls) -> abstract_learner_pb2.LearnerCapabilities:
support_monotonic_constraints=False,
)

@classmethod
def hyperparameter_templates(
    cls,
) -> Dict[str, hyperparameters.HyperparameterTemplate]:
  r"""Hyperparameter templates for this Learner.

  Hyperparameter templates are sets of pre-defined hyperparameters for easy
  access to different variants of the learner. Each template is a mapping to a
  set of hyperparameters and can be applied directly on the learner.

  Usage example:

  ```python
  templates = ydf.RandomForestLearner.hyperparameter_templates()
  better_defaultv1 = templates["better_defaultv1"]
  # Print a description of the template
  print(better_defaultv1.description)
  # Apply the template's settings on the learner.
  learner = ydf.RandomForestLearner(label, **better_defaultv1)
  ```

  Returns:
    Dictionary of the available templates
  """
  # Generally-better defaults at no additional training cost.
  better_default = hyperparameters.HyperparameterTemplate(
      name="better_default",
      version=1,
      description=(
          "A configuration that is generally better than the default"
          " parameters without being more expensive."
      ),
      parameters={"winner_take_all": True},
  )
  # Top configuration from the internal benchmark, tuned down for speed.
  benchmark_rank1 = hyperparameters.HyperparameterTemplate(
      name="benchmark_rank1",
      version=1,
      description=(
          "Top ranking hyper-parameters on our benchmark slightly"
          " modified to run in reasonable time."
      ),
      parameters={
          "winner_take_all": True,
          "categorical_algorithm": "RANDOM",
          "split_axis": "SPARSE_OBLIQUE",
          "sparse_oblique_normalization": "MIN_MAX",
          "sparse_oblique_num_projections_exponent": 1.0,
      },
  )
  # Keys carry the template version as a suffix ("...v1").
  return {
      "better_defaultv1": better_default,
      "benchmark_rank1v1": benchmark_rank1,
  }


class HyperparameterOptimizerLearner(generic_learner.GenericLearner):
r"""Hyperparameter Optimizer learning algorithm.
Expand All @@ -507,6 +567,13 @@ class HyperparameterOptimizerLearner(generic_learner.GenericLearner):
print(model.summary())
```
Hyperparameters are configured to give reasonable results for typical
datasets. Hyperparameters can also be modified manually (see descriptions
below) or by applying the hyperparameter templates available with
`HyperparameterOptimizerLearner.hyperparameter_templates()` (see this
function's documentation for
details).
Attributes:
label: Label of the dataset. The label column should not be identified as a
feature in the `features` parameter.
Expand Down Expand Up @@ -628,6 +695,7 @@ def __init__(
"pure_serving_model": pure_serving_model,
"random_seed": random_seed,
}

data_spec_args = dataspec.DataSpecInferenceArgs(
columns=dataspec.normalize_column_defs(features),
include_all_columns=include_all_columns,
Expand Down Expand Up @@ -669,6 +737,21 @@ def capabilities(cls) -> abstract_learner_pb2.LearnerCapabilities:
support_monotonic_constraints=False,
)

@classmethod
def hyperparameter_templates(
    cls,
) -> Dict[str, hyperparameters.HyperparameterTemplate]:
  r"""Hyperparameter templates for this Learner.

  This learner currently does not provide any hyperparameter templates, this
  method is provided for consistency with other learners.

  Returns:
    Empty dictionary.
  """
  # No pre-defined templates exist for the hyperparameter optimizer itself.
  return dict()


class GradientBoostedTreesLearner(generic_learner.GenericLearner):
r"""Gradient Boosted Trees learning algorithm.
Expand All @@ -692,6 +775,13 @@ class GradientBoostedTreesLearner(generic_learner.GenericLearner):
print(model.summary())
```
Hyperparameters are configured to give reasonable results for typical
datasets. Hyperparameters can also be modified manually (see descriptions
below) or by applying the hyperparameter templates available with
`GradientBoostedTreesLearner.hyperparameter_templates()` (see this function's
documentation for
details).
Attributes:
label: Label of the dataset. The label column should not be identified as a
feature in the `features` parameter.
Expand Down Expand Up @@ -1175,6 +1265,7 @@ def __init__(
"validation_interval_in_trees": validation_interval_in_trees,
"validation_ratio": validation_ratio,
}

data_spec_args = dataspec.DataSpecInferenceArgs(
columns=dataspec.normalize_column_defs(features),
include_all_columns=include_all_columns,
Expand Down Expand Up @@ -1216,6 +1307,57 @@ def capabilities(cls) -> abstract_learner_pb2.LearnerCapabilities:
support_monotonic_constraints=True,
)

@classmethod
def hyperparameter_templates(
    cls,
) -> Dict[str, hyperparameters.HyperparameterTemplate]:
  r"""Hyperparameter templates for this Learner.

  Hyperparameter templates are sets of pre-defined hyperparameters for easy
  access to different variants of the learner. Each template is a mapping to a
  set of hyperparameters and can be applied directly on the learner.

  Usage example:

  ```python
  templates = ydf.GradientBoostedTreesLearner.hyperparameter_templates()
  better_defaultv1 = templates["better_defaultv1"]
  # Print a description of the template
  print(better_defaultv1.description)
  # Apply the template's settings on the learner.
  learner = ydf.GradientBoostedTreesLearner(label, **better_defaultv1)
  ```

  Returns:
    Dictionary of the available templates
  """
  # Generally-better defaults at no additional training cost.
  better_default = hyperparameters.HyperparameterTemplate(
      name="better_default",
      version=1,
      description=(
          "A configuration that is generally better than the default"
          " parameters without being more expensive."
      ),
      parameters={"growing_strategy": "BEST_FIRST_GLOBAL"},
  )
  # Top configuration from the internal benchmark, tuned down for speed.
  benchmark_rank1 = hyperparameters.HyperparameterTemplate(
      name="benchmark_rank1",
      version=1,
      description=(
          "Top ranking hyper-parameters on our benchmark slightly"
          " modified to run in reasonable time."
      ),
      parameters={
          "growing_strategy": "BEST_FIRST_GLOBAL",
          "categorical_algorithm": "RANDOM",
          "split_axis": "SPARSE_OBLIQUE",
          "sparse_oblique_normalization": "MIN_MAX",
          "sparse_oblique_num_projections_exponent": 1.0,
      },
  )
  # Keys carry the template version as a suffix ("...v1").
  return {
      "better_defaultv1": better_default,
      "benchmark_rank1v1": benchmark_rank1,
  }


class CartLearner(generic_learner.GenericLearner):
r"""Cart learning algorithm.
Expand All @@ -1237,6 +1379,13 @@ class CartLearner(generic_learner.GenericLearner):
print(model.summary())
```
Hyperparameters are configured to give reasonable results for typical
datasets. Hyperparameters can also be modified manually (see descriptions
below) or by applying the hyperparameter templates available with
`CartLearner.hyperparameter_templates()` (see this function's documentation
for
details).
Attributes:
label: Label of the dataset. The label column should not be identified as a
feature in the `features` parameter.
Expand Down Expand Up @@ -1561,6 +1710,7 @@ def __init__(
"uplift_split_score": uplift_split_score,
"validation_ratio": validation_ratio,
}

data_spec_args = dataspec.DataSpecInferenceArgs(
columns=dataspec.normalize_column_defs(features),
include_all_columns=include_all_columns,
Expand Down Expand Up @@ -1601,3 +1751,17 @@ def capabilities(cls) -> abstract_learner_pb2.LearnerCapabilities:
support_max_model_size_in_memory=False,
support_monotonic_constraints=False,
)

@classmethod
def hyperparameter_templates(
    cls,
) -> Dict[str, hyperparameters.HyperparameterTemplate]:
  r"""Hyperparameter templates for this Learner.

  This learner currently does not provide any hyperparameter templates, this
  method is provided for consistency with other learners.

  Returns:
    Empty dictionary.
  """
  # No pre-defined templates exist for the CART learner.
  return dict()
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ def py_wrap_yggdrasil_learners(
"//ydf/dataset:dataset",
"//ydf/dataset:dataspec",
"//ydf/learner:generic_learner",
"//ydf/learner:hyperparameters",
"//ydf/learner:tuner",
],
data = [":" + run_wrapper_name, ":" + wrapper_name],
Expand Down
Loading

0 comments on commit 0de44e7

Please sign in to comment.