Skip to content

Commit

Permalink
json query bugfix (#394)
Browse files Browse the repository at this point in the history
* fix <, > in search
* add/updates tests
* only numericize parameters and metrics
* don't zero, just remove
  • Loading branch information
ryanSoley authored Oct 12, 2023
1 parent 0ced4ab commit 4d042c5
Show file tree
Hide file tree
Showing 2 changed files with 86 additions and 13 deletions.
64 changes: 52 additions & 12 deletions rubicon_ml/client/rubicon_json.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import copy
import numbers
from typing import Any, Dict, List, Optional, Type, Union

from jsonpath_ng.ext import parse
Expand Down Expand Up @@ -33,6 +35,9 @@ def __init__(
experiments = self._validate_input(experiments, Experiment, "experiments")

self._json = self._convert_to_json(rubicon_objects, projects, experiments)
self._json_numeric = self._convert_to_json(
rubicon_objects, projects, experiments, filter_nonnumeric=True
)

def _validate_input(
self,
Expand All @@ -54,57 +59,77 @@ def _convert_to_json(
rubicon_objects: Optional[List[Rubicon]] = None,
projects: Optional[List[Project]] = None,
experiments: Optional[List[Experiment]] = None,
filter_nonnumeric: bool = False,
):
rubicon_json = {}

if rubicon_objects is not None:
rubicon_json["project"] = self._rubicon_to_json(rubicon_objects)["project"]
rubicon_json["project"] = self._rubicon_to_json(
rubicon_objects, filter_nonnumeric=filter_nonnumeric
)["project"]

if projects is not None:
project_json = rubicon_json.get("project", [])
project_json.extend(self._projects_to_json(projects)["project"])
project_json.extend(
self._projects_to_json(projects, filter_nonnumeric=filter_nonnumeric)["project"]
)
rubicon_json["project"] = project_json

if experiments is not None:
rubicon_json["experiment"] = self._experiments_to_json(experiments)["experiment"]
rubicon_json["experiment"] = self._experiments_to_json(
experiments, filter_nonnumeric=filter_nonnumeric
)["experiment"]

return rubicon_json

def _experiments_to_json(self, experiments: List[Experiment]):
def _experiments_to_json(self, experiments: List[Experiment], filter_nonnumeric: bool = False):
rubicon_json: Dict[str, Any] = {"experiment": []}

for e in experiments:
experiment_json = e._domain.__dict__
experiment_json = copy.deepcopy(e._domain.__dict__)
experiment_json["feature"] = [f._domain.__dict__ for f in e.features()]
experiment_json["parameter"] = [p._domain.__dict__ for p in e.parameters()]
experiment_json["metric"] = [m._domain.__dict__ for m in e.metrics()]

experiment_json["parameter"] = []
for parameter in e.parameters():
if not filter_nonnumeric or isinstance(parameter.value, numbers.Number):
experiment_json["parameter"].append(parameter._domain.__dict__)

experiment_json["metric"] = []
for metric in e.metrics():
if not filter_nonnumeric or isinstance(metric.value, numbers.Number):
experiment_json["metric"].append(metric._domain.__dict__)

experiment_json["artifact"] = [a._domain.__dict__ for a in e.artifacts()]
experiment_json["dataframe"] = [d._domain.__dict__ for d in e.dataframes()]

rubicon_json["experiment"].append(experiment_json)

return rubicon_json

def _projects_to_json(self, projects: List[Project]):
def _projects_to_json(self, projects: List[Project], filter_nonnumeric: bool = False):
rubicon_json: Dict[str, Any] = {"project": []}

for p in projects:
project_json = p._domain.__dict__
project_json["artifact"] = [a._domain.__dict__ for a in p.artifacts()]
project_json["dataframe"] = [d._domain.__dict__ for d in p.dataframes()]

experiment_json = self._experiments_to_json(p.experiments())
experiment_json = self._experiments_to_json(
p.experiments(), filter_nonnumeric=filter_nonnumeric
)
project_json["experiment"] = experiment_json["experiment"]

rubicon_json["project"].append(project_json)

return rubicon_json

def _rubicon_to_json(self, rubicon_objects: List[Rubicon]):
def _rubicon_to_json(self, rubicon_objects: List[Rubicon], filter_nonnumeric: bool = False):
rubicon_json: Dict[str, Any] = {"project": []}

for r in rubicon_objects:
rubicon_json["project"].extend(self._projects_to_json(r.projects())["project"])
rubicon_json["project"].extend(
self._projects_to_json(r.projects(), filter_nonnumeric=filter_nonnumeric)["project"]
)

return rubicon_json

Expand All @@ -119,11 +144,26 @@ def search(self, query: str):
query: JSONPath-like query
"""

return parse(query).find(self._json)
if ">" in query or "<" in query:
# non-numerics break greater than and less than comparisons in `jsonpath_ng`,
# so we search the json with non-numeric parameters and metrics removed when
# '>' or '<' appear in the `query`
json = self._json_numeric
else:
json = self._json

return parse(query).find(json)

@property
def json(self):
    """
    The json representation of the `rubicon-ml` objects.

    This is the unfiltered representation: parameters and metrics are
    included whatever the type of their value. The internal dictionary
    itself is returned, not a copy.
    """
    return self._json

@property
def json_numeric(self):
    """
    The json representation of the `rubicon-ml` objects with every
    non-numeric parameter and metric removed.

    Non-numeric entries are dropped, not replaced with a placeholder
    value. The internal dictionary itself is returned, not a copy.
    """
    return self._json_numeric
35 changes: 34 additions & 1 deletion tests/unit/client/test_rubicon_json_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ def test_experiment_to_json_single_experiment(rubicon_and_project_client):
experiment.log_feature("year")
experiment.log_metric("accuracy", 0.87)
experiment.log_metric("runtime(s)", 45)
experiment.log_metric("kernel", "linear")
experiment.log_artifact(name="example artifact", data_bytes=b"a")
experiment.log_dataframe(pd.DataFrame([[0, 1], [1, 0]]))

Expand All @@ -30,8 +31,19 @@ def test_experiment_to_json_single_experiment(rubicon_and_project_client):
assert isinstance(json["experiment"][0]["artifact"], list)
assert isinstance(json["experiment"][0]["dataframe"], list)

assert isinstance(json["experiment"][0]["metric"][0]["value"], float)
assert isinstance(json["experiment"][0]["metric"][1]["value"], int)
assert isinstance(json["experiment"][0]["metric"][2]["value"], str)

assert json["experiment"][0]["tags"] == ["a", "b"]
assert len(json["experiment"][0]["metric"]) == 2
assert len(json["experiment"][0]["metric"]) == 3

json_numeric = experiment_as_json.json_numeric

assert isinstance(json_numeric["experiment"][0]["metric"][0]["value"], float)
assert isinstance(json_numeric["experiment"][0]["metric"][1]["value"], int)

assert len(json_numeric["experiment"][0]["metric"]) == 2


def test_experiment_to_json_multiple_experiments(rubicon_and_project_client_with_experiments):
Expand Down Expand Up @@ -158,3 +170,24 @@ def test_convert_to_json_projects_and_experiments_input(
assert isinstance(json, dict)
assert isinstance(json["project"], list)
assert len(json["experiment"]) == 2


@pytest.mark.parametrize(
    ["query", "expected_results"],
    [
        ("$..experiment[*].metric[?(@.value>0.0)].name", ["accuracy", "runtime(s)"]),
        ("$..experiment[*].metric[?(@.name='kernel')].value", ["linear"]),
    ],
)
def test_search(query, expected_results, rubicon_and_project_client):
    """Check `RubiconJSON.search` for a comparison query and an equality query.

    The experiment logs two numeric metrics and one string metric, so the
    `>` query has to skip the string value while the equality query must
    still be able to return it.
    """
    _, project = rubicon_and_project_client
    experiment = project.log_experiment("search experiment", tags=["a", "b"])
    experiment.log_metric("accuracy", 0.87)
    experiment.log_metric("runtime(s)", 45)
    experiment.log_metric("kernel", "linear")

    experiment_as_json = RubiconJSON(experiments=experiment)
    results = experiment_as_json.search(query)

    # Compare the whole list: pairing with `zip` stops at the shorter input,
    # so an empty or truncated result set would otherwise pass unnoticed.
    assert [result.value for result in results] == expected_results

0 comments on commit 4d042c5

Please sign in to comment.