# Patch overview (review notes only; the patch text below is unmodified).
#
# rubicon_ml/client/rubicon_json.py
#   - __init__ now builds a second document, self._json_numeric, by calling
#     _convert_to_json(..., filter_nonnumeric=True) in addition to self._json.
#   - _convert_to_json, _rubicon_to_json, _projects_to_json and
#     _experiments_to_json each gain a `filter_nonnumeric: bool = False`
#     parameter that is passed down the call chain by keyword.
#   - _experiments_to_json now deep-copies e._domain.__dict__ before adding the
#     "feature"/"parameter"/"metric"/"artifact"/"dataframe" keys (previously it
#     assigned those keys directly on e._domain.__dict__).
#   - When filter_nonnumeric is true, only parameters and metrics whose `.value`
#     is an instance of numbers.Number are appended; features, artifacts and
#     dataframes are not filtered.
#   - search() runs the query against self._json_numeric when the query string
#     contains '>' or '<', and against self._json otherwise.
#   - A new `json_numeric` property returns self._json_numeric.
#
# tests/unit/client/test_rubicon_json_client.py
#   - test_experiment_to_json_single_experiment logs an additional string
#     metric ("kernel" = "linear") and asserts 3 metrics in `.json` versus 2 in
#     `.json_numeric`.
#   - A new parametrized test_search exercises one '>' filter query and one
#     name-equality query.
#
# NOTE(review): the comment added in search() says non-numeric values are
#   "replaced"; the code shown omits those parameter/metric entries instead.
# NOTE(review): the '>' / '<' check is a substring test over the whole query,
#   so it also triggers for '>=' / '<=' and for those characters inside a
#   quoted literal - confirm that is intended.
# NOTE(review): test_search compares with zip(results, expected_results); zip
#   stops at the shorter sequence, so an empty result list would not fail the
#   test - consider also asserting on the number of results.
# NOTE(review): bool values satisfy isinstance(x, numbers.Number) - confirm
#   boolean parameters/metrics are meant to stay in the numeric document.
# NOTE(review): this copy of the patch appears to have had its line breaks
#   collapsed, so hunk line counts cannot be checked from this text.
diff --git a/rubicon_ml/client/rubicon_json.py b/rubicon_ml/client/rubicon_json.py index 1f53e1cc..e2364e53 100644 --- a/rubicon_ml/client/rubicon_json.py +++ b/rubicon_ml/client/rubicon_json.py @@ -1,3 +1,5 @@ +import copy +import numbers from typing import Any, Dict, List, Optional, Type, Union from jsonpath_ng.ext import parse @@ -33,6 +35,9 @@ def __init__( experiments = self._validate_input(experiments, Experiment, "experiments") self._json = self._convert_to_json(rubicon_objects, projects, experiments) + self._json_numeric = self._convert_to_json( + rubicon_objects, projects, experiments, filter_nonnumeric=True + ) def _validate_input( self, @@ -54,30 +59,46 @@ def _convert_to_json( rubicon_objects: Optional[List[Rubicon]] = None, projects: Optional[List[Project]] = None, experiments: Optional[List[Experiment]] = None, + filter_nonnumeric: bool = False, ): rubicon_json = {} if rubicon_objects is not None: - rubicon_json["project"] = self._rubicon_to_json(rubicon_objects)["project"] + rubicon_json["project"] = self._rubicon_to_json( + rubicon_objects, filter_nonnumeric=filter_nonnumeric + )["project"] if projects is not None: project_json = rubicon_json.get("project", []) - project_json.extend(self._projects_to_json(projects)["project"]) + project_json.extend( + self._projects_to_json(projects, filter_nonnumeric=filter_nonnumeric)["project"] + ) rubicon_json["project"] = project_json if experiments is not None: - rubicon_json["experiment"] = self._experiments_to_json(experiments)["experiment"] + rubicon_json["experiment"] = self._experiments_to_json( + experiments, filter_nonnumeric=filter_nonnumeric + )["experiment"] return rubicon_json - def _experiments_to_json(self, experiments: List[Experiment]): + def _experiments_to_json(self, experiments: List[Experiment], filter_nonnumeric: bool = False): rubicon_json: Dict[str, Any] = {"experiment": []} for e in experiments: - experiment_json = e._domain.__dict__ + experiment_json = copy.deepcopy(e._domain.__dict__) 
experiment_json["feature"] = [f._domain.__dict__ for f in e.features()] - experiment_json["parameter"] = [p._domain.__dict__ for p in e.parameters()] - experiment_json["metric"] = [m._domain.__dict__ for m in e.metrics()] + + experiment_json["parameter"] = [] + for parameter in e.parameters(): + if not filter_nonnumeric or isinstance(parameter.value, numbers.Number): + experiment_json["parameter"].append(parameter._domain.__dict__) + + experiment_json["metric"] = [] + for metric in e.metrics(): + if not filter_nonnumeric or isinstance(metric.value, numbers.Number): + experiment_json["metric"].append(metric._domain.__dict__) + experiment_json["artifact"] = [a._domain.__dict__ for a in e.artifacts()] experiment_json["dataframe"] = [d._domain.__dict__ for d in e.dataframes()] @@ -85,7 +106,7 @@ def _experiments_to_json(self, experiments: List[Experiment]): return rubicon_json - def _projects_to_json(self, projects: List[Project]): + def _projects_to_json(self, projects: List[Project], filter_nonnumeric: bool = False): rubicon_json: Dict[str, Any] = {"project": []} for p in projects: @@ -93,18 +114,22 @@ def _projects_to_json(self, projects: List[Project]): project_json["artifact"] = [a._domain.__dict__ for a in p.artifacts()] project_json["dataframe"] = [d._domain.__dict__ for d in p.dataframes()] - experiment_json = self._experiments_to_json(p.experiments()) + experiment_json = self._experiments_to_json( + p.experiments(), filter_nonnumeric=filter_nonnumeric + ) project_json["experiment"] = experiment_json["experiment"] rubicon_json["project"].append(project_json) return rubicon_json - def _rubicon_to_json(self, rubicon_objects: List[Rubicon]): + def _rubicon_to_json(self, rubicon_objects: List[Rubicon], filter_nonnumeric: bool = False): rubicon_json: Dict[str, Any] = {"project": []} for r in rubicon_objects: - rubicon_json["project"].extend(self._projects_to_json(r.projects())["project"]) + rubicon_json["project"].extend( + self._projects_to_json(r.projects(), 
filter_nonnumeric=filter_nonnumeric)["project"] + ) return rubicon_json @@ -119,7 +144,15 @@ def search(self, query: str): query: JSONPath-like query """ - return parse(query).find(self._json) + if ">" in query or "<" in query: + # non-numerics break greater than and less than comparisons in `jsonpath_ng` + # so we use the json with replaced non-numeric values when '>' or '<' appear + # in the `query` + json = self._json_numeric + else: + json = self._json + + return parse(query).find(json) @property def json(self): @@ -127,3 +160,10 @@ def json(self): The json representation of the `rubicon-ml` objects. """ return self._json + + @property + def json_numeric(self): + """ + The json representation of the `rubicon-ml` objects with numeric values. + """ + return self._json_numeric diff --git a/tests/unit/client/test_rubicon_json_client.py b/tests/unit/client/test_rubicon_json_client.py index 349184c7..3284870f 100644 --- a/tests/unit/client/test_rubicon_json_client.py +++ b/tests/unit/client/test_rubicon_json_client.py @@ -11,6 +11,7 @@ def test_experiment_to_json_single_experiment(rubicon_and_project_client): experiment.log_feature("year") experiment.log_metric("accuracy", 0.87) experiment.log_metric("runtime(s)", 45) + experiment.log_metric("kernel", "linear") experiment.log_artifact(name="example artifact", data_bytes=b"a") experiment.log_dataframe(pd.DataFrame([[0, 1], [1, 0]])) @@ -30,8 +31,19 @@ def test_experiment_to_json_single_experiment(rubicon_and_project_client): assert isinstance(json["experiment"][0]["artifact"], list) assert isinstance(json["experiment"][0]["dataframe"], list) + assert isinstance(json["experiment"][0]["metric"][0]["value"], float) + assert isinstance(json["experiment"][0]["metric"][1]["value"], int) + assert isinstance(json["experiment"][0]["metric"][2]["value"], str) + assert json["experiment"][0]["tags"] == ["a", "b"] - assert len(json["experiment"][0]["metric"]) == 2 + assert len(json["experiment"][0]["metric"]) == 3 + + json_numeric 
= experiment_as_json.json_numeric + + assert isinstance(json_numeric["experiment"][0]["metric"][0]["value"], float) + assert isinstance(json_numeric["experiment"][0]["metric"][1]["value"], int) + + assert len(json_numeric["experiment"][0]["metric"]) == 2 def test_experiment_to_json_multiple_experiments(rubicon_and_project_client_with_experiments): @@ -158,3 +170,24 @@ def test_convert_to_json_projects_and_experiments_input( assert isinstance(json, dict) assert isinstance(json["project"], list) assert len(json["experiment"]) == 2 + + +@pytest.mark.parametrize( + ["query", "expected_results"], + [ + ("$..experiment[*].metric[?(@.value>0.0)].name", ["accuracy", "runtime(s)"]), + ("$..experiment[*].metric[?(@.name='kernel')].value", ["linear"]), + ], +) +def test_search(query, expected_results, rubicon_and_project_client): + _, project = rubicon_and_project_client + experiment = project.log_experiment("search experiment", tags=["a", "b"]) + experiment.log_metric("accuracy", 0.87) + experiment.log_metric("runtime(s)", 45) + experiment.log_metric("kernel", "linear") + + experiment_as_json = RubiconJSON(experiments=experiment) + results = experiment_as_json.search(query) + + for result, expected_result in zip(results, expected_results): + assert result.value == expected_result