From 03f1f845d59438618411aa520cbc3b137e0ff899 Mon Sep 17 00:00:00 2001 From: Ryan Soley Date: Mon, 19 Aug 2024 11:09:21 -0400 Subject: [PATCH 01/16] test setup & JSON fixtures --- tests/fixtures.py | 64 +++++++++++++++++++ .../regression/test_read_write_regression.py | 13 ++++ 2 files changed, 77 insertions(+) create mode 100644 tests/regression/test_read_write_regression.py diff --git a/tests/fixtures.py b/tests/fixtures.py index 87136d7a..89b7d4d7 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -562,3 +562,67 @@ def make_classification_dask_df(make_classification_df): X_df, y_da = dd.from_pandas(X, npartitions=1), da.from_array(y) return X_df, y_da + + +@pytest.fixture +def project_json(): + """JSON representation of a project.""" + return {"id": uuid.uuid4()} + + +@pytest.fixture +def experiment_json(): + """JSON representation of an experiment.""" + return {"id": uuid.uuid4()} + + +@pytest.fixture +def feature_json(): + """JSON representation of a feature.""" + return {"id": uuid.uuid4()} + + +@pytest.fixture +def metric_json(): + """JSON representation of a metric.""" + return {"id": uuid.uuid4()} + + +@pytest.fixture +def parameter_json(): + """JSON representation of a parameter.""" + return {"id": uuid.uuid4()} + + +@pytest.fixture +def artifact_json(): + """JSON representation of an artifact.""" + return {"id": uuid.uuid4()} + + +@pytest.fixture +def dataframe_json(): + """JSON representation of a dataframe.""" + return {"id": uuid.uuid4()} + + +@pytest.fixture +def json_entities( + artifact_json, + dataframe_json, + experiment_json, + feature_json, + metric_json, + parameter_json, + project_json, +): + """JSON representations of each entity in the rubicon-ml data model.""" + return { + "artifact": artifact_json, + "dataframe": dataframe_json, + "experiment": experiment_json, + "feature": feature_json, + "metric": metric_json, + "parameter": parameter_json, + "project": project_json, + } diff --git a/tests/regression/test_read_write_regression.py 
b/tests/regression/test_read_write_regression.py new file mode 100644 index 00000000..88ec52ad --- /dev/null +++ b/tests/regression/test_read_write_regression.py @@ -0,0 +1,13 @@ +def test_read_regression(json_entities): + """Tests that `rubicon_ml` can read each domain entity from the filesystem.""" + pass + + +def test_read_write_regression(json_entities): + """Tests that `rubicon_ml` can read each domain entity that it wrote.""" + pass + + +def test_write_regression(json_entities): + """Tests that `rubicon_ml` can write each domain entity to the filesystem.""" + pass From 69c728b03375842a82884c29e58165f3e033933b Mon Sep 17 00:00:00 2001 From: Ryan Soley Date: Mon, 19 Aug 2024 11:41:04 -0400 Subject: [PATCH 02/16] rename test file --- ...est_read_write_regression.py => test_repository_read_write.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/regression/{test_read_write_regression.py => test_repository_read_write.py} (100%) diff --git a/tests/regression/test_read_write_regression.py b/tests/regression/test_repository_read_write.py similarity index 100% rename from tests/regression/test_read_write_regression.py rename to tests/regression/test_repository_read_write.py From f506bdcf6bede1dc984626c4b8b32181db5b6fcd Mon Sep 17 00:00:00 2001 From: Ryan Soley Date: Mon, 19 Aug 2024 12:23:47 -0400 Subject: [PATCH 03/16] add projects to regression tests --- tests/fixtures.py | 44 ++++------ .../regression/test_repository_read_write.py | 88 +++++++++++++++++-- 2 files changed, 97 insertions(+), 35 deletions(-) diff --git a/tests/fixtures.py b/tests/fixtures.py index 89b7d4d7..bc80808a 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -1,3 +1,4 @@ +import datetime import os import random import uuid @@ -567,62 +568,47 @@ def make_classification_dask_df(make_classification_df): @pytest.fixture def project_json(): """JSON representation of a project.""" - return {"id": uuid.uuid4()} + return { + "name": "test project", + "created_at": 
datetime.datetime(2024, 1, 1), + "description": "test project description", + "github_url": "github.com", + "id": str(uuid.uuid4()), + "training_metadata": [["training", "metadata"]], + } @pytest.fixture def experiment_json(): """JSON representation of an experiment.""" - return {"id": uuid.uuid4()} + return {"id": str(uuid.uuid4())} @pytest.fixture def feature_json(): """JSON representation of a feature.""" - return {"id": uuid.uuid4()} + return {"id": str(uuid.uuid4())} @pytest.fixture def metric_json(): """JSON representation of a metric.""" - return {"id": uuid.uuid4()} + return {"id": str(uuid.uuid4())} @pytest.fixture def parameter_json(): """JSON representation of a parameter.""" - return {"id": uuid.uuid4()} + return {"id": str(uuid.uuid4())} @pytest.fixture def artifact_json(): """JSON representation of an artifact.""" - return {"id": uuid.uuid4()} + return {"id": str(uuid.uuid4())} @pytest.fixture def dataframe_json(): """JSON representation of a dataframe.""" - return {"id": uuid.uuid4()} - - -@pytest.fixture -def json_entities( - artifact_json, - dataframe_json, - experiment_json, - feature_json, - metric_json, - parameter_json, - project_json, -): - """JSON representations of each entity in the rubicon-ml data model.""" - return { - "artifact": artifact_json, - "dataframe": dataframe_json, - "experiment": experiment_json, - "feature": feature_json, - "metric": metric_json, - "parameter": parameter_json, - "project": project_json, - } + return {"id": str(uuid.uuid4())} diff --git a/tests/regression/test_repository_read_write.py b/tests/regression/test_repository_read_write.py index 88ec52ad..636fa6b2 100644 --- a/tests/regression/test_repository_read_write.py +++ b/tests/regression/test_repository_read_write.py @@ -1,13 +1,89 @@ -def test_read_regression(json_entities): +import os +import tempfile + +import fsspec + +from rubicon_ml import domain +from rubicon_ml.repository import LocalRepository +from rubicon_ml.repository.utils import json, slugify + 
+ +def test_read_regression( + artifact_json, + dataframe_json, + experiment_json, + feature_json, + metric_json, + parameter_json, + project_json, +): """Tests that `rubicon_ml` can read each domain entity from the filesystem.""" - pass + filesystem = fsspec.filesystem("file") + + with tempfile.TemporaryDirectory() as temp_dir_name: + root_dir = os.path.join(temp_dir_name, "test-rubicon-ml") + repository = LocalRepository(root_dir=root_dir) + + expected_project_path = os.path.join( + root_dir, + slugify(project_json["name"]), + "metadata.json", + ) + + filesystem.mkdirs(os.path.dirname(expected_project_path), exist_ok=True) + with filesystem.open(expected_project_path, "w") as file: + file.write(json.dumps(project_json)) + + project = repository.get_project(project_json["name"]).__dict__ + assert project == project_json -def test_read_write_regression(json_entities): + +def test_read_write_regression( + artifact_json, + dataframe_json, + experiment_json, + feature_json, + metric_json, + parameter_json, + project_json, +): """Tests that `rubicon_ml` can read each domain entity that it wrote.""" - pass + with tempfile.TemporaryDirectory() as temp_dir_name: + root_dir = os.path.join(temp_dir_name, "test-rubicon-ml") + repository = LocalRepository(root_dir=root_dir) + + repository.create_project(domain.Project(**project_json)) + project = repository.get_project(project_json["name"]).__dict__ + assert project == project_json -def test_write_regression(json_entities): + +def test_write_regression( + artifact_json, + dataframe_json, + experiment_json, + feature_json, + metric_json, + parameter_json, + project_json, +): """Tests that `rubicon_ml` can write each domain entity to the filesystem.""" - pass + filesystem = fsspec.filesystem("file") + + with tempfile.TemporaryDirectory() as temp_dir_name: + root_dir = os.path.join(temp_dir_name, "test-rubicon-ml") + repository = LocalRepository(root_dir=root_dir) + + repository.create_project(domain.Project(**project_json)) + + 
expected_project_path = os.path.join( + root_dir, + slugify(project_json["name"]), + "metadata.json", + ) + + with filesystem.open(expected_project_path, "r") as file: + project = json.loads(file.read()) + + assert project == project_json From 29350298374928de79d1a09aa5445ca4f0ecef62 Mon Sep 17 00:00:00 2001 From: Ryan Soley Date: Wed, 28 Aug 2024 11:42:20 -0400 Subject: [PATCH 04/16] add experiments to regression tests --- tests/fixtures.py | 14 ++++- .../regression/test_repository_read_write.py | 54 +++++++++++++++---- 2 files changed, 57 insertions(+), 11 deletions(-) diff --git a/tests/fixtures.py b/tests/fixtures.py index bc80808a..9c913a0e 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -581,7 +581,19 @@ def project_json(): @pytest.fixture def experiment_json(): """JSON representation of an experiment.""" - return {"id": str(uuid.uuid4())} + return { + "project_name": "test project", + "branch_name": "test-branch", + "comments": ["comment a", "comment b"], + "commit_hash": "abcde01", + "created_at": datetime.datetime(2024, 1, 1), + "description": "test experiment description", + "id": str(uuid.uuid4()), + "model_name": "test model", + "name": "test experiment", + "tags": ["tag_a", "tag_b"], + "training_metadata": [["training", "metadata"]], + } @pytest.fixture diff --git a/tests/regression/test_repository_read_write.py b/tests/regression/test_repository_read_write.py index 636fa6b2..6e176eb2 100644 --- a/tests/regression/test_repository_read_write.py +++ b/tests/regression/test_repository_read_write.py @@ -24,11 +24,8 @@ def test_read_regression( root_dir = os.path.join(temp_dir_name, "test-rubicon-ml") repository = LocalRepository(root_dir=root_dir) - expected_project_path = os.path.join( - root_dir, - slugify(project_json["name"]), - "metadata.json", - ) + expected_project_dir = os.path.join(root_dir, slugify(project_json["name"])) + expected_project_path = os.path.join(expected_project_dir, "metadata.json") 
filesystem.mkdirs(os.path.dirname(expected_project_path), exist_ok=True) with filesystem.open(expected_project_path, "w") as file: @@ -38,6 +35,24 @@ def test_read_regression( assert project == project_json + expected_experiment_dir = os.path.join( + expected_project_dir, + "experiments", + experiment_json["id"], + ) + expected_experiment_path = os.path.join(expected_experiment_dir, "metadata.json") + + filesystem.mkdirs(os.path.dirname(expected_experiment_path), exist_ok=True) + with filesystem.open(expected_experiment_path, "w") as file: + file.write(json.dumps(experiment_json)) + + experiment = repository.get_experiment( + project_json["name"], + experiment_json["id"], + ).__dict__ + + assert experiment == experiment_json + def test_read_write_regression( artifact_json, @@ -58,6 +73,14 @@ def test_read_write_regression( assert project == project_json + repository.create_experiment(domain.Experiment(**experiment_json)) + experiment = repository.get_experiment( + project_json["name"], + experiment_json["id"], + ).__dict__ + + assert experiment == experiment_json + def test_write_regression( artifact_json, @@ -77,13 +100,24 @@ def test_write_regression( repository.create_project(domain.Project(**project_json)) - expected_project_path = os.path.join( - root_dir, - slugify(project_json["name"]), - "metadata.json", - ) + expected_project_dir = os.path.join(root_dir, slugify(project_json["name"])) + expected_project_path = os.path.join(expected_project_dir, "metadata.json") with filesystem.open(expected_project_path, "r") as file: project = json.loads(file.read()) assert project == project_json + + repository.create_experiment(domain.Experiment(**experiment_json)) + + expected_experiment_dir = os.path.join( + expected_project_dir, + "experiments", + experiment_json["id"], + ) + expected_experiment_path = os.path.join(expected_experiment_dir, "metadata.json") + + with filesystem.open(expected_experiment_path, "r") as file: + experiment = json.loads(file.read()) + + 
assert experiment == experiment_json From dc5f745d540a2437dad5f95053589f8bd8bce574 Mon Sep 17 00:00:00 2001 From: Ryan Soley Date: Wed, 28 Aug 2024 12:16:13 -0400 Subject: [PATCH 05/16] add features, metrics & parameters to regression tests --- tests/fixtures.py | 31 +++- .../regression/test_repository_read_write.py | 150 ++++++++++++++++++ 2 files changed, 178 insertions(+), 3 deletions(-) diff --git a/tests/fixtures.py b/tests/fixtures.py index 9c913a0e..5dcffdb0 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -599,19 +599,44 @@ def experiment_json(): @pytest.fixture def feature_json(): """JSON representation of a feature.""" - return {"id": str(uuid.uuid4())} + return { + "name": "test feature", + "comments": ["comment a", "comment b"], + "created_at": datetime.datetime(2024, 1, 1), + "description": "test feature description", + "id": str(uuid.uuid4()), + "importance": 1.0, + "tags": ["tag_a", "tag_b"], + } @pytest.fixture def metric_json(): """JSON representation of a metric.""" - return {"id": str(uuid.uuid4())} + return { + "name": "test metric", + "value": 1.0, + "comments": ["comment a", "comment b"], + "created_at": datetime.datetime(2024, 1, 1), + "description": "test metric description", + "directionality": "score", + "id": str(uuid.uuid4()), + "tags": ["tag_a", "tag_b"], + } @pytest.fixture def parameter_json(): """JSON representation of a parameter.""" - return {"id": str(uuid.uuid4())} + return { + "name": "test parameter", + "value": 1.0, + "comments": ["comment a", "comment b"], + "created_at": datetime.datetime(2024, 1, 1), + "description": "test parameter description", + "id": str(uuid.uuid4()), + "tags": ["tag_a", "tag_b"], + } @pytest.fixture diff --git a/tests/regression/test_repository_read_write.py b/tests/regression/test_repository_read_write.py index 6e176eb2..95837eea 100644 --- a/tests/regression/test_repository_read_write.py +++ b/tests/regression/test_repository_read_write.py @@ -53,6 +53,63 @@ def test_read_regression( assert 
experiment == experiment_json + expected_feature_dir = os.path.join( + expected_experiment_dir, + "features", + slugify(feature_json["name"]), + ) + expected_feature_path = os.path.join(expected_feature_dir, "metadata.json") + + filesystem.mkdirs(os.path.dirname(expected_feature_path), exist_ok=True) + with filesystem.open(expected_feature_path, "w") as file: + file.write(json.dumps(feature_json)) + + feature = repository.get_feature( + project_json["name"], + experiment_json["id"], + feature_json["name"], + ).__dict__ + + assert feature == feature_json + + expected_metric_dir = os.path.join( + expected_experiment_dir, + "metrics", + slugify(metric_json["name"]), + ) + expected_metric_path = os.path.join(expected_metric_dir, "metadata.json") + + filesystem.mkdirs(os.path.dirname(expected_metric_path), exist_ok=True) + with filesystem.open(expected_metric_path, "w") as file: + file.write(json.dumps(metric_json)) + + metric = repository.get_metric( + project_json["name"], + experiment_json["id"], + metric_json["name"], + ).__dict__ + + assert metric == metric_json + + expected_parameter_dir = os.path.join( + expected_experiment_dir, + "parameters", + slugify(parameter_json["name"]), + ) + expected_parameter_path = os.path.join(expected_parameter_dir, "metadata.json") + + filesystem.mkdirs(os.path.dirname(expected_parameter_path), exist_ok=True) + with filesystem.open(expected_parameter_path, "w") as file: + file.write(json.dumps(parameter_json)) + + parameter = repository.get_parameter( + project_json["name"], + experiment_json["id"], + parameter_json["name"], + ).__dict__ + + assert parameter == parameter_json + def test_read_write_regression( artifact_json, @@ -81,6 +138,45 @@ def test_read_write_regression( assert experiment == experiment_json + repository.create_feature( + domain.Feature(**feature_json), + project_json["name"], + experiment_json["id"], + ) + feature = repository.get_feature( + project_json["name"], + experiment_json["id"], + feature_json["name"], 
+ ).__dict__ + + assert feature == feature_json + + repository.create_metric( + domain.Metric(**metric_json), + project_json["name"], + experiment_json["id"], + ) + metric = repository.get_metric( + project_json["name"], + experiment_json["id"], + metric_json["name"], + ).__dict__ + + assert metric == metric_json + + repository.create_parameter( + domain.Parameter(**parameter_json), + project_json["name"], + experiment_json["id"], + ) + parameter = repository.get_parameter( + project_json["name"], + experiment_json["id"], + parameter_json["name"], + ).__dict__ + + assert parameter == parameter_json + def test_write_regression( artifact_json, @@ -121,3 +217,57 @@ def test_write_regression( experiment = json.loads(file.read()) assert experiment == experiment_json + + repository.create_feature( + domain.Feature(**feature_json), + project_json["name"], + experiment_json["id"], + ) + + expected_feature_dir = os.path.join( + expected_experiment_dir, + "features", + slugify(feature_json["name"]), + ) + expected_feature_path = os.path.join(expected_feature_dir, "metadata.json") + + with filesystem.open(expected_feature_path, "r") as file: + feature = json.loads(file.read()) + + assert feature == feature_json + + repository.create_metric( + domain.Metric(**metric_json), + project_json["name"], + experiment_json["id"], + ) + + expected_metric_dir = os.path.join( + expected_experiment_dir, + "metrics", + slugify(metric_json["name"]), + ) + expected_metric_path = os.path.join(expected_metric_dir, "metadata.json") + + with filesystem.open(expected_metric_path, "r") as file: + metric = json.loads(file.read()) + + assert metric == metric_json + + repository.create_parameter( + domain.Parameter(**parameter_json), + project_json["name"], + experiment_json["id"], + ) + + expected_parameter_dir = os.path.join( + expected_experiment_dir, + "parameters", + slugify(parameter_json["name"]), + ) + expected_parameter_path = os.path.join(expected_parameter_dir, "metadata.json") + + with 
filesystem.open(expected_parameter_path, "r") as file: + parameter = json.loads(file.read()) + + assert parameter == parameter_json From 8ca34b80392c98190feecc8fd0e82843412fd3c8 Mon Sep 17 00:00:00 2001 From: Ryan Soley Date: Wed, 28 Aug 2024 16:19:32 -0400 Subject: [PATCH 06/16] add artifact & dataframe metadata to regression tests --- tests/fixtures.py | 60 ++++- .../regression/test_repository_read_write.py | 238 +++++++++++++++++- 2 files changed, 284 insertions(+), 14 deletions(-) diff --git a/tests/fixtures.py b/tests/fixtures.py index 5dcffdb0..390b7a77 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -573,7 +573,7 @@ def project_json(): "created_at": datetime.datetime(2024, 1, 1), "description": "test project description", "github_url": "github.com", - "id": str(uuid.uuid4()), + "id": "ccf6b8f8-a166-4084-a51f-4f2b6afd2ad9", "training_metadata": [["training", "metadata"]], } @@ -588,7 +588,7 @@ def experiment_json(): "commit_hash": "abcde01", "created_at": datetime.datetime(2024, 1, 1), "description": "test experiment description", - "id": str(uuid.uuid4()), + "id": "69e374cd-220b-4cda-9608-52277b38a976", "model_name": "test model", "name": "test experiment", "tags": ["tag_a", "tag_b"], @@ -640,12 +640,56 @@ def parameter_json(): @pytest.fixture -def artifact_json(): - """JSON representation of an artifact.""" - return {"id": str(uuid.uuid4())} +def artifact_project_json(): + """JSON representation of an artifact belonging to a project.""" + return { + "name": "test artifact", + "comments": ["comment a", "comment b"], + "created_at": datetime.datetime(2024, 1, 1), + "description": "test parameter description", + "id": str(uuid.uuid4()), + "parent_id": "ccf6b8f8-a166-4084-a51f-4f2b6afd2ad9", + "tags": ["tag_a", "tag_b"], + } + + +@pytest.fixture +def artifact_experiment_json(): + """JSON representation of an artifact belonging to an experiment.""" + return { + "name": "test artifact", + "comments": ["comment a", "comment b"], + "created_at": 
datetime.datetime(2024, 1, 1), + "description": "test parameter description", + "id": str(uuid.uuid4()), + "parent_id": "69e374cd-220b-4cda-9608-52277b38a976", + "tags": ["tag_a", "tag_b"], + } + + +@pytest.fixture +def dataframe_project_json(): + """JSON representation of a dataframe belonging to a project.""" + return { + "comments": ["comment a", "comment b"], + "created_at": datetime.datetime(2024, 1, 1), + "description": "test parameter description", + "id": str(uuid.uuid4()), + "name": "test dataframe", + "parent_id": "ccf6b8f8-a166-4084-a51f-4f2b6afd2ad9", + "tags": ["tag_a", "tag_b"], + } @pytest.fixture -def dataframe_json(): - """JSON representation of a dataframe.""" - return {"id": str(uuid.uuid4())} +def dataframe_experiment_json(): + """JSON representation of a dataframe belonging to an experiment.""" + return { + "comments": ["comment a", "comment b"], + "created_at": datetime.datetime(2024, 1, 1), + "description": "test parameter description", + "id": str(uuid.uuid4()), + "name": "test dataframe", + "parent_id": "69e374cd-220b-4cda-9608-52277b38a976", + "tags": ["tag_a", "tag_b"], + } diff --git a/tests/regression/test_repository_read_write.py b/tests/regression/test_repository_read_write.py index 95837eea..07894985 100644 --- a/tests/regression/test_repository_read_write.py +++ b/tests/regression/test_repository_read_write.py @@ -2,15 +2,21 @@ import tempfile import fsspec +import pandas as pd from rubicon_ml import domain from rubicon_ml.repository import LocalRepository from rubicon_ml.repository.utils import json, slugify +TEST_ARTIFACT_BINARY = b"test" +TEST_DATAFRAME = pd.DataFrame([[0]]) + def test_read_regression( - artifact_json, - dataframe_json, + artifact_project_json, + artifact_experiment_json, + dataframe_project_json, + dataframe_experiment_json, experiment_json, feature_json, metric_json, @@ -110,10 +116,94 @@ def test_read_regression( assert parameter == parameter_json + expected_artifact_project_dir = os.path.join( + 
expected_project_dir, + "artifacts", + artifact_project_json["id"], + ) + expected_artifact_project_path = os.path.join( + expected_artifact_project_dir, "metadata.json" + ) + + filesystem.mkdirs(os.path.dirname(expected_artifact_project_path), exist_ok=True) + with filesystem.open(expected_artifact_project_path, "w") as file: + file.write(json.dumps(artifact_project_json)) + + artifact_project = repository.get_artifact_metadata( + project_json["name"], + artifact_project_json["id"], + ).__dict__ + + assert artifact_project == artifact_project_json + + expected_artifact_experiment_dir = os.path.join( + expected_experiment_dir, + "artifacts", + artifact_experiment_json["id"], + ) + expected_artifact_experiment_path = os.path.join( + expected_artifact_experiment_dir, "metadata.json" + ) + + filesystem.mkdirs(os.path.dirname(expected_artifact_experiment_path), exist_ok=True) + with filesystem.open(expected_artifact_experiment_path, "w") as file: + file.write(json.dumps(artifact_experiment_json)) + + artifact_experiment = repository.get_artifact_metadata( + project_json["name"], + artifact_experiment_json["id"], + experiment_json["id"], + ).__dict__ + + assert artifact_experiment == artifact_experiment_json + + expected_dataframe_project_dir = os.path.join( + expected_project_dir, + "dataframes", + dataframe_project_json["id"], + ) + expected_dataframe_project_path = os.path.join( + expected_dataframe_project_dir, "metadata.json" + ) + + filesystem.mkdirs(os.path.dirname(expected_dataframe_project_path), exist_ok=True) + with filesystem.open(expected_dataframe_project_path, "w") as file: + file.write(json.dumps(dataframe_project_json)) + + dataframe_project = repository.get_dataframe_metadata( + project_json["name"], + dataframe_project_json["id"], + ).__dict__ + + assert dataframe_project == dataframe_project_json + + expected_dataframe_experiment_dir = os.path.join( + expected_experiment_dir, + "dataframes", + dataframe_experiment_json["id"], + ) + 
expected_dataframe_experiment_path = os.path.join( + expected_dataframe_experiment_dir, "metadata.json" + ) + + filesystem.mkdirs(os.path.dirname(expected_dataframe_experiment_path), exist_ok=True) + with filesystem.open(expected_dataframe_experiment_path, "w") as file: + file.write(json.dumps(dataframe_experiment_json)) + + dataframe_experiment = repository.get_dataframe_metadata( + project_json["name"], + dataframe_experiment_json["id"], + experiment_json["id"], + ).__dict__ + + assert dataframe_experiment == dataframe_experiment_json + def test_read_write_regression( - artifact_json, - dataframe_json, + artifact_project_json, + artifact_experiment_json, + dataframe_project_json, + dataframe_experiment_json, experiment_json, feature_json, metric_json, @@ -177,10 +267,64 @@ def test_read_write_regression( assert parameter == parameter_json + repository.create_artifact( + domain.Artifact(**artifact_project_json), + TEST_ARTIFACT_BINARY, + project_json["name"], + ) + artifact_project = repository.get_artifact_metadata( + project_json["name"], + artifact_project_json["id"], + ).__dict__ + + assert artifact_project == artifact_project_json + + repository.create_artifact( + domain.Artifact(**artifact_experiment_json), + TEST_ARTIFACT_BINARY, + project_json["name"], + experiment_json["id"], + ) + artifact_experiment = repository.get_artifact_metadata( + project_json["name"], + artifact_experiment_json["id"], + experiment_json["id"], + ).__dict__ + + assert artifact_experiment == artifact_experiment_json + + repository.create_dataframe( + domain.Dataframe(**dataframe_project_json), + TEST_DATAFRAME, + project_json["name"], + ) + dataframe_project = repository.get_dataframe_metadata( + project_json["name"], + dataframe_project_json["id"], + ).__dict__ + + assert dataframe_project == dataframe_project_json + + repository.create_dataframe( + domain.Dataframe(**dataframe_experiment_json), + TEST_DATAFRAME, + project_json["name"], + experiment_json["id"], + ) + 
dataframe_experiment = repository.get_dataframe_metadata( + project_json["name"], + dataframe_experiment_json["id"], + experiment_json["id"], + ).__dict__ + + assert dataframe_experiment == dataframe_experiment_json + def test_write_regression( - artifact_json, - dataframe_json, + artifact_project_json, + artifact_experiment_json, + dataframe_project_json, + dataframe_experiment_json, experiment_json, feature_json, metric_json, @@ -271,3 +415,85 @@ def test_write_regression( parameter = json.loads(file.read()) assert parameter == parameter_json + + repository.create_artifact( + domain.Artifact(**artifact_project_json), + TEST_ARTIFACT_BINARY, + project_json["name"], + ) + + expected_artifact_project_dir = os.path.join( + expected_project_dir, + "artifacts", + artifact_project_json["id"], + ) + expected_artifact_project_path = os.path.join( + expected_artifact_project_dir, "metadata.json" + ) + + with filesystem.open(expected_artifact_project_path, "r") as file: + artifact_project = json.loads(file.read()) + + assert artifact_project == artifact_project_json + + repository.create_artifact( + domain.Artifact(**artifact_experiment_json), + TEST_ARTIFACT_BINARY, + project_json["name"], + experiment_json["id"], + ) + + expected_artifact_experiment_dir = os.path.join( + expected_experiment_dir, + "artifacts", + artifact_experiment_json["id"], + ) + expected_artifact_experiment_path = os.path.join( + expected_artifact_experiment_dir, "metadata.json" + ) + + with filesystem.open(expected_artifact_experiment_path, "r") as file: + artifact_experiment = json.loads(file.read()) + + assert artifact_experiment == artifact_experiment_json + + repository.create_dataframe( + domain.Dataframe(**dataframe_project_json), + TEST_DATAFRAME, + project_json["name"], + ) + + expected_dataframe_project_dir = os.path.join( + expected_project_dir, + "dataframes", + dataframe_project_json["id"], + ) + expected_dataframe_project_path = os.path.join( + expected_dataframe_project_dir, 
"metadata.json" + ) + + with filesystem.open(expected_dataframe_project_path, "r") as file: + dataframe_project = json.loads(file.read()) + + assert dataframe_project == dataframe_project_json + + repository.create_dataframe( + domain.Dataframe(**dataframe_experiment_json), + TEST_DATAFRAME, + project_json["name"], + experiment_json["id"], + ) + + expected_dataframe_experiment_dir = os.path.join( + expected_experiment_dir, + "dataframes", + dataframe_experiment_json["id"], + ) + expected_dataframe_experiment_path = os.path.join( + expected_dataframe_experiment_dir, "metadata.json" + ) + + with filesystem.open(expected_dataframe_experiment_path, "r") as file: + dataframe_experiment = json.loads(file.read()) + + assert dataframe_experiment == dataframe_experiment_json From 8d2f92e7b709c9015208d26c5a1136ceab01e485 Mon Sep 17 00:00:00 2001 From: Ryan Soley Date: Wed, 28 Aug 2024 16:46:48 -0400 Subject: [PATCH 07/16] add artifact & dataframe binary data to regression tests --- .../regression/test_repository_read_write.py | 98 ++++++++++++++++++- 1 file changed, 93 insertions(+), 5 deletions(-) diff --git a/tests/regression/test_repository_read_write.py b/tests/regression/test_repository_read_write.py index 07894985..4fe8d412 100644 --- a/tests/regression/test_repository_read_write.py +++ b/tests/regression/test_repository_read_write.py @@ -33,7 +33,9 @@ def test_read_regression( expected_project_dir = os.path.join(root_dir, slugify(project_json["name"])) expected_project_path = os.path.join(expected_project_dir, "metadata.json") - filesystem.mkdirs(os.path.dirname(expected_project_path), exist_ok=True) + filesystem.mkdirs( + os.path.dirname(expected_project_path), exist_ok=True + ) # TODO: NO DIRNAME (ALL) with filesystem.open(expected_project_path, "w") as file: file.write(json.dumps(project_json)) @@ -124,17 +126,25 @@ def test_read_regression( expected_artifact_project_path = os.path.join( expected_artifact_project_dir, "metadata.json" ) + 
expected_artifact_project_data_path = os.path.join(expected_artifact_project_dir, "data") - filesystem.mkdirs(os.path.dirname(expected_artifact_project_path), exist_ok=True) + filesystem.mkdirs(expected_artifact_project_dir, exist_ok=True) with filesystem.open(expected_artifact_project_path, "w") as file: file.write(json.dumps(artifact_project_json)) + with filesystem.open(expected_artifact_project_data_path, "wb") as file: + file.write(TEST_ARTIFACT_BINARY) artifact_project = repository.get_artifact_metadata( project_json["name"], artifact_project_json["id"], ).__dict__ + artifact_project_data = repository.get_artifact_data( + project_json["name"], + artifact_project_json["id"], + ) assert artifact_project == artifact_project_json + assert artifact_project_data == TEST_ARTIFACT_BINARY expected_artifact_experiment_dir = os.path.join( expected_experiment_dir, @@ -144,18 +154,29 @@ def test_read_regression( expected_artifact_experiment_path = os.path.join( expected_artifact_experiment_dir, "metadata.json" ) + expected_artifact_experiment_data_path = os.path.join( + expected_artifact_experiment_dir, "data" + ) - filesystem.mkdirs(os.path.dirname(expected_artifact_experiment_path), exist_ok=True) + filesystem.mkdirs(expected_artifact_experiment_dir, exist_ok=True) with filesystem.open(expected_artifact_experiment_path, "w") as file: file.write(json.dumps(artifact_experiment_json)) + with filesystem.open(expected_artifact_experiment_data_path, "wb") as file: + file.write(TEST_ARTIFACT_BINARY) artifact_experiment = repository.get_artifact_metadata( project_json["name"], artifact_experiment_json["id"], experiment_json["id"], ).__dict__ + artifact_experiment_data = repository.get_artifact_data( + project_json["name"], + artifact_experiment_json["id"], + experiment_json["id"], + ) assert artifact_experiment == artifact_experiment_json + assert artifact_experiment_data == TEST_ARTIFACT_BINARY expected_dataframe_project_dir = os.path.join( expected_project_dir, @@ -165,17 
+186,28 @@ def test_read_regression( expected_dataframe_project_path = os.path.join( expected_dataframe_project_dir, "metadata.json" ) + expected_dataframe_project_data_dir = os.path.join(expected_dataframe_project_dir, "data") + expected_dataframe_project_data_path = os.path.join( + expected_dataframe_project_data_dir, "data.parquet" + ) - filesystem.mkdirs(os.path.dirname(expected_dataframe_project_path), exist_ok=True) + filesystem.mkdirs(expected_dataframe_project_dir, exist_ok=True) + filesystem.mkdirs(expected_dataframe_project_data_dir, exist_ok=True) with filesystem.open(expected_dataframe_project_path, "w") as file: file.write(json.dumps(dataframe_project_json)) + TEST_DATAFRAME.to_parquet(expected_dataframe_project_data_path) dataframe_project = repository.get_dataframe_metadata( project_json["name"], dataframe_project_json["id"], ).__dict__ + dataframe_project_data = repository.get_dataframe_data( + project_json["name"], + dataframe_project_json["id"], + ) assert dataframe_project == dataframe_project_json + assert dataframe_project_data.equals(TEST_DATAFRAME) expected_dataframe_experiment_dir = os.path.join( expected_experiment_dir, @@ -185,18 +217,32 @@ def test_read_regression( expected_dataframe_experiment_path = os.path.join( expected_dataframe_experiment_dir, "metadata.json" ) + expected_dataframe_experiment_data_dir = os.path.join( + expected_dataframe_experiment_dir, "data" + ) + expected_dataframe_experiment_data_path = os.path.join( + expected_dataframe_experiment_data_dir, "data.parquet" + ) - filesystem.mkdirs(os.path.dirname(expected_dataframe_experiment_path), exist_ok=True) + filesystem.mkdirs(expected_dataframe_experiment_dir, exist_ok=True) + filesystem.mkdirs(expected_dataframe_experiment_data_dir, exist_ok=True) with filesystem.open(expected_dataframe_experiment_path, "w") as file: file.write(json.dumps(dataframe_experiment_json)) + TEST_DATAFRAME.to_parquet(expected_dataframe_experiment_data_path) dataframe_experiment = 
repository.get_dataframe_metadata( project_json["name"], dataframe_experiment_json["id"], experiment_json["id"], ).__dict__ + dataframe_experiment_data = repository.get_dataframe_data( + project_json["name"], + dataframe_experiment_json["id"], + experiment_json["id"], + ) assert dataframe_experiment == dataframe_experiment_json + assert dataframe_experiment_data.equals(TEST_DATAFRAME) def test_read_write_regression( @@ -276,8 +322,13 @@ def test_read_write_regression( project_json["name"], artifact_project_json["id"], ).__dict__ + artifact_project_data = repository.get_artifact_data( + project_json["name"], + artifact_project_json["id"], + ) assert artifact_project == artifact_project_json + assert artifact_project_data == TEST_ARTIFACT_BINARY repository.create_artifact( domain.Artifact(**artifact_experiment_json), @@ -290,8 +341,14 @@ def test_read_write_regression( artifact_experiment_json["id"], experiment_json["id"], ).__dict__ + artifact_experiment_data = repository.get_artifact_data( + project_json["name"], + artifact_experiment_json["id"], + experiment_json["id"], + ) assert artifact_experiment == artifact_experiment_json + assert artifact_experiment_data == TEST_ARTIFACT_BINARY repository.create_dataframe( domain.Dataframe(**dataframe_project_json), @@ -302,8 +359,13 @@ def test_read_write_regression( project_json["name"], dataframe_project_json["id"], ).__dict__ + dataframe_project_data = repository.get_dataframe_data( + project_json["name"], + dataframe_project_json["id"], + ) assert dataframe_project == dataframe_project_json + assert dataframe_project_data.equals(TEST_DATAFRAME) repository.create_dataframe( domain.Dataframe(**dataframe_experiment_json), @@ -316,8 +378,14 @@ def test_read_write_regression( dataframe_experiment_json["id"], experiment_json["id"], ).__dict__ + dataframe_experiment_data = repository.get_dataframe_data( + project_json["name"], + dataframe_experiment_json["id"], + experiment_json["id"], + ) assert dataframe_experiment == 
dataframe_experiment_json + assert dataframe_experiment_data.equals(TEST_DATAFRAME) def test_write_regression( @@ -430,11 +498,15 @@ def test_write_regression( expected_artifact_project_path = os.path.join( expected_artifact_project_dir, "metadata.json" ) + expected_artifact_project_data_path = os.path.join(expected_artifact_project_dir, "data") with filesystem.open(expected_artifact_project_path, "r") as file: artifact_project = json.loads(file.read()) + with filesystem.open(expected_artifact_project_data_path, "rb") as file: + artifact_project_data = file.read() assert artifact_project == artifact_project_json + assert artifact_project_data == TEST_ARTIFACT_BINARY repository.create_artifact( domain.Artifact(**artifact_experiment_json), @@ -451,11 +523,17 @@ def test_write_regression( expected_artifact_experiment_path = os.path.join( expected_artifact_experiment_dir, "metadata.json" ) + expected_artifact_experiment_data_path = os.path.join( + expected_artifact_experiment_dir, "data" + ) with filesystem.open(expected_artifact_experiment_path, "r") as file: artifact_experiment = json.loads(file.read()) + with filesystem.open(expected_artifact_experiment_data_path, "rb") as file: + artifact_experiment_data = file.read() assert artifact_experiment == artifact_experiment_json + assert artifact_experiment_data == TEST_ARTIFACT_BINARY repository.create_dataframe( domain.Dataframe(**dataframe_project_json), @@ -471,11 +549,16 @@ def test_write_regression( expected_dataframe_project_path = os.path.join( expected_dataframe_project_dir, "metadata.json" ) + expected_dataframe_project_data_path = os.path.join( + expected_dataframe_project_dir, "data", "data.parquet" + ) with filesystem.open(expected_dataframe_project_path, "r") as file: dataframe_project = json.loads(file.read()) + dataframe_project_data = pd.read_parquet(expected_dataframe_project_data_path) assert dataframe_project == dataframe_project_json + assert dataframe_project_data.equals(TEST_DATAFRAME) 
repository.create_dataframe( domain.Dataframe(**dataframe_experiment_json), @@ -492,8 +575,13 @@ def test_write_regression( expected_dataframe_experiment_path = os.path.join( expected_dataframe_experiment_dir, "metadata.json" ) + expected_dataframe_experiment_data_path = os.path.join( + expected_dataframe_experiment_dir, "data", "data.parquet" + ) with filesystem.open(expected_dataframe_experiment_path, "r") as file: dataframe_experiment = json.loads(file.read()) + dataframe_experiment_data = pd.read_parquet(expected_dataframe_experiment_data_path) assert dataframe_experiment == dataframe_experiment_json + assert dataframe_experiment_data.equals(TEST_DATAFRAME) From 18e39f08ba7a65867a4456ee37d5a25548b391fd Mon Sep 17 00:00:00 2001 From: Ryan Soley Date: Wed, 28 Aug 2024 16:51:05 -0400 Subject: [PATCH 08/16] path cleanup --- tests/regression/test_repository_read_write.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/tests/regression/test_repository_read_write.py b/tests/regression/test_repository_read_write.py index 4fe8d412..3e0fd6fc 100644 --- a/tests/regression/test_repository_read_write.py +++ b/tests/regression/test_repository_read_write.py @@ -33,9 +33,7 @@ def test_read_regression( expected_project_dir = os.path.join(root_dir, slugify(project_json["name"])) expected_project_path = os.path.join(expected_project_dir, "metadata.json") - filesystem.mkdirs( - os.path.dirname(expected_project_path), exist_ok=True - ) # TODO: NO DIRNAME (ALL) + filesystem.mkdirs(expected_project_dir, exist_ok=True) with filesystem.open(expected_project_path, "w") as file: file.write(json.dumps(project_json)) @@ -50,7 +48,7 @@ def test_read_regression( ) expected_experiment_path = os.path.join(expected_experiment_dir, "metadata.json") - filesystem.mkdirs(os.path.dirname(expected_experiment_path), exist_ok=True) + filesystem.mkdirs(expected_experiment_dir, exist_ok=True) with filesystem.open(expected_experiment_path, "w") as file: 
file.write(json.dumps(experiment_json)) @@ -68,7 +66,7 @@ def test_read_regression( ) expected_feature_path = os.path.join(expected_feature_dir, "metadata.json") - filesystem.mkdirs(os.path.dirname(expected_feature_path), exist_ok=True) + filesystem.mkdirs(expected_feature_dir, exist_ok=True) with filesystem.open(expected_feature_path, "w") as file: file.write(json.dumps(feature_json)) @@ -87,7 +85,7 @@ def test_read_regression( ) expected_metric_path = os.path.join(expected_metric_dir, "metadata.json") - filesystem.mkdirs(os.path.dirname(expected_metric_path), exist_ok=True) + filesystem.mkdirs(expected_metric_dir, exist_ok=True) with filesystem.open(expected_metric_path, "w") as file: file.write(json.dumps(metric_json)) @@ -106,7 +104,7 @@ def test_read_regression( ) expected_parameter_path = os.path.join(expected_parameter_dir, "metadata.json") - filesystem.mkdirs(os.path.dirname(expected_parameter_path), exist_ok=True) + filesystem.mkdirs(expected_parameter_dir, exist_ok=True) with filesystem.open(expected_parameter_path, "w") as file: file.write(json.dumps(parameter_json)) From dee5a95dbefeded488763e9c5ed0eb8aef84d595 Mon Sep 17 00:00:00 2001 From: Ryan Soley Date: Thu, 29 Aug 2024 11:30:41 -0400 Subject: [PATCH 09/16] add tag updates to regression tests --- .../regression/test_repository_read_write.py | 224 ++++++++++++++++++ 1 file changed, 224 insertions(+) diff --git a/tests/regression/test_repository_read_write.py b/tests/regression/test_repository_read_write.py index 3e0fd6fc..dd24c03d 100644 --- a/tests/regression/test_repository_read_write.py +++ b/tests/regression/test_repository_read_write.py @@ -1,5 +1,6 @@ import os import tempfile +import uuid import fsspec import pandas as pd @@ -8,6 +9,8 @@ from rubicon_ml.repository import LocalRepository from rubicon_ml.repository.utils import json, slugify +TAGS_TO_ADD = ["added_a", "added_b"] +TAGS_TO_REMOVE = ["added_a"] TEST_ARTIFACT_BINARY = b"test" TEST_DATAFRAME = pd.DataFrame([[0]]) @@ -30,6 +33,25 @@ 
def test_read_regression( root_dir = os.path.join(temp_dir_name, "test-rubicon-ml") repository = LocalRepository(root_dir=root_dir) + def __test_additional_tags(tag_dir, project_name, **entity_identification_kwargs): + tag_path = os.path.join(tag_dir, f"tags_{uuid.uuid4()}.json") + with filesystem.open(tag_path, "w") as file: + file.write(json.dumps({"added_tags": TAGS_TO_ADD})) + + tag_path = os.path.join(tag_dir, f"tags_{uuid.uuid4()}.json") + with filesystem.open(tag_path, "w") as file: + file.write(json.dumps({"removed_tags": TAGS_TO_REMOVE})) + + additional_tags = repository.get_tags( + project_name, + **entity_identification_kwargs, + ) + + return ( + additional_tags[0]["added_tags"] == TAGS_TO_ADD + and additional_tags[1]["removed_tags"] == TAGS_TO_REMOVE + ) + expected_project_dir = os.path.join(root_dir, slugify(project_json["name"])) expected_project_path = os.path.join(expected_project_dir, "metadata.json") @@ -58,6 +80,13 @@ def test_read_regression( ).__dict__ assert experiment == experiment_json + assert __test_additional_tags( + expected_experiment_dir, + project_json["name"], + experiment_id=experiment_json["id"], + entity_identifier=experiment_json["id"], + entity_type="Experiment", + ) expected_feature_dir = os.path.join( expected_experiment_dir, @@ -77,6 +106,13 @@ def test_read_regression( ).__dict__ assert feature == feature_json + assert __test_additional_tags( + expected_feature_dir, + project_json["name"], + experiment_id=experiment_json["id"], + entity_identifier=feature_json["name"], + entity_type="Feature", + ) expected_metric_dir = os.path.join( expected_experiment_dir, @@ -96,6 +132,13 @@ def test_read_regression( ).__dict__ assert metric == metric_json + assert __test_additional_tags( + expected_metric_dir, + project_json["name"], + experiment_id=experiment_json["id"], + entity_identifier=metric_json["name"], + entity_type="Metric", + ) expected_parameter_dir = os.path.join( expected_experiment_dir, @@ -115,6 +158,13 @@ def 
test_read_regression( ).__dict__ assert parameter == parameter_json + assert __test_additional_tags( + expected_parameter_dir, + project_json["name"], + experiment_id=experiment_json["id"], + entity_identifier=parameter_json["name"], + entity_type="Parameter", + ) expected_artifact_project_dir = os.path.join( expected_project_dir, @@ -143,6 +193,12 @@ def test_read_regression( assert artifact_project == artifact_project_json assert artifact_project_data == TEST_ARTIFACT_BINARY + assert __test_additional_tags( + expected_artifact_project_dir, + project_json["name"], + entity_identifier=artifact_project_json["id"], + entity_type="Artifact", + ) expected_artifact_experiment_dir = os.path.join( expected_experiment_dir, @@ -175,6 +231,13 @@ def test_read_regression( assert artifact_experiment == artifact_experiment_json assert artifact_experiment_data == TEST_ARTIFACT_BINARY + assert __test_additional_tags( + expected_artifact_experiment_dir, + project_json["name"], + experiment_id=experiment_json["id"], + entity_identifier=artifact_experiment_json["id"], + entity_type="Artifact", + ) expected_dataframe_project_dir = os.path.join( expected_project_dir, @@ -206,6 +269,12 @@ def test_read_regression( assert dataframe_project == dataframe_project_json assert dataframe_project_data.equals(TEST_DATAFRAME) + assert __test_additional_tags( + expected_dataframe_project_dir, + project_json["name"], + entity_identifier=dataframe_project_json["id"], + entity_type="Dataframe", + ) expected_dataframe_experiment_dir = os.path.join( expected_experiment_dir, @@ -241,6 +310,13 @@ def test_read_regression( assert dataframe_experiment == dataframe_experiment_json assert dataframe_experiment_data.equals(TEST_DATAFRAME) + assert __test_additional_tags( + expected_dataframe_experiment_dir, + project_json["name"], + experiment_id=experiment_json["id"], + entity_identifier=dataframe_experiment_json["id"], + entity_type="Dataframe", + ) def test_read_write_regression( @@ -259,6 +335,27 @@ def 
test_read_write_regression( root_dir = os.path.join(temp_dir_name, "test-rubicon-ml") repository = LocalRepository(root_dir=root_dir) + def __test_additional_tags(project_name, **entity_identification_kwargs): + repository.add_tags( + project_name, + TAGS_TO_ADD, + **entity_identification_kwargs, + ) + repository.remove_tags( + project_name, + TAGS_TO_REMOVE, + **entity_identification_kwargs, + ) + additional_tags = repository.get_tags( + project_name, + **entity_identification_kwargs, + ) + + return ( + additional_tags[0]["added_tags"] == TAGS_TO_ADD + and additional_tags[1]["removed_tags"] == TAGS_TO_REMOVE + ) + repository.create_project(domain.Project(**project_json)) project = repository.get_project(project_json["name"]).__dict__ @@ -271,6 +368,12 @@ def test_read_write_regression( ).__dict__ assert experiment == experiment_json + assert __test_additional_tags( + project_json["name"], + experiment_id=experiment_json["id"], + entity_identifier=experiment_json["id"], + entity_type="Experiment", + ) repository.create_feature( domain.Feature(**feature_json), @@ -284,6 +387,12 @@ def test_read_write_regression( ).__dict__ assert feature == feature_json + assert __test_additional_tags( + project_json["name"], + experiment_id=experiment_json["id"], + entity_identifier=feature_json["name"], + entity_type="Feature", + ) repository.create_metric( domain.Metric(**metric_json), @@ -297,6 +406,12 @@ def test_read_write_regression( ).__dict__ assert metric == metric_json + assert __test_additional_tags( + project_json["name"], + experiment_id=experiment_json["id"], + entity_identifier=metric_json["name"], + entity_type="Metric", + ) repository.create_parameter( domain.Parameter(**parameter_json), @@ -310,6 +425,12 @@ def test_read_write_regression( ).__dict__ assert parameter == parameter_json + assert __test_additional_tags( + project_json["name"], + experiment_id=experiment_json["id"], + entity_identifier=parameter_json["name"], + entity_type="Parameter", + ) 
repository.create_artifact( domain.Artifact(**artifact_project_json), @@ -327,6 +448,11 @@ def test_read_write_regression( assert artifact_project == artifact_project_json assert artifact_project_data == TEST_ARTIFACT_BINARY + assert __test_additional_tags( + project_json["name"], + entity_identifier=artifact_project_json["id"], + entity_type="Artifact", + ) repository.create_artifact( domain.Artifact(**artifact_experiment_json), @@ -347,6 +473,12 @@ def test_read_write_regression( assert artifact_experiment == artifact_experiment_json assert artifact_experiment_data == TEST_ARTIFACT_BINARY + assert __test_additional_tags( + project_json["name"], + experiment_id=experiment_json["id"], + entity_identifier=artifact_experiment_json["id"], + entity_type="Artifact", + ) repository.create_dataframe( domain.Dataframe(**dataframe_project_json), @@ -364,6 +496,11 @@ def test_read_write_regression( assert dataframe_project == dataframe_project_json assert dataframe_project_data.equals(TEST_DATAFRAME) + assert __test_additional_tags( + project_json["name"], + entity_identifier=dataframe_project_json["id"], + entity_type="Dataframe", + ) repository.create_dataframe( domain.Dataframe(**dataframe_experiment_json), @@ -384,6 +521,12 @@ def test_read_write_regression( assert dataframe_experiment == dataframe_experiment_json assert dataframe_experiment_data.equals(TEST_DATAFRAME) + assert __test_additional_tags( + project_json["name"], + experiment_id=experiment_json["id"], + entity_identifier=dataframe_experiment_json["id"], + entity_type="Dataframe", + ) def test_write_regression( @@ -404,6 +547,33 @@ def test_write_regression( root_dir = os.path.join(temp_dir_name, "test-rubicon-ml") repository = LocalRepository(root_dir=root_dir) + def __test_additional_tags(tag_dir, project_name, **entity_identification_kwargs): + is_passing = True + + repository.add_tags( + project_name, + TAGS_TO_ADD, + **entity_identification_kwargs, + ) + repository.remove_tags( + project_name, + 
TAGS_TO_REMOVE, + **entity_identification_kwargs, + ) + + tag_path = os.path.join(tag_dir, "tags_*.json") + tag_files = filesystem.glob(tag_path, detail=True) + for tag_file in tag_files: + with filesystem.open(tag_file, "r") as file: + tags = json.loads(file.read()) + + if "added_tags" in tags: + is_passing &= tags["added_tags"] == TAGS_TO_ADD + if "removed_tags" in tags: + is_passing &= tags["removed_tags"] == TAGS_TO_REMOVE + + return is_passing + repository.create_project(domain.Project(**project_json)) expected_project_dir = os.path.join(root_dir, slugify(project_json["name"])) @@ -427,6 +597,13 @@ def test_write_regression( experiment = json.loads(file.read()) assert experiment == experiment_json + assert __test_additional_tags( + expected_experiment_dir, + project_json["name"], + experiment_id=experiment_json["id"], + entity_identifier=experiment_json["id"], + entity_type="Experiment", + ) repository.create_feature( domain.Feature(**feature_json), @@ -445,6 +622,13 @@ def test_write_regression( feature = json.loads(file.read()) assert feature == feature_json + assert __test_additional_tags( + expected_feature_dir, + project_json["name"], + experiment_id=experiment_json["id"], + entity_identifier=feature_json["name"], + entity_type="Feature", + ) repository.create_metric( domain.Metric(**metric_json), @@ -463,6 +647,13 @@ def test_write_regression( metric = json.loads(file.read()) assert metric == metric_json + assert __test_additional_tags( + expected_metric_dir, + project_json["name"], + experiment_id=experiment_json["id"], + entity_identifier=metric_json["name"], + entity_type="Metric", + ) repository.create_parameter( domain.Parameter(**parameter_json), @@ -481,6 +672,13 @@ def test_write_regression( parameter = json.loads(file.read()) assert parameter == parameter_json + assert __test_additional_tags( + expected_parameter_dir, + project_json["name"], + experiment_id=experiment_json["id"], + entity_identifier=parameter_json["name"], + 
entity_type="Parameter", + ) repository.create_artifact( domain.Artifact(**artifact_project_json), @@ -505,6 +703,12 @@ def test_write_regression( assert artifact_project == artifact_project_json assert artifact_project_data == TEST_ARTIFACT_BINARY + assert __test_additional_tags( + expected_artifact_project_dir, + project_json["name"], + entity_identifier=artifact_project_json["id"], + entity_type="Artifact", + ) repository.create_artifact( domain.Artifact(**artifact_experiment_json), @@ -532,6 +736,13 @@ def test_write_regression( assert artifact_experiment == artifact_experiment_json assert artifact_experiment_data == TEST_ARTIFACT_BINARY + assert __test_additional_tags( + expected_artifact_experiment_dir, + project_json["name"], + experiment_id=experiment_json["id"], + entity_identifier=artifact_experiment_json["id"], + entity_type="Artifact", + ) repository.create_dataframe( domain.Dataframe(**dataframe_project_json), @@ -557,6 +768,12 @@ def test_write_regression( assert dataframe_project == dataframe_project_json assert dataframe_project_data.equals(TEST_DATAFRAME) + assert __test_additional_tags( + expected_dataframe_project_dir, + project_json["name"], + entity_identifier=dataframe_project_json["id"], + entity_type="Dataframe", + ) repository.create_dataframe( domain.Dataframe(**dataframe_experiment_json), @@ -583,3 +800,10 @@ def test_write_regression( assert dataframe_experiment == dataframe_experiment_json assert dataframe_experiment_data.equals(TEST_DATAFRAME) + assert __test_additional_tags( + expected_dataframe_experiment_dir, + project_json["name"], + experiment_id=experiment_json["id"], + entity_identifier=dataframe_experiment_json["id"], + entity_type="Dataframe", + ) From 32cd71ba0a6b05176874cb1c60f1c4c86654dec0 Mon Sep 17 00:00:00 2001 From: Ryan Soley Date: Thu, 29 Aug 2024 11:49:24 -0400 Subject: [PATCH 10/16] add comment updates to regression tests --- .../regression/test_repository_read_write.py | 120 +++++++++++++----- 1 file changed, 89 
insertions(+), 31 deletions(-) diff --git a/tests/regression/test_repository_read_write.py b/tests/regression/test_repository_read_write.py index dd24c03d..a2f46baa 100644 --- a/tests/regression/test_repository_read_write.py +++ b/tests/regression/test_repository_read_write.py @@ -9,6 +9,8 @@ from rubicon_ml.repository import LocalRepository from rubicon_ml.repository.utils import json, slugify +COMMENTS_TO_ADD = ["comment_a", "comment_b"] +COMMENTS_TO_REMOVE = ["comment_a"] TAGS_TO_ADD = ["added_a", "added_b"] TAGS_TO_REMOVE = ["added_a"] TEST_ARTIFACT_BINARY = b"test" @@ -33,23 +35,39 @@ def test_read_regression( root_dir = os.path.join(temp_dir_name, "test-rubicon-ml") repository = LocalRepository(root_dir=root_dir) - def __test_additional_tags(tag_dir, project_name, **entity_identification_kwargs): - tag_path = os.path.join(tag_dir, f"tags_{uuid.uuid4()}.json") - with filesystem.open(tag_path, "w") as file: + def __test_additional_tags_and_comments( + tag_comment_dir, project_name, **entity_identification_kwargs + ): + add_tag_path = os.path.join(tag_comment_dir, f"tags_{uuid.uuid4()}.json") + with filesystem.open(add_tag_path, "w") as file: file.write(json.dumps({"added_tags": TAGS_TO_ADD})) - tag_path = os.path.join(tag_dir, f"tags_{uuid.uuid4()}.json") - with filesystem.open(tag_path, "w") as file: + remove_tag_path = os.path.join(tag_comment_dir, f"tags_{uuid.uuid4()}.json") + with filesystem.open(remove_tag_path, "w") as file: file.write(json.dumps({"removed_tags": TAGS_TO_REMOVE})) + add_comment_path = os.path.join(tag_comment_dir, f"comments_{uuid.uuid4()}.json") + with filesystem.open(add_comment_path, "w") as file: + file.write(json.dumps({"added_comments": COMMENTS_TO_ADD})) + + remove_comment_path = os.path.join(tag_comment_dir, f"comments_{uuid.uuid4()}.json") + with filesystem.open(remove_comment_path, "w") as file: + file.write(json.dumps({"removed_comments": COMMENTS_TO_REMOVE})) + additional_tags = repository.get_tags( project_name, 
**entity_identification_kwargs, ) + additional_comments = repository.get_comments( + project_name, + **entity_identification_kwargs, + ) return ( additional_tags[0]["added_tags"] == TAGS_TO_ADD and additional_tags[1]["removed_tags"] == TAGS_TO_REMOVE + and additional_comments[0]["added_comments"] == COMMENTS_TO_ADD + and additional_comments[1]["removed_comments"] == COMMENTS_TO_REMOVE ) expected_project_dir = os.path.join(root_dir, slugify(project_json["name"])) @@ -80,7 +98,7 @@ def __test_additional_tags(tag_dir, project_name, **entity_identification_kwargs ).__dict__ assert experiment == experiment_json - assert __test_additional_tags( + assert __test_additional_tags_and_comments( expected_experiment_dir, project_json["name"], experiment_id=experiment_json["id"], @@ -106,7 +124,7 @@ def __test_additional_tags(tag_dir, project_name, **entity_identification_kwargs ).__dict__ assert feature == feature_json - assert __test_additional_tags( + assert __test_additional_tags_and_comments( expected_feature_dir, project_json["name"], experiment_id=experiment_json["id"], @@ -132,7 +150,7 @@ def __test_additional_tags(tag_dir, project_name, **entity_identification_kwargs ).__dict__ assert metric == metric_json - assert __test_additional_tags( + assert __test_additional_tags_and_comments( expected_metric_dir, project_json["name"], experiment_id=experiment_json["id"], @@ -158,7 +176,7 @@ def __test_additional_tags(tag_dir, project_name, **entity_identification_kwargs ).__dict__ assert parameter == parameter_json - assert __test_additional_tags( + assert __test_additional_tags_and_comments( expected_parameter_dir, project_json["name"], experiment_id=experiment_json["id"], @@ -193,7 +211,7 @@ def __test_additional_tags(tag_dir, project_name, **entity_identification_kwargs assert artifact_project == artifact_project_json assert artifact_project_data == TEST_ARTIFACT_BINARY - assert __test_additional_tags( + assert __test_additional_tags_and_comments( 
expected_artifact_project_dir, project_json["name"], entity_identifier=artifact_project_json["id"], @@ -231,7 +249,7 @@ def __test_additional_tags(tag_dir, project_name, **entity_identification_kwargs assert artifact_experiment == artifact_experiment_json assert artifact_experiment_data == TEST_ARTIFACT_BINARY - assert __test_additional_tags( + assert __test_additional_tags_and_comments( expected_artifact_experiment_dir, project_json["name"], experiment_id=experiment_json["id"], @@ -269,7 +287,7 @@ def __test_additional_tags(tag_dir, project_name, **entity_identification_kwargs assert dataframe_project == dataframe_project_json assert dataframe_project_data.equals(TEST_DATAFRAME) - assert __test_additional_tags( + assert __test_additional_tags_and_comments( expected_dataframe_project_dir, project_json["name"], entity_identifier=dataframe_project_json["id"], @@ -310,7 +328,7 @@ def __test_additional_tags(tag_dir, project_name, **entity_identification_kwargs assert dataframe_experiment == dataframe_experiment_json assert dataframe_experiment_data.equals(TEST_DATAFRAME) - assert __test_additional_tags( + assert __test_additional_tags_and_comments( expected_dataframe_experiment_dir, project_json["name"], experiment_id=experiment_json["id"], @@ -335,7 +353,7 @@ def test_read_write_regression( root_dir = os.path.join(temp_dir_name, "test-rubicon-ml") repository = LocalRepository(root_dir=root_dir) - def __test_additional_tags(project_name, **entity_identification_kwargs): + def __test_additional_tags_and_comments(project_name, **entity_identification_kwargs): repository.add_tags( project_name, TAGS_TO_ADD, @@ -350,10 +368,26 @@ def __test_additional_tags(project_name, **entity_identification_kwargs): project_name, **entity_identification_kwargs, ) + repository.add_comments( + project_name, + COMMENTS_TO_ADD, + **entity_identification_kwargs, + ) + repository.remove_comments( + project_name, + COMMENTS_TO_REMOVE, + **entity_identification_kwargs, + ) + additional_comments 
= repository.get_comments( + project_name, + **entity_identification_kwargs, + ) return ( additional_tags[0]["added_tags"] == TAGS_TO_ADD and additional_tags[1]["removed_tags"] == TAGS_TO_REMOVE + and additional_comments[0]["added_comments"] == COMMENTS_TO_ADD + and additional_comments[1]["removed_comments"] == COMMENTS_TO_REMOVE ) repository.create_project(domain.Project(**project_json)) @@ -368,7 +402,7 @@ def __test_additional_tags(project_name, **entity_identification_kwargs): ).__dict__ assert experiment == experiment_json - assert __test_additional_tags( + assert __test_additional_tags_and_comments( project_json["name"], experiment_id=experiment_json["id"], entity_identifier=experiment_json["id"], @@ -387,7 +421,7 @@ def __test_additional_tags(project_name, **entity_identification_kwargs): ).__dict__ assert feature == feature_json - assert __test_additional_tags( + assert __test_additional_tags_and_comments( project_json["name"], experiment_id=experiment_json["id"], entity_identifier=feature_json["name"], @@ -406,7 +440,7 @@ def __test_additional_tags(project_name, **entity_identification_kwargs): ).__dict__ assert metric == metric_json - assert __test_additional_tags( + assert __test_additional_tags_and_comments( project_json["name"], experiment_id=experiment_json["id"], entity_identifier=metric_json["name"], @@ -425,7 +459,7 @@ def __test_additional_tags(project_name, **entity_identification_kwargs): ).__dict__ assert parameter == parameter_json - assert __test_additional_tags( + assert __test_additional_tags_and_comments( project_json["name"], experiment_id=experiment_json["id"], entity_identifier=parameter_json["name"], @@ -448,7 +482,7 @@ def __test_additional_tags(project_name, **entity_identification_kwargs): assert artifact_project == artifact_project_json assert artifact_project_data == TEST_ARTIFACT_BINARY - assert __test_additional_tags( + assert __test_additional_tags_and_comments( project_json["name"], 
entity_identifier=artifact_project_json["id"], entity_type="Artifact", @@ -473,7 +507,7 @@ def __test_additional_tags(project_name, **entity_identification_kwargs): assert artifact_experiment == artifact_experiment_json assert artifact_experiment_data == TEST_ARTIFACT_BINARY - assert __test_additional_tags( + assert __test_additional_tags_and_comments( project_json["name"], experiment_id=experiment_json["id"], entity_identifier=artifact_experiment_json["id"], @@ -496,7 +530,7 @@ def __test_additional_tags(project_name, **entity_identification_kwargs): assert dataframe_project == dataframe_project_json assert dataframe_project_data.equals(TEST_DATAFRAME) - assert __test_additional_tags( + assert __test_additional_tags_and_comments( project_json["name"], entity_identifier=dataframe_project_json["id"], entity_type="Dataframe", @@ -521,7 +555,7 @@ def __test_additional_tags(project_name, **entity_identification_kwargs): assert dataframe_experiment == dataframe_experiment_json assert dataframe_experiment_data.equals(TEST_DATAFRAME) - assert __test_additional_tags( + assert __test_additional_tags_and_comments( project_json["name"], experiment_id=experiment_json["id"], entity_identifier=dataframe_experiment_json["id"], @@ -547,7 +581,9 @@ def test_write_regression( root_dir = os.path.join(temp_dir_name, "test-rubicon-ml") repository = LocalRepository(root_dir=root_dir) - def __test_additional_tags(tag_dir, project_name, **entity_identification_kwargs): + def __test_additional_tags_and_comments( + tag_dir, project_name, **entity_identification_kwargs + ): is_passing = True repository.add_tags( @@ -572,6 +608,28 @@ def __test_additional_tags(tag_dir, project_name, **entity_identification_kwargs if "removed_tags" in tags: is_passing &= tags["removed_tags"] == TAGS_TO_REMOVE + repository.add_comments( + project_name, + COMMENTS_TO_ADD, + **entity_identification_kwargs, + ) + repository.remove_comments( + project_name, + COMMENTS_TO_REMOVE, + **entity_identification_kwargs, + 
) + + comment_path = os.path.join(tag_dir, "comments_*.json") + comment_files = filesystem.glob(comment_path, detail=True) + for comment_file in comment_files: + with filesystem.open(comment_file, "r") as file: + comments = json.loads(file.read()) + + if "added_comments" in comments: + is_passing &= comments["added_comments"] == COMMENTS_TO_ADD + if "removed_comments" in comments: + is_passing &= comments["removed_comments"] == COMMENTS_TO_REMOVE + return is_passing repository.create_project(domain.Project(**project_json)) @@ -597,7 +655,7 @@ def __test_additional_tags(tag_dir, project_name, **entity_identification_kwargs experiment = json.loads(file.read()) assert experiment == experiment_json - assert __test_additional_tags( + assert __test_additional_tags_and_comments( expected_experiment_dir, project_json["name"], experiment_id=experiment_json["id"], @@ -622,7 +680,7 @@ def __test_additional_tags(tag_dir, project_name, **entity_identification_kwargs feature = json.loads(file.read()) assert feature == feature_json - assert __test_additional_tags( + assert __test_additional_tags_and_comments( expected_feature_dir, project_json["name"], experiment_id=experiment_json["id"], @@ -647,7 +705,7 @@ def __test_additional_tags(tag_dir, project_name, **entity_identification_kwargs metric = json.loads(file.read()) assert metric == metric_json - assert __test_additional_tags( + assert __test_additional_tags_and_comments( expected_metric_dir, project_json["name"], experiment_id=experiment_json["id"], @@ -672,7 +730,7 @@ def __test_additional_tags(tag_dir, project_name, **entity_identification_kwargs parameter = json.loads(file.read()) assert parameter == parameter_json - assert __test_additional_tags( + assert __test_additional_tags_and_comments( expected_parameter_dir, project_json["name"], experiment_id=experiment_json["id"], @@ -703,7 +761,7 @@ def __test_additional_tags(tag_dir, project_name, **entity_identification_kwargs assert artifact_project == artifact_project_json 
assert artifact_project_data == TEST_ARTIFACT_BINARY - assert __test_additional_tags( + assert __test_additional_tags_and_comments( expected_artifact_project_dir, project_json["name"], entity_identifier=artifact_project_json["id"], @@ -736,7 +794,7 @@ def __test_additional_tags(tag_dir, project_name, **entity_identification_kwargs assert artifact_experiment == artifact_experiment_json assert artifact_experiment_data == TEST_ARTIFACT_BINARY - assert __test_additional_tags( + assert __test_additional_tags_and_comments( expected_artifact_experiment_dir, project_json["name"], experiment_id=experiment_json["id"], @@ -768,7 +826,7 @@ def __test_additional_tags(tag_dir, project_name, **entity_identification_kwargs assert dataframe_project == dataframe_project_json assert dataframe_project_data.equals(TEST_DATAFRAME) - assert __test_additional_tags( + assert __test_additional_tags_and_comments( expected_dataframe_project_dir, project_json["name"], entity_identifier=dataframe_project_json["id"], @@ -800,7 +858,7 @@ def __test_additional_tags(tag_dir, project_name, **entity_identification_kwargs assert dataframe_experiment == dataframe_experiment_json assert dataframe_experiment_data.equals(TEST_DATAFRAME) - assert __test_additional_tags( + assert __test_additional_tags_and_comments( expected_dataframe_experiment_dir, project_json["name"], experiment_id=experiment_json["id"], From 5fb17c024c08e263a119fb747dd72b9c74deb10e Mon Sep 17 00:00:00 2001 From: Ryan Soley Date: Thu, 29 Aug 2024 13:24:15 -0400 Subject: [PATCH 11/16] add delete regression test --- .../regression/test_repository_read_write.py | 166 +++++++++++++++--- 1 file changed, 140 insertions(+), 26 deletions(-) diff --git a/tests/regression/test_repository_read_write.py b/tests/regression/test_repository_read_write.py index a2f46baa..c14d3fe2 100644 --- a/tests/regression/test_repository_read_write.py +++ b/tests/regression/test_repository_read_write.py @@ -9,12 +9,12 @@ from rubicon_ml.repository import 
LocalRepository from rubicon_ml.repository.utils import json, slugify +ARTIFACT_BINARY = b"artifact" COMMENTS_TO_ADD = ["comment_a", "comment_b"] COMMENTS_TO_REMOVE = ["comment_a"] +DATAFRAME = pd.DataFrame([[0]]) TAGS_TO_ADD = ["added_a", "added_b"] TAGS_TO_REMOVE = ["added_a"] -TEST_ARTIFACT_BINARY = b"test" -TEST_DATAFRAME = pd.DataFrame([[0]]) def test_read_regression( @@ -198,7 +198,7 @@ def __test_additional_tags_and_comments( with filesystem.open(expected_artifact_project_path, "w") as file: file.write(json.dumps(artifact_project_json)) with filesystem.open(expected_artifact_project_data_path, "wb") as file: - file.write(TEST_ARTIFACT_BINARY) + file.write(ARTIFACT_BINARY) artifact_project = repository.get_artifact_metadata( project_json["name"], @@ -210,7 +210,7 @@ def __test_additional_tags_and_comments( ) assert artifact_project == artifact_project_json - assert artifact_project_data == TEST_ARTIFACT_BINARY + assert artifact_project_data == ARTIFACT_BINARY assert __test_additional_tags_and_comments( expected_artifact_project_dir, project_json["name"], @@ -234,7 +234,7 @@ def __test_additional_tags_and_comments( with filesystem.open(expected_artifact_experiment_path, "w") as file: file.write(json.dumps(artifact_experiment_json)) with filesystem.open(expected_artifact_experiment_data_path, "wb") as file: - file.write(TEST_ARTIFACT_BINARY) + file.write(ARTIFACT_BINARY) artifact_experiment = repository.get_artifact_metadata( project_json["name"], @@ -248,7 +248,7 @@ def __test_additional_tags_and_comments( ) assert artifact_experiment == artifact_experiment_json - assert artifact_experiment_data == TEST_ARTIFACT_BINARY + assert artifact_experiment_data == ARTIFACT_BINARY assert __test_additional_tags_and_comments( expected_artifact_experiment_dir, project_json["name"], @@ -274,7 +274,7 @@ def __test_additional_tags_and_comments( filesystem.mkdirs(expected_dataframe_project_data_dir, exist_ok=True) with filesystem.open(expected_dataframe_project_path, "w") as 
file: file.write(json.dumps(dataframe_project_json)) - TEST_DATAFRAME.to_parquet(expected_dataframe_project_data_path) + DATAFRAME.to_parquet(expected_dataframe_project_data_path) dataframe_project = repository.get_dataframe_metadata( project_json["name"], @@ -286,7 +286,7 @@ def __test_additional_tags_and_comments( ) assert dataframe_project == dataframe_project_json - assert dataframe_project_data.equals(TEST_DATAFRAME) + assert dataframe_project_data.equals(DATAFRAME) assert __test_additional_tags_and_comments( expected_dataframe_project_dir, project_json["name"], @@ -313,7 +313,7 @@ def __test_additional_tags_and_comments( filesystem.mkdirs(expected_dataframe_experiment_data_dir, exist_ok=True) with filesystem.open(expected_dataframe_experiment_path, "w") as file: file.write(json.dumps(dataframe_experiment_json)) - TEST_DATAFRAME.to_parquet(expected_dataframe_experiment_data_path) + DATAFRAME.to_parquet(expected_dataframe_experiment_data_path) dataframe_experiment = repository.get_dataframe_metadata( project_json["name"], @@ -327,7 +327,7 @@ def __test_additional_tags_and_comments( ) assert dataframe_experiment == dataframe_experiment_json - assert dataframe_experiment_data.equals(TEST_DATAFRAME) + assert dataframe_experiment_data.equals(DATAFRAME) assert __test_additional_tags_and_comments( expected_dataframe_experiment_dir, project_json["name"], @@ -468,7 +468,7 @@ def __test_additional_tags_and_comments(project_name, **entity_identification_kw repository.create_artifact( domain.Artifact(**artifact_project_json), - TEST_ARTIFACT_BINARY, + ARTIFACT_BINARY, project_json["name"], ) artifact_project = repository.get_artifact_metadata( @@ -481,7 +481,7 @@ def __test_additional_tags_and_comments(project_name, **entity_identification_kw ) assert artifact_project == artifact_project_json - assert artifact_project_data == TEST_ARTIFACT_BINARY + assert artifact_project_data == ARTIFACT_BINARY assert __test_additional_tags_and_comments( project_json["name"], 
entity_identifier=artifact_project_json["id"], @@ -490,7 +490,7 @@ def __test_additional_tags_and_comments(project_name, **entity_identification_kw repository.create_artifact( domain.Artifact(**artifact_experiment_json), - TEST_ARTIFACT_BINARY, + ARTIFACT_BINARY, project_json["name"], experiment_json["id"], ) @@ -506,7 +506,7 @@ def __test_additional_tags_and_comments(project_name, **entity_identification_kw ) assert artifact_experiment == artifact_experiment_json - assert artifact_experiment_data == TEST_ARTIFACT_BINARY + assert artifact_experiment_data == ARTIFACT_BINARY assert __test_additional_tags_and_comments( project_json["name"], experiment_id=experiment_json["id"], @@ -516,7 +516,7 @@ def __test_additional_tags_and_comments(project_name, **entity_identification_kw repository.create_dataframe( domain.Dataframe(**dataframe_project_json), - TEST_DATAFRAME, + DATAFRAME, project_json["name"], ) dataframe_project = repository.get_dataframe_metadata( @@ -529,7 +529,7 @@ def __test_additional_tags_and_comments(project_name, **entity_identification_kw ) assert dataframe_project == dataframe_project_json - assert dataframe_project_data.equals(TEST_DATAFRAME) + assert dataframe_project_data.equals(DATAFRAME) assert __test_additional_tags_and_comments( project_json["name"], entity_identifier=dataframe_project_json["id"], @@ -538,7 +538,7 @@ def __test_additional_tags_and_comments(project_name, **entity_identification_kw repository.create_dataframe( domain.Dataframe(**dataframe_experiment_json), - TEST_DATAFRAME, + DATAFRAME, project_json["name"], experiment_json["id"], ) @@ -554,7 +554,7 @@ def __test_additional_tags_and_comments(project_name, **entity_identification_kw ) assert dataframe_experiment == dataframe_experiment_json - assert dataframe_experiment_data.equals(TEST_DATAFRAME) + assert dataframe_experiment_data.equals(DATAFRAME) assert __test_additional_tags_and_comments( project_json["name"], experiment_id=experiment_json["id"], @@ -740,7 +740,7 @@ def 
__test_additional_tags_and_comments( repository.create_artifact( domain.Artifact(**artifact_project_json), - TEST_ARTIFACT_BINARY, + ARTIFACT_BINARY, project_json["name"], ) @@ -760,7 +760,7 @@ def __test_additional_tags_and_comments( artifact_project_data = file.read() assert artifact_project == artifact_project_json - assert artifact_project_data == TEST_ARTIFACT_BINARY + assert artifact_project_data == ARTIFACT_BINARY assert __test_additional_tags_and_comments( expected_artifact_project_dir, project_json["name"], @@ -770,7 +770,7 @@ def __test_additional_tags_and_comments( repository.create_artifact( domain.Artifact(**artifact_experiment_json), - TEST_ARTIFACT_BINARY, + ARTIFACT_BINARY, project_json["name"], experiment_json["id"], ) @@ -793,7 +793,7 @@ def __test_additional_tags_and_comments( artifact_experiment_data = file.read() assert artifact_experiment == artifact_experiment_json - assert artifact_experiment_data == TEST_ARTIFACT_BINARY + assert artifact_experiment_data == ARTIFACT_BINARY assert __test_additional_tags_and_comments( expected_artifact_experiment_dir, project_json["name"], @@ -804,7 +804,7 @@ def __test_additional_tags_and_comments( repository.create_dataframe( domain.Dataframe(**dataframe_project_json), - TEST_DATAFRAME, + DATAFRAME, project_json["name"], ) @@ -825,7 +825,7 @@ def __test_additional_tags_and_comments( dataframe_project_data = pd.read_parquet(expected_dataframe_project_data_path) assert dataframe_project == dataframe_project_json - assert dataframe_project_data.equals(TEST_DATAFRAME) + assert dataframe_project_data.equals(DATAFRAME) assert __test_additional_tags_and_comments( expected_dataframe_project_dir, project_json["name"], @@ -835,7 +835,7 @@ def __test_additional_tags_and_comments( repository.create_dataframe( domain.Dataframe(**dataframe_experiment_json), - TEST_DATAFRAME, + DATAFRAME, project_json["name"], experiment_json["id"], ) @@ -857,7 +857,7 @@ def __test_additional_tags_and_comments( dataframe_experiment_data = 
pd.read_parquet(expected_dataframe_experiment_data_path) assert dataframe_experiment == dataframe_experiment_json - assert dataframe_experiment_data.equals(TEST_DATAFRAME) + assert dataframe_experiment_data.equals(DATAFRAME) assert __test_additional_tags_and_comments( expected_dataframe_experiment_dir, project_json["name"], @@ -865,3 +865,117 @@ def __test_additional_tags_and_comments( entity_identifier=dataframe_experiment_json["id"], entity_type="Dataframe", ) + + +def test_delete_regression( + artifact_project_json, + artifact_experiment_json, + dataframe_project_json, + dataframe_experiment_json, + experiment_json, + project_json, +): + """Tests that `rubicon_ml` can delete artifacts and dataframes from the filesystem.""" + filesystem = fsspec.filesystem("file") + + with tempfile.TemporaryDirectory() as temp_dir_name: + root_dir = os.path.join(temp_dir_name, "test-rubicon-ml") + repository = LocalRepository(root_dir=root_dir) + + repository.create_artifact( + domain.Artifact(**artifact_project_json), + ARTIFACT_BINARY, + project_json["name"], + ) + + expected_project_dir = os.path.join(root_dir, slugify(project_json["name"])) + expected_artifact_project_path = os.path.join( + expected_project_dir, + "artifacts", + artifact_project_json["id"], + "metadata.json", + ) + + assert filesystem.exists(expected_artifact_project_path) + + repository.delete_artifact( + project_json["name"], + artifact_project_json["id"], + ) + + assert not filesystem.exists(expected_artifact_project_path) + + repository.create_artifact( + domain.Artifact(**artifact_experiment_json), + ARTIFACT_BINARY, + project_json["name"], + experiment_json["id"], + ) + + expected_experiment_dir = os.path.join( + expected_project_dir, + "experiments", + experiment_json["id"], + ) + expected_artifact_experiment_path = os.path.join( + expected_experiment_dir, + "artifacts", + artifact_experiment_json["id"], + "metadata.json", + ) + + assert filesystem.exists(expected_artifact_experiment_path) + + 
repository.delete_artifact( + project_json["name"], + artifact_experiment_json["id"], + experiment_json["id"], + ) + + assert not filesystem.exists(expected_artifact_experiment_path) + + repository.create_dataframe( + domain.Dataframe(**dataframe_project_json), + DATAFRAME, + project_json["name"], + ) + + expected_dataframe_project_path = os.path.join( + expected_project_dir, + "dataframes", + dataframe_project_json["id"], + "metadata.json", + ) + + assert filesystem.exists(expected_dataframe_project_path) + + repository.delete_dataframe( + project_json["name"], + dataframe_project_json["id"], + ) + + assert not filesystem.exists(expected_dataframe_project_path) + + repository.create_dataframe( + domain.Dataframe(**dataframe_experiment_json), + DATAFRAME, + project_json["name"], + experiment_json["id"], + ) + + expected_dataframe_experiment_path = os.path.join( + expected_experiment_dir, + "dataframes", + dataframe_experiment_json["id"], + "metadata.json", + ) + + assert filesystem.exists(expected_dataframe_experiment_path) + + repository.delete_dataframe( + project_json["name"], + dataframe_experiment_json["id"], + experiment_json["id"], + ) + + assert not filesystem.exists(expected_dataframe_experiment_path) From 30dc2a03488e568e940e0726688c3281c8f20581 Mon Sep 17 00:00:00 2001 From: Ryan Soley Date: Thu, 29 Aug 2024 13:57:53 -0400 Subject: [PATCH 12/16] parametrize repositories --- .../regression/test_repository_read_write.py | 419 ++++++++++-------- 1 file changed, 230 insertions(+), 189 deletions(-) diff --git a/tests/regression/test_repository_read_write.py b/tests/regression/test_repository_read_write.py index c14d3fe2..3de5add0 100644 --- a/tests/regression/test_repository_read_write.py +++ b/tests/regression/test_repository_read_write.py @@ -1,22 +1,28 @@ +import contextlib import os import tempfile import uuid -import fsspec import pandas as pd +import pytest from rubicon_ml import domain -from rubicon_ml.repository import LocalRepository +from 
rubicon_ml.repository import LocalRepository, MemoryRepository from rubicon_ml.repository.utils import json, slugify ARTIFACT_BINARY = b"artifact" COMMENTS_TO_ADD = ["comment_a", "comment_b"] COMMENTS_TO_REMOVE = ["comment_a"] DATAFRAME = pd.DataFrame([[0]]) +REPOSITORIES_TO_TEST = [ # TODO: find local/CI S3 testing solution + LocalRepository, + MemoryRepository, +] TAGS_TO_ADD = ["added_a", "added_b"] TAGS_TO_REMOVE = ["added_a"] +@pytest.mark.parametrize("repository_class", REPOSITORIES_TO_TEST) def test_read_regression( artifact_project_json, artifact_experiment_json, @@ -27,31 +33,40 @@ def test_read_regression( metric_json, parameter_json, project_json, + repository_class, ): - """Tests that `rubicon_ml` can read each domain entity from the filesystem.""" - filesystem = fsspec.filesystem("file") - - with tempfile.TemporaryDirectory() as temp_dir_name: + """Tests that `rubicon_ml` can read each domain entity from the filesystem. + + The `MemoryRepository` skips dataframe tests as the `pandas` API can not be + used to write directly to memory. Dataframe regression tests are covered by + `test_read_write_regression`. 
+ """ + if repository_class == LocalRepository: + temp_dir_context = tempfile.TemporaryDirectory() + else: + temp_dir_context = contextlib.nullcontext(enter_result="./test_read_regression/") + + with temp_dir_context as temp_dir_name: root_dir = os.path.join(temp_dir_name, "test-rubicon-ml") - repository = LocalRepository(root_dir=root_dir) + repository = repository_class(root_dir=root_dir) def __test_additional_tags_and_comments( tag_comment_dir, project_name, **entity_identification_kwargs ): add_tag_path = os.path.join(tag_comment_dir, f"tags_{uuid.uuid4()}.json") - with filesystem.open(add_tag_path, "w") as file: + with repository.filesystem.open(add_tag_path, "w") as file: file.write(json.dumps({"added_tags": TAGS_TO_ADD})) remove_tag_path = os.path.join(tag_comment_dir, f"tags_{uuid.uuid4()}.json") - with filesystem.open(remove_tag_path, "w") as file: + with repository.filesystem.open(remove_tag_path, "w") as file: file.write(json.dumps({"removed_tags": TAGS_TO_REMOVE})) add_comment_path = os.path.join(tag_comment_dir, f"comments_{uuid.uuid4()}.json") - with filesystem.open(add_comment_path, "w") as file: + with repository.filesystem.open(add_comment_path, "w") as file: file.write(json.dumps({"added_comments": COMMENTS_TO_ADD})) remove_comment_path = os.path.join(tag_comment_dir, f"comments_{uuid.uuid4()}.json") - with filesystem.open(remove_comment_path, "w") as file: + with repository.filesystem.open(remove_comment_path, "w") as file: file.write(json.dumps({"removed_comments": COMMENTS_TO_REMOVE})) additional_tags = repository.get_tags( @@ -73,8 +88,8 @@ def __test_additional_tags_and_comments( expected_project_dir = os.path.join(root_dir, slugify(project_json["name"])) expected_project_path = os.path.join(expected_project_dir, "metadata.json") - filesystem.mkdirs(expected_project_dir, exist_ok=True) - with filesystem.open(expected_project_path, "w") as file: + repository.filesystem.mkdirs(expected_project_dir, exist_ok=True) + with 
repository.filesystem.open(expected_project_path, "w") as file: file.write(json.dumps(project_json)) project = repository.get_project(project_json["name"]).__dict__ @@ -88,8 +103,8 @@ def __test_additional_tags_and_comments( ) expected_experiment_path = os.path.join(expected_experiment_dir, "metadata.json") - filesystem.mkdirs(expected_experiment_dir, exist_ok=True) - with filesystem.open(expected_experiment_path, "w") as file: + repository.filesystem.mkdirs(expected_experiment_dir, exist_ok=True) + with repository.filesystem.open(expected_experiment_path, "w") as file: file.write(json.dumps(experiment_json)) experiment = repository.get_experiment( @@ -113,8 +128,8 @@ def __test_additional_tags_and_comments( ) expected_feature_path = os.path.join(expected_feature_dir, "metadata.json") - filesystem.mkdirs(expected_feature_dir, exist_ok=True) - with filesystem.open(expected_feature_path, "w") as file: + repository.filesystem.mkdirs(expected_feature_dir, exist_ok=True) + with repository.filesystem.open(expected_feature_path, "w") as file: file.write(json.dumps(feature_json)) feature = repository.get_feature( @@ -139,8 +154,8 @@ def __test_additional_tags_and_comments( ) expected_metric_path = os.path.join(expected_metric_dir, "metadata.json") - filesystem.mkdirs(expected_metric_dir, exist_ok=True) - with filesystem.open(expected_metric_path, "w") as file: + repository.filesystem.mkdirs(expected_metric_dir, exist_ok=True) + with repository.filesystem.open(expected_metric_path, "w") as file: file.write(json.dumps(metric_json)) metric = repository.get_metric( @@ -165,8 +180,8 @@ def __test_additional_tags_and_comments( ) expected_parameter_path = os.path.join(expected_parameter_dir, "metadata.json") - filesystem.mkdirs(expected_parameter_dir, exist_ok=True) - with filesystem.open(expected_parameter_path, "w") as file: + repository.filesystem.mkdirs(expected_parameter_dir, exist_ok=True) + with repository.filesystem.open(expected_parameter_path, "w") as file: 
file.write(json.dumps(parameter_json)) parameter = repository.get_parameter( @@ -194,10 +209,10 @@ def __test_additional_tags_and_comments( ) expected_artifact_project_data_path = os.path.join(expected_artifact_project_dir, "data") - filesystem.mkdirs(expected_artifact_project_dir, exist_ok=True) - with filesystem.open(expected_artifact_project_path, "w") as file: + repository.filesystem.mkdirs(expected_artifact_project_dir, exist_ok=True) + with repository.filesystem.open(expected_artifact_project_path, "w") as file: file.write(json.dumps(artifact_project_json)) - with filesystem.open(expected_artifact_project_data_path, "wb") as file: + with repository.filesystem.open(expected_artifact_project_data_path, "wb") as file: file.write(ARTIFACT_BINARY) artifact_project = repository.get_artifact_metadata( @@ -230,10 +245,10 @@ def __test_additional_tags_and_comments( expected_artifact_experiment_dir, "data" ) - filesystem.mkdirs(expected_artifact_experiment_dir, exist_ok=True) - with filesystem.open(expected_artifact_experiment_path, "w") as file: + repository.filesystem.mkdirs(expected_artifact_experiment_dir, exist_ok=True) + with repository.filesystem.open(expected_artifact_experiment_path, "w") as file: file.write(json.dumps(artifact_experiment_json)) - with filesystem.open(expected_artifact_experiment_data_path, "wb") as file: + with repository.filesystem.open(expected_artifact_experiment_data_path, "wb") as file: file.write(ARTIFACT_BINARY) artifact_experiment = repository.get_artifact_metadata( @@ -257,86 +272,90 @@ def __test_additional_tags_and_comments( entity_type="Artifact", ) - expected_dataframe_project_dir = os.path.join( - expected_project_dir, - "dataframes", - dataframe_project_json["id"], - ) - expected_dataframe_project_path = os.path.join( - expected_dataframe_project_dir, "metadata.json" - ) - expected_dataframe_project_data_dir = os.path.join(expected_dataframe_project_dir, "data") - expected_dataframe_project_data_path = os.path.join( - 
expected_dataframe_project_data_dir, "data.parquet" - ) - - filesystem.mkdirs(expected_dataframe_project_dir, exist_ok=True) - filesystem.mkdirs(expected_dataframe_project_data_dir, exist_ok=True) - with filesystem.open(expected_dataframe_project_path, "w") as file: - file.write(json.dumps(dataframe_project_json)) - DATAFRAME.to_parquet(expected_dataframe_project_data_path) - - dataframe_project = repository.get_dataframe_metadata( - project_json["name"], - dataframe_project_json["id"], - ).__dict__ - dataframe_project_data = repository.get_dataframe_data( - project_json["name"], - dataframe_project_json["id"], - ) + if repository_class != MemoryRepository: + expected_dataframe_project_dir = os.path.join( + expected_project_dir, + "dataframes", + dataframe_project_json["id"], + ) + expected_dataframe_project_path = os.path.join( + expected_dataframe_project_dir, "metadata.json" + ) + expected_dataframe_project_data_dir = os.path.join( + expected_dataframe_project_dir, "data" + ) + expected_dataframe_project_data_path = os.path.join( + expected_dataframe_project_data_dir, "data.parquet" + ) - assert dataframe_project == dataframe_project_json - assert dataframe_project_data.equals(DATAFRAME) - assert __test_additional_tags_and_comments( - expected_dataframe_project_dir, - project_json["name"], - entity_identifier=dataframe_project_json["id"], - entity_type="Dataframe", - ) + repository.filesystem.mkdirs(expected_dataframe_project_dir, exist_ok=True) + repository.filesystem.mkdirs(expected_dataframe_project_data_dir, exist_ok=True) + with repository.filesystem.open(expected_dataframe_project_path, "w") as file: + file.write(json.dumps(dataframe_project_json)) + DATAFRAME.to_parquet(expected_dataframe_project_data_path) + + dataframe_project = repository.get_dataframe_metadata( + project_json["name"], + dataframe_project_json["id"], + ).__dict__ + dataframe_project_data = repository.get_dataframe_data( + project_json["name"], + dataframe_project_json["id"], + ) - 
expected_dataframe_experiment_dir = os.path.join( - expected_experiment_dir, - "dataframes", - dataframe_experiment_json["id"], - ) - expected_dataframe_experiment_path = os.path.join( - expected_dataframe_experiment_dir, "metadata.json" - ) - expected_dataframe_experiment_data_dir = os.path.join( - expected_dataframe_experiment_dir, "data" - ) - expected_dataframe_experiment_data_path = os.path.join( - expected_dataframe_experiment_data_dir, "data.parquet" - ) + assert dataframe_project == dataframe_project_json + assert dataframe_project_data.equals(DATAFRAME) + assert __test_additional_tags_and_comments( + expected_dataframe_project_dir, + project_json["name"], + entity_identifier=dataframe_project_json["id"], + entity_type="Dataframe", + ) - filesystem.mkdirs(expected_dataframe_experiment_dir, exist_ok=True) - filesystem.mkdirs(expected_dataframe_experiment_data_dir, exist_ok=True) - with filesystem.open(expected_dataframe_experiment_path, "w") as file: - file.write(json.dumps(dataframe_experiment_json)) - DATAFRAME.to_parquet(expected_dataframe_experiment_data_path) + expected_dataframe_experiment_dir = os.path.join( + expected_experiment_dir, + "dataframes", + dataframe_experiment_json["id"], + ) + expected_dataframe_experiment_path = os.path.join( + expected_dataframe_experiment_dir, "metadata.json" + ) + expected_dataframe_experiment_data_dir = os.path.join( + expected_dataframe_experiment_dir, "data" + ) + expected_dataframe_experiment_data_path = os.path.join( + expected_dataframe_experiment_data_dir, "data.parquet" + ) - dataframe_experiment = repository.get_dataframe_metadata( - project_json["name"], - dataframe_experiment_json["id"], - experiment_json["id"], - ).__dict__ - dataframe_experiment_data = repository.get_dataframe_data( - project_json["name"], - dataframe_experiment_json["id"], - experiment_json["id"], - ) + repository.filesystem.mkdirs(expected_dataframe_experiment_dir, exist_ok=True) + 
repository.filesystem.mkdirs(expected_dataframe_experiment_data_dir, exist_ok=True) + with repository.filesystem.open(expected_dataframe_experiment_path, "w") as file: + file.write(json.dumps(dataframe_experiment_json)) + DATAFRAME.to_parquet(expected_dataframe_experiment_data_path) + + dataframe_experiment = repository.get_dataframe_metadata( + project_json["name"], + dataframe_experiment_json["id"], + experiment_json["id"], + ).__dict__ + dataframe_experiment_data = repository.get_dataframe_data( + project_json["name"], + dataframe_experiment_json["id"], + experiment_json["id"], + ) - assert dataframe_experiment == dataframe_experiment_json - assert dataframe_experiment_data.equals(DATAFRAME) - assert __test_additional_tags_and_comments( - expected_dataframe_experiment_dir, - project_json["name"], - experiment_id=experiment_json["id"], - entity_identifier=dataframe_experiment_json["id"], - entity_type="Dataframe", - ) + assert dataframe_experiment == dataframe_experiment_json + assert dataframe_experiment_data.equals(DATAFRAME) + assert __test_additional_tags_and_comments( + expected_dataframe_experiment_dir, + project_json["name"], + experiment_id=experiment_json["id"], + entity_identifier=dataframe_experiment_json["id"], + entity_type="Dataframe", + ) +@pytest.mark.parametrize("repository_class", REPOSITORIES_TO_TEST) def test_read_write_regression( artifact_project_json, artifact_experiment_json, @@ -347,11 +366,17 @@ def test_read_write_regression( metric_json, parameter_json, project_json, + repository_class, ): """Tests that `rubicon_ml` can read each domain entity that it wrote.""" - with tempfile.TemporaryDirectory() as temp_dir_name: + if repository_class == LocalRepository: + temp_dir_context = tempfile.TemporaryDirectory() + else: + temp_dir_context = contextlib.nullcontext(enter_result="./test_read_write_regression/") + + with temp_dir_context as temp_dir_name: root_dir = os.path.join(temp_dir_name, "test-rubicon-ml") - repository = 
LocalRepository(root_dir=root_dir) + repository = repository_class(root_dir=root_dir) def __test_additional_tags_and_comments(project_name, **entity_identification_kwargs): repository.add_tags( @@ -563,6 +588,7 @@ def __test_additional_tags_and_comments(project_name, **entity_identification_kw ) +@pytest.mark.parametrize("repository_class", REPOSITORIES_TO_TEST) def test_write_regression( artifact_project_json, artifact_experiment_json, @@ -573,13 +599,22 @@ def test_write_regression( metric_json, parameter_json, project_json, + repository_class, ): - """Tests that `rubicon_ml` can write each domain entity to the filesystem.""" - filesystem = fsspec.filesystem("file") - - with tempfile.TemporaryDirectory() as temp_dir_name: + """Tests that `rubicon_ml` can write each domain entity to the filesystem. + + The `MemoryRepository` skips dataframe tests as the `pandas` API can not be + used to read directly from memory. Dataframe regression tests are covered by + `test_read_write_regression`. 
+ """ + if repository_class == LocalRepository: + temp_dir_context = tempfile.TemporaryDirectory() + else: + temp_dir_context = contextlib.nullcontext(enter_result="./test_write_regression/") + + with temp_dir_context as temp_dir_name: root_dir = os.path.join(temp_dir_name, "test-rubicon-ml") - repository = LocalRepository(root_dir=root_dir) + repository = repository_class(root_dir=root_dir) def __test_additional_tags_and_comments( tag_dir, project_name, **entity_identification_kwargs @@ -598,9 +633,9 @@ def __test_additional_tags_and_comments( ) tag_path = os.path.join(tag_dir, "tags_*.json") - tag_files = filesystem.glob(tag_path, detail=True) + tag_files = repository.filesystem.glob(tag_path, detail=True) for tag_file in tag_files: - with filesystem.open(tag_file, "r") as file: + with repository.filesystem.open(tag_file, "r") as file: tags = json.loads(file.read()) if "added_tags" in tags: @@ -620,9 +655,9 @@ def __test_additional_tags_and_comments( ) comment_path = os.path.join(tag_dir, "comments_*.json") - comment_files = filesystem.glob(comment_path, detail=True) + comment_files = repository.filesystem.glob(comment_path, detail=True) for comment_file in comment_files: - with filesystem.open(comment_file, "r") as file: + with repository.filesystem.open(comment_file, "r") as file: comments = json.loads(file.read()) if "added_comments" in comments: @@ -637,7 +672,7 @@ def __test_additional_tags_and_comments( expected_project_dir = os.path.join(root_dir, slugify(project_json["name"])) expected_project_path = os.path.join(expected_project_dir, "metadata.json") - with filesystem.open(expected_project_path, "r") as file: + with repository.filesystem.open(expected_project_path, "r") as file: project = json.loads(file.read()) assert project == project_json @@ -651,7 +686,7 @@ def __test_additional_tags_and_comments( ) expected_experiment_path = os.path.join(expected_experiment_dir, "metadata.json") - with filesystem.open(expected_experiment_path, "r") as file: + with 
repository.filesystem.open(expected_experiment_path, "r") as file: experiment = json.loads(file.read()) assert experiment == experiment_json @@ -676,7 +711,7 @@ def __test_additional_tags_and_comments( ) expected_feature_path = os.path.join(expected_feature_dir, "metadata.json") - with filesystem.open(expected_feature_path, "r") as file: + with repository.filesystem.open(expected_feature_path, "r") as file: feature = json.loads(file.read()) assert feature == feature_json @@ -701,7 +736,7 @@ def __test_additional_tags_and_comments( ) expected_metric_path = os.path.join(expected_metric_dir, "metadata.json") - with filesystem.open(expected_metric_path, "r") as file: + with repository.filesystem.open(expected_metric_path, "r") as file: metric = json.loads(file.read()) assert metric == metric_json @@ -726,7 +761,7 @@ def __test_additional_tags_and_comments( ) expected_parameter_path = os.path.join(expected_parameter_dir, "metadata.json") - with filesystem.open(expected_parameter_path, "r") as file: + with repository.filesystem.open(expected_parameter_path, "r") as file: parameter = json.loads(file.read()) assert parameter == parameter_json @@ -754,9 +789,9 @@ def __test_additional_tags_and_comments( ) expected_artifact_project_data_path = os.path.join(expected_artifact_project_dir, "data") - with filesystem.open(expected_artifact_project_path, "r") as file: + with repository.filesystem.open(expected_artifact_project_path, "r") as file: artifact_project = json.loads(file.read()) - with filesystem.open(expected_artifact_project_data_path, "rb") as file: + with repository.filesystem.open(expected_artifact_project_data_path, "rb") as file: artifact_project_data = file.read() assert artifact_project == artifact_project_json @@ -787,9 +822,9 @@ def __test_additional_tags_and_comments( expected_artifact_experiment_dir, "data" ) - with filesystem.open(expected_artifact_experiment_path, "r") as file: + with repository.filesystem.open(expected_artifact_experiment_path, "r") as 
file: artifact_experiment = json.loads(file.read()) - with filesystem.open(expected_artifact_experiment_data_path, "rb") as file: + with repository.filesystem.open(expected_artifact_experiment_data_path, "rb") as file: artifact_experiment_data = file.read() assert artifact_experiment == artifact_experiment_json @@ -802,71 +837,73 @@ def __test_additional_tags_and_comments( entity_type="Artifact", ) - repository.create_dataframe( - domain.Dataframe(**dataframe_project_json), - DATAFRAME, - project_json["name"], - ) - - expected_dataframe_project_dir = os.path.join( - expected_project_dir, - "dataframes", - dataframe_project_json["id"], - ) - expected_dataframe_project_path = os.path.join( - expected_dataframe_project_dir, "metadata.json" - ) - expected_dataframe_project_data_path = os.path.join( - expected_dataframe_project_dir, "data", "data.parquet" - ) - - with filesystem.open(expected_dataframe_project_path, "r") as file: - dataframe_project = json.loads(file.read()) - dataframe_project_data = pd.read_parquet(expected_dataframe_project_data_path) + if repository_class != MemoryRepository: + repository.create_dataframe( + domain.Dataframe(**dataframe_project_json), + DATAFRAME, + project_json["name"], + ) - assert dataframe_project == dataframe_project_json - assert dataframe_project_data.equals(DATAFRAME) - assert __test_additional_tags_and_comments( - expected_dataframe_project_dir, - project_json["name"], - entity_identifier=dataframe_project_json["id"], - entity_type="Dataframe", - ) + expected_dataframe_project_dir = os.path.join( + expected_project_dir, + "dataframes", + dataframe_project_json["id"], + ) + expected_dataframe_project_path = os.path.join( + expected_dataframe_project_dir, "metadata.json" + ) + expected_dataframe_project_data_path = os.path.join( + expected_dataframe_project_dir, "data", "data.parquet" + ) - repository.create_dataframe( - domain.Dataframe(**dataframe_experiment_json), - DATAFRAME, - project_json["name"], - 
experiment_json["id"], - ) + with repository.filesystem.open(expected_dataframe_project_path, "r") as file: + dataframe_project = json.loads(file.read()) + dataframe_project_data = pd.read_parquet(expected_dataframe_project_data_path) + + assert dataframe_project == dataframe_project_json + assert dataframe_project_data.equals(DATAFRAME) + assert __test_additional_tags_and_comments( + expected_dataframe_project_dir, + project_json["name"], + entity_identifier=dataframe_project_json["id"], + entity_type="Dataframe", + ) - expected_dataframe_experiment_dir = os.path.join( - expected_experiment_dir, - "dataframes", - dataframe_experiment_json["id"], - ) - expected_dataframe_experiment_path = os.path.join( - expected_dataframe_experiment_dir, "metadata.json" - ) - expected_dataframe_experiment_data_path = os.path.join( - expected_dataframe_experiment_dir, "data", "data.parquet" - ) + repository.create_dataframe( + domain.Dataframe(**dataframe_experiment_json), + DATAFRAME, + project_json["name"], + experiment_json["id"], + ) - with filesystem.open(expected_dataframe_experiment_path, "r") as file: - dataframe_experiment = json.loads(file.read()) - dataframe_experiment_data = pd.read_parquet(expected_dataframe_experiment_data_path) + expected_dataframe_experiment_dir = os.path.join( + expected_experiment_dir, + "dataframes", + dataframe_experiment_json["id"], + ) + expected_dataframe_experiment_path = os.path.join( + expected_dataframe_experiment_dir, "metadata.json" + ) + expected_dataframe_experiment_data_path = os.path.join( + expected_dataframe_experiment_dir, "data", "data.parquet" + ) - assert dataframe_experiment == dataframe_experiment_json - assert dataframe_experiment_data.equals(DATAFRAME) - assert __test_additional_tags_and_comments( - expected_dataframe_experiment_dir, - project_json["name"], - experiment_id=experiment_json["id"], - entity_identifier=dataframe_experiment_json["id"], - entity_type="Dataframe", - ) + with 
repository.filesystem.open(expected_dataframe_experiment_path, "r") as file: + dataframe_experiment = json.loads(file.read()) + dataframe_experiment_data = pd.read_parquet(expected_dataframe_experiment_data_path) + + assert dataframe_experiment == dataframe_experiment_json + assert dataframe_experiment_data.equals(DATAFRAME) + assert __test_additional_tags_and_comments( + expected_dataframe_experiment_dir, + project_json["name"], + experiment_id=experiment_json["id"], + entity_identifier=dataframe_experiment_json["id"], + entity_type="Dataframe", + ) +@pytest.mark.parametrize("repository_class", REPOSITORIES_TO_TEST) def test_delete_regression( artifact_project_json, artifact_experiment_json, @@ -874,13 +911,17 @@ def test_delete_regression( dataframe_experiment_json, experiment_json, project_json, + repository_class, ): """Tests that `rubicon_ml` can delete artifacts and dataframes from the filesystem.""" - filesystem = fsspec.filesystem("file") + if repository_class == LocalRepository: + temp_dir_context = tempfile.TemporaryDirectory() + else: + temp_dir_context = contextlib.nullcontext(enter_result="./test_delete_regression/") - with tempfile.TemporaryDirectory() as temp_dir_name: + with temp_dir_context as temp_dir_name: root_dir = os.path.join(temp_dir_name, "test-rubicon-ml") - repository = LocalRepository(root_dir=root_dir) + repository = repository_class(root_dir=root_dir) repository.create_artifact( domain.Artifact(**artifact_project_json), @@ -896,14 +937,14 @@ def test_delete_regression( "metadata.json", ) - assert filesystem.exists(expected_artifact_project_path) + assert repository.filesystem.exists(expected_artifact_project_path) repository.delete_artifact( project_json["name"], artifact_project_json["id"], ) - assert not filesystem.exists(expected_artifact_project_path) + assert not repository.filesystem.exists(expected_artifact_project_path) repository.create_artifact( domain.Artifact(**artifact_experiment_json), @@ -924,7 +965,7 @@ def 
test_delete_regression( "metadata.json", ) - assert filesystem.exists(expected_artifact_experiment_path) + assert repository.filesystem.exists(expected_artifact_experiment_path) repository.delete_artifact( project_json["name"], @@ -932,7 +973,7 @@ def test_delete_regression( experiment_json["id"], ) - assert not filesystem.exists(expected_artifact_experiment_path) + assert not repository.filesystem.exists(expected_artifact_experiment_path) repository.create_dataframe( domain.Dataframe(**dataframe_project_json), @@ -947,14 +988,14 @@ def test_delete_regression( "metadata.json", ) - assert filesystem.exists(expected_dataframe_project_path) + assert repository.filesystem.exists(expected_dataframe_project_path) repository.delete_dataframe( project_json["name"], dataframe_project_json["id"], ) - assert not filesystem.exists(expected_dataframe_project_path) + assert not repository.filesystem.exists(expected_dataframe_project_path) repository.create_dataframe( domain.Dataframe(**dataframe_experiment_json), @@ -970,7 +1011,7 @@ def test_delete_regression( "metadata.json", ) - assert filesystem.exists(expected_dataframe_experiment_path) + assert repository.filesystem.exists(expected_dataframe_experiment_path) repository.delete_dataframe( project_json["name"], @@ -978,4 +1019,4 @@ def test_delete_regression( experiment_json["id"], ) - assert not filesystem.exists(expected_dataframe_experiment_path) + assert not repository.filesystem.exists(expected_dataframe_experiment_path) From 1cdbb479f911df6cf448a4d0ebfd5ddc32fb285e Mon Sep 17 00:00:00 2001 From: Ryan Soley Date: Thu, 29 Aug 2024 14:17:23 -0400 Subject: [PATCH 13/16] don't assume order of results --- .../regression/test_repository_read_write.py | 50 +++++++++++++------ 1 file changed, 34 insertions(+), 16 deletions(-) diff --git a/tests/regression/test_repository_read_write.py b/tests/regression/test_repository_read_write.py index 3de5add0..38b788bb 100644 --- a/tests/regression/test_repository_read_write.py +++ 
b/tests/regression/test_repository_read_write.py @@ -53,6 +53,8 @@ def test_read_regression( def __test_additional_tags_and_comments( tag_comment_dir, project_name, **entity_identification_kwargs ): + is_passing = True + add_tag_path = os.path.join(tag_comment_dir, f"tags_{uuid.uuid4()}.json") with repository.filesystem.open(add_tag_path, "w") as file: file.write(json.dumps({"added_tags": TAGS_TO_ADD})) @@ -61,6 +63,16 @@ def __test_additional_tags_and_comments( with repository.filesystem.open(remove_tag_path, "w") as file: file.write(json.dumps({"removed_tags": TAGS_TO_REMOVE})) + additional_tags = repository.get_tags( + project_name, + **entity_identification_kwargs, + ) + for tags in additional_tags: + if "added_tags" in tags: + is_passing &= tags["added_tags"] == TAGS_TO_ADD + if "removed_tags" in tags: + is_passing &= tags["removed_tags"] == TAGS_TO_REMOVE + add_comment_path = os.path.join(tag_comment_dir, f"comments_{uuid.uuid4()}.json") with repository.filesystem.open(add_comment_path, "w") as file: file.write(json.dumps({"added_comments": COMMENTS_TO_ADD})) @@ -69,21 +81,17 @@ def __test_additional_tags_and_comments( with repository.filesystem.open(remove_comment_path, "w") as file: file.write(json.dumps({"removed_comments": COMMENTS_TO_REMOVE})) - additional_tags = repository.get_tags( - project_name, - **entity_identification_kwargs, - ) additional_comments = repository.get_comments( project_name, **entity_identification_kwargs, ) + for comments in additional_comments: + if "added_comments" in comments: + is_passing &= comments["added_comments"] == COMMENTS_TO_ADD + if "removed_tags" in comments: + is_passing &= comments["removed_comments"] == COMMENTS_TO_REMOVE - return ( - additional_tags[0]["added_tags"] == TAGS_TO_ADD - and additional_tags[1]["removed_tags"] == TAGS_TO_REMOVE - and additional_comments[0]["added_comments"] == COMMENTS_TO_ADD - and additional_comments[1]["removed_comments"] == COMMENTS_TO_REMOVE - ) + return is_passing 
expected_project_dir = os.path.join(root_dir, slugify(project_json["name"])) expected_project_path = os.path.join(expected_project_dir, "metadata.json") @@ -379,6 +387,8 @@ def test_read_write_regression( repository = repository_class(root_dir=root_dir) def __test_additional_tags_and_comments(project_name, **entity_identification_kwargs): + is_passing = True + repository.add_tags( project_name, TAGS_TO_ADD, @@ -393,6 +403,13 @@ def __test_additional_tags_and_comments(project_name, **entity_identification_kw project_name, **entity_identification_kwargs, ) + + for tags in additional_tags: + if "added_tags" in tags: + is_passing &= tags["added_tags"] == TAGS_TO_ADD + if "removed_tags" in tags: + is_passing &= tags["removed_tags"] == TAGS_TO_REMOVE + repository.add_comments( project_name, COMMENTS_TO_ADD, @@ -408,12 +425,13 @@ def __test_additional_tags_and_comments(project_name, **entity_identification_kw **entity_identification_kwargs, ) - return ( - additional_tags[0]["added_tags"] == TAGS_TO_ADD - and additional_tags[1]["removed_tags"] == TAGS_TO_REMOVE - and additional_comments[0]["added_comments"] == COMMENTS_TO_ADD - and additional_comments[1]["removed_comments"] == COMMENTS_TO_REMOVE - ) + for comments in additional_comments: + if "added_comments" in comments: + is_passing &= comments["added_comments"] == COMMENTS_TO_ADD + if "removed_tags" in comments: + is_passing &= comments["removed_comments"] == COMMENTS_TO_REMOVE + + return is_passing repository.create_project(domain.Project(**project_json)) project = repository.get_project(project_json["name"]).__dict__ From a0b9ee4a6392eed0481a4e583534646bf8a9cce8 Mon Sep 17 00:00:00 2001 From: Ryan Soley Date: Fri, 6 Sep 2024 11:04:59 -0400 Subject: [PATCH 14/16] split read regression into individual entity tests --- .../regression/test_repository_read_write.py | 415 +++++++++++++----- 1 file changed, 295 insertions(+), 120 deletions(-) diff --git a/tests/regression/test_repository_read_write.py 
b/tests/regression/test_repository_read_write.py index 38b788bb..5859bf34 100644 --- a/tests/regression/test_repository_read_write.py +++ b/tests/regression/test_repository_read_write.py @@ -22,79 +22,66 @@ TAGS_TO_REMOVE = ["added_a"] -@pytest.mark.parametrize("repository_class", REPOSITORIES_TO_TEST) -def test_read_regression( - artifact_project_json, - artifact_experiment_json, - dataframe_project_json, - dataframe_experiment_json, - experiment_json, - feature_json, - metric_json, - parameter_json, - project_json, - repository_class, +def _test_read_additional_tags_and_comments( + repository, tag_comment_dir, project_name, **entity_identification_kwargs ): - """Tests that `rubicon_ml` can read each domain entity from the filesystem. + is_passing = True + + add_tag_path = os.path.join(tag_comment_dir, f"tags_{uuid.uuid4()}.json") + with repository.filesystem.open(add_tag_path, "w") as file: + file.write(json.dumps({"added_tags": TAGS_TO_ADD})) + + remove_tag_path = os.path.join(tag_comment_dir, f"tags_{uuid.uuid4()}.json") + with repository.filesystem.open(remove_tag_path, "w") as file: + file.write(json.dumps({"removed_tags": TAGS_TO_REMOVE})) + + additional_tags = repository.get_tags( + project_name, + **entity_identification_kwargs, + ) + for tags in additional_tags: + if "added_tags" in tags: + is_passing &= tags["added_tags"] == TAGS_TO_ADD + if "removed_tags" in tags: + is_passing &= tags["removed_tags"] == TAGS_TO_REMOVE + + add_comment_path = os.path.join(tag_comment_dir, f"comments_{uuid.uuid4()}.json") + with repository.filesystem.open(add_comment_path, "w") as file: + file.write(json.dumps({"added_comments": COMMENTS_TO_ADD})) + + remove_comment_path = os.path.join(tag_comment_dir, f"comments_{uuid.uuid4()}.json") + with repository.filesystem.open(remove_comment_path, "w") as file: + file.write(json.dumps({"removed_comments": COMMENTS_TO_REMOVE})) + + additional_comments = repository.get_comments( + project_name, + **entity_identification_kwargs, + ) + 
for comments in additional_comments: + if "added_comments" in comments: + is_passing &= comments["added_comments"] == COMMENTS_TO_ADD + if "removed_tags" in comments: + is_passing &= comments["removed_comments"] == COMMENTS_TO_REMOVE + + return is_passing - The `MemoryRepository` skips dataframe tests as the `pandas` API can not be - used to write directly to memory. Dataframe regression tests are covered by - `test_read_write_regression`. - """ + +@pytest.mark.parametrize("repository_class", REPOSITORIES_TO_TEST) +def test_read_project_regression(project_json, repository_class): + """Tests that `rubicon_ml` can read the project domain entity from the filesystem.""" if repository_class == LocalRepository: temp_dir_context = tempfile.TemporaryDirectory() else: - temp_dir_context = contextlib.nullcontext(enter_result="./test_read_regression/") + temp_dir_context = contextlib.nullcontext(enter_result="./test_read_project_regression/") with temp_dir_context as temp_dir_name: root_dir = os.path.join(temp_dir_name, "test-rubicon-ml") repository = repository_class(root_dir=root_dir) - def __test_additional_tags_and_comments( - tag_comment_dir, project_name, **entity_identification_kwargs - ): - is_passing = True - - add_tag_path = os.path.join(tag_comment_dir, f"tags_{uuid.uuid4()}.json") - with repository.filesystem.open(add_tag_path, "w") as file: - file.write(json.dumps({"added_tags": TAGS_TO_ADD})) - - remove_tag_path = os.path.join(tag_comment_dir, f"tags_{uuid.uuid4()}.json") - with repository.filesystem.open(remove_tag_path, "w") as file: - file.write(json.dumps({"removed_tags": TAGS_TO_REMOVE})) - - additional_tags = repository.get_tags( - project_name, - **entity_identification_kwargs, - ) - for tags in additional_tags: - if "added_tags" in tags: - is_passing &= tags["added_tags"] == TAGS_TO_ADD - if "removed_tags" in tags: - is_passing &= tags["removed_tags"] == TAGS_TO_REMOVE - - add_comment_path = os.path.join(tag_comment_dir, f"comments_{uuid.uuid4()}.json") 
- with repository.filesystem.open(add_comment_path, "w") as file: - file.write(json.dumps({"added_comments": COMMENTS_TO_ADD})) - - remove_comment_path = os.path.join(tag_comment_dir, f"comments_{uuid.uuid4()}.json") - with repository.filesystem.open(remove_comment_path, "w") as file: - file.write(json.dumps({"removed_comments": COMMENTS_TO_REMOVE})) - - additional_comments = repository.get_comments( - project_name, - **entity_identification_kwargs, - ) - for comments in additional_comments: - if "added_comments" in comments: - is_passing &= comments["added_comments"] == COMMENTS_TO_ADD - if "removed_tags" in comments: - is_passing &= comments["removed_comments"] == COMMENTS_TO_REMOVE - - return is_passing - expected_project_dir = os.path.join(root_dir, slugify(project_json["name"])) - expected_project_path = os.path.join(expected_project_dir, "metadata.json") + expected_project_path = os.path.join( + root_dir, slugify(project_json["name"]), "metadata.json" + ) repository.filesystem.mkdirs(expected_project_dir, exist_ok=True) with repository.filesystem.open(expected_project_path, "w") as file: @@ -104,8 +91,22 @@ def __test_additional_tags_and_comments( assert project == project_json + +@pytest.mark.parametrize("repository_class", REPOSITORIES_TO_TEST) +def test_read_experiment_regression(experiment_json, project_json, repository_class): + """Tests that `rubicon_ml` can read the experiment domain entity from the filesystem.""" + if repository_class == LocalRepository: + temp_dir_context = tempfile.TemporaryDirectory() + else: + temp_dir_context = contextlib.nullcontext(enter_result="./test_read_experiment_regression/") + + with temp_dir_context as temp_dir_name: + root_dir = os.path.join(temp_dir_name, "test-rubicon-ml") + repository = repository_class(root_dir=root_dir) + expected_experiment_dir = os.path.join( - expected_project_dir, + root_dir, + slugify(project_json["name"]), "experiments", experiment_json["id"], ) @@ -121,7 +122,8 @@ def 
__test_additional_tags_and_comments( ).__dict__ assert experiment == experiment_json - assert __test_additional_tags_and_comments( + assert _test_read_additional_tags_and_comments( + repository, expected_experiment_dir, project_json["name"], experiment_id=experiment_json["id"], @@ -129,8 +131,29 @@ def __test_additional_tags_and_comments( entity_type="Experiment", ) + +@pytest.mark.parametrize("repository_class", REPOSITORIES_TO_TEST) +def test_read_feature_regression( + experiment_json, + feature_json, + project_json, + repository_class, +): + """Tests that `rubicon_ml` can read the feature domain entity from the filesystem.""" + if repository_class == LocalRepository: + temp_dir_context = tempfile.TemporaryDirectory() + else: + temp_dir_context = contextlib.nullcontext(enter_result="./test_read_feature_regression/") + + with temp_dir_context as temp_dir_name: + root_dir = os.path.join(temp_dir_name, "test-rubicon-ml") + repository = repository_class(root_dir=root_dir) + expected_feature_dir = os.path.join( - expected_experiment_dir, + root_dir, + slugify(project_json["name"]), + "experiments", + experiment_json["id"], "features", slugify(feature_json["name"]), ) @@ -147,7 +170,8 @@ def __test_additional_tags_and_comments( ).__dict__ assert feature == feature_json - assert __test_additional_tags_and_comments( + assert _test_read_additional_tags_and_comments( + repository, expected_feature_dir, project_json["name"], experiment_id=experiment_json["id"], @@ -155,8 +179,29 @@ def __test_additional_tags_and_comments( entity_type="Feature", ) + +@pytest.mark.parametrize("repository_class", REPOSITORIES_TO_TEST) +def test_read_metric_regression( + experiment_json, + metric_json, + project_json, + repository_class, +): + """Tests that `rubicon_ml` can read the metric domain entity from the filesystem.""" + if repository_class == LocalRepository: + temp_dir_context = tempfile.TemporaryDirectory() + else: + temp_dir_context = 
contextlib.nullcontext(enter_result="./test_read_metric_regression/") + + with temp_dir_context as temp_dir_name: + root_dir = os.path.join(temp_dir_name, "test-rubicon-ml") + repository = repository_class(root_dir=root_dir) + expected_metric_dir = os.path.join( - expected_experiment_dir, + root_dir, + slugify(project_json["name"]), + "experiments", + experiment_json["id"], "metrics", slugify(metric_json["name"]), ) @@ -173,7 +218,8 @@ def __test_additional_tags_and_comments( ).__dict__ assert metric == metric_json - assert __test_additional_tags_and_comments( + assert _test_read_additional_tags_and_comments( + repository, expected_metric_dir, project_json["name"], experiment_id=experiment_json["id"], @@ -181,8 +227,29 @@ def __test_additional_tags_and_comments( entity_type="Metric", ) + +@pytest.mark.parametrize("repository_class", REPOSITORIES_TO_TEST) +def test_read_parameter_regression( + experiment_json, + parameter_json, + project_json, + repository_class, +): + """Tests that `rubicon_ml` can read the parameter domain entity from the filesystem.""" + if repository_class == LocalRepository: + temp_dir_context = tempfile.TemporaryDirectory() + else: + temp_dir_context = contextlib.nullcontext(enter_result="./test_read_parameter_regression/") + + with temp_dir_context as temp_dir_name: + root_dir = os.path.join(temp_dir_name, "test-rubicon-ml") + repository = repository_class(root_dir=root_dir) + expected_parameter_dir = os.path.join( - expected_experiment_dir, + root_dir, + slugify(project_json["name"]), + "experiments", + experiment_json["id"], "parameters", slugify(parameter_json["name"]), ) @@ -199,7 +266,8 @@ def __test_additional_tags_and_comments( ).__dict__ assert parameter == parameter_json - assert __test_additional_tags_and_comments( + assert _test_read_additional_tags_and_comments( + repository, expected_parameter_dir, project_json["name"], experiment_id=experiment_json["id"], @@ -207,8 +275,28 @@ def __test_additional_tags_and_comments( 
entity_type="Parameter", ) + +@pytest.mark.parametrize("repository_class", REPOSITORIES_TO_TEST) +def test_read_artifact_project_regression( + artifact_project_json, + project_json, + repository_class, +): + """Tests that `rubicon_ml` can read the artifact (project) domain entity from the filesystem.""" + if repository_class == LocalRepository: + temp_dir_context = tempfile.TemporaryDirectory() + else: + temp_dir_context = contextlib.nullcontext( + enter_result="./test_read_artifact_project_regression/" + ) + + with temp_dir_context as temp_dir_name: + root_dir = os.path.join(temp_dir_name, "test-rubicon-ml") + repository = repository_class(root_dir=root_dir) + expected_artifact_project_dir = os.path.join( - expected_project_dir, + root_dir, + slugify(project_json["name"]), "artifacts", artifact_project_json["id"], ) @@ -234,15 +322,39 @@ def __test_additional_tags_and_comments( assert artifact_project == artifact_project_json assert artifact_project_data == ARTIFACT_BINARY - assert __test_additional_tags_and_comments( + assert _test_read_additional_tags_and_comments( + repository, expected_artifact_project_dir, project_json["name"], entity_identifier=artifact_project_json["id"], entity_type="Artifact", ) + +@pytest.mark.parametrize("repository_class", REPOSITORIES_TO_TEST) +def test_read_artifact_experiment_regression( + artifact_experiment_json, + experiment_json, + project_json, + repository_class, +): + """Tests that `rubicon_ml` can read the artifact (experiment) domain entity from the filesystem.""" + if repository_class == LocalRepository: + temp_dir_context = tempfile.TemporaryDirectory() + else: + temp_dir_context = contextlib.nullcontext( + enter_result="./test_read_artifact_experiment_regression/" + ) + + with temp_dir_context as temp_dir_name: + root_dir = os.path.join(temp_dir_name, "test-rubicon-ml") + repository = repository_class(root_dir=root_dir) + expected_artifact_experiment_dir = os.path.join( - expected_experiment_dir, + root_dir, + 
slugify(project_json["name"]), + "experiments", + experiment_json["id"], "artifacts", artifact_experiment_json["id"], ) @@ -272,7 +384,8 @@ def __test_additional_tags_and_comments( assert artifact_experiment == artifact_experiment_json assert artifact_experiment_data == ARTIFACT_BINARY - assert __test_additional_tags_and_comments( + assert _test_read_additional_tags_and_comments( + repository, expected_artifact_experiment_dir, project_json["name"], experiment_id=experiment_json["id"], @@ -280,15 +393,58 @@ def __test_additional_tags_and_comments( entity_type="Artifact", ) + +@pytest.mark.parametrize("repository_class", REPOSITORIES_TO_TEST) +def test_read_dataframe_project_regression( + dataframe_project_json, + project_json, + repository_class, +): + """Tests that `rubicon_ml` can read the dataframe (project) domain entity from the filesystem. + + The `MemoryRepository` skips dataframe data as the `pandas` API can not be used to write directly + to memory. Dataframe data regression tests are covered by `test_read_write_regression`. 
+ """ + if repository_class == LocalRepository: + temp_dir_context = tempfile.TemporaryDirectory() + else: + temp_dir_context = contextlib.nullcontext( + enter_result="./test_read_dataframe_project_regression/" + ) + + with temp_dir_context as temp_dir_name: + root_dir = os.path.join(temp_dir_name, "test-rubicon-ml") + repository = repository_class(root_dir=root_dir) + + expected_dataframe_project_dir = os.path.join( + root_dir, + slugify(project_json["name"]), + "dataframes", + dataframe_project_json["id"], + ) + expected_dataframe_project_path = os.path.join( + expected_dataframe_project_dir, "metadata.json" + ) + + repository.filesystem.mkdirs(expected_dataframe_project_dir, exist_ok=True) + with repository.filesystem.open(expected_dataframe_project_path, "w") as file: + file.write(json.dumps(dataframe_project_json)) + + dataframe_project = repository.get_dataframe_metadata( + project_json["name"], + dataframe_project_json["id"], + ).__dict__ + + assert dataframe_project == dataframe_project_json + assert _test_read_additional_tags_and_comments( + repository, + expected_dataframe_project_dir, + project_json["name"], + entity_identifier=dataframe_project_json["id"], + entity_type="Dataframe", + ) + if repository_class != MemoryRepository: - expected_dataframe_project_dir = os.path.join( - expected_project_dir, - "dataframes", - dataframe_project_json["id"], - ) - expected_dataframe_project_path = os.path.join( - expected_dataframe_project_dir, "metadata.json" - ) expected_dataframe_project_data_dir = os.path.join( expected_dataframe_project_dir, "data" ) @@ -296,38 +452,73 @@ def __test_additional_tags_and_comments( expected_dataframe_project_data_dir, "data.parquet" ) - repository.filesystem.mkdirs(expected_dataframe_project_dir, exist_ok=True) repository.filesystem.mkdirs(expected_dataframe_project_data_dir, exist_ok=True) - with repository.filesystem.open(expected_dataframe_project_path, "w") as file: - file.write(json.dumps(dataframe_project_json)) 
DATAFRAME.to_parquet(expected_dataframe_project_data_path) - dataframe_project = repository.get_dataframe_metadata( - project_json["name"], - dataframe_project_json["id"], - ).__dict__ dataframe_project_data = repository.get_dataframe_data( project_json["name"], dataframe_project_json["id"], ) - assert dataframe_project == dataframe_project_json assert dataframe_project_data.equals(DATAFRAME) - assert __test_additional_tags_and_comments( - expected_dataframe_project_dir, - project_json["name"], - entity_identifier=dataframe_project_json["id"], - entity_type="Dataframe", - ) - expected_dataframe_experiment_dir = os.path.join( - expected_experiment_dir, - "dataframes", - dataframe_experiment_json["id"], - ) - expected_dataframe_experiment_path = os.path.join( - expected_dataframe_experiment_dir, "metadata.json" - ) + +@pytest.mark.parametrize("repository_class", REPOSITORIES_TO_TEST) +def test_read_dataframe_experiment_regression( + dataframe_experiment_json, + experiment_json, + project_json, + repository_class, +): + """Tests that `rubicon_ml` can read the dataframe (experiment) domain entity from the filesystem. + + The `MemoryRepository` skips dataframe data as the `pandas` API can not be used to write directly + to memory. Dataframe data regression tests are covered by `test_read_write_regression`. 
+ """ + if repository_class == LocalRepository: + temp_dir_context = tempfile.TemporaryDirectory() + else: + temp_dir_context = contextlib.nullcontext( + enter_result="./test_read_dataframe_experiment_regression/" + ) + + with temp_dir_context as temp_dir_name: + root_dir = os.path.join(temp_dir_name, "test-rubicon-ml") + repository = repository_class(root_dir=root_dir) + + expected_dataframe_experiment_dir = os.path.join( + root_dir, + slugify(project_json["name"]), + "experiments", + experiment_json["id"], + "dataframes", + dataframe_experiment_json["id"], + ) + expected_dataframe_experiment_path = os.path.join( + expected_dataframe_experiment_dir, "metadata.json" + ) + + repository.filesystem.mkdirs(expected_dataframe_experiment_dir, exist_ok=True) + with repository.filesystem.open(expected_dataframe_experiment_path, "w") as file: + file.write(json.dumps(dataframe_experiment_json)) + + dataframe_experiment = repository.get_dataframe_metadata( + project_json["name"], + dataframe_experiment_json["id"], + experiment_json["id"], + ).__dict__ + + assert dataframe_experiment == dataframe_experiment_json + assert _test_read_additional_tags_and_comments( + repository, + expected_dataframe_experiment_dir, + project_json["name"], + experiment_id=experiment_json["id"], + entity_identifier=dataframe_experiment_json["id"], + entity_type="Dataframe", + ) + + if repository_class != MemoryRepository: expected_dataframe_experiment_data_dir = os.path.join( expected_dataframe_experiment_dir, "data" ) @@ -335,32 +526,16 @@ def __test_additional_tags_and_comments( expected_dataframe_experiment_data_dir, "data.parquet" ) - repository.filesystem.mkdirs(expected_dataframe_experiment_dir, exist_ok=True) repository.filesystem.mkdirs(expected_dataframe_experiment_data_dir, exist_ok=True) - with repository.filesystem.open(expected_dataframe_experiment_path, "w") as file: - file.write(json.dumps(dataframe_experiment_json)) DATAFRAME.to_parquet(expected_dataframe_experiment_data_path) - 
dataframe_experiment = repository.get_dataframe_metadata( - project_json["name"], - dataframe_experiment_json["id"], - experiment_json["id"], - ).__dict__ dataframe_experiment_data = repository.get_dataframe_data( project_json["name"], dataframe_experiment_json["id"], experiment_json["id"], ) - assert dataframe_experiment == dataframe_experiment_json assert dataframe_experiment_data.equals(DATAFRAME) - assert __test_additional_tags_and_comments( - expected_dataframe_experiment_dir, - project_json["name"], - experiment_id=experiment_json["id"], - entity_identifier=dataframe_experiment_json["id"], - entity_type="Dataframe", - ) @pytest.mark.parametrize("repository_class", REPOSITORIES_TO_TEST) From 5149403f081101e9fd092b621d54dbd2c9be63f1 Mon Sep 17 00:00:00 2001 From: Ryan Soley Date: Fri, 6 Sep 2024 11:14:42 -0400 Subject: [PATCH 15/16] split read write regression into individual entity tests --- .../regression/test_repository_read_write.py | 304 ++++++++++++++---- 1 file changed, 236 insertions(+), 68 deletions(-) diff --git a/tests/regression/test_repository_read_write.py b/tests/regression/test_repository_read_write.py index 5859bf34..a7568b8e 100644 --- a/tests/regression/test_repository_read_write.py +++ b/tests/regression/test_repository_read_write.py @@ -538,81 +538,91 @@ def test_read_dataframe_experiment_regression( assert dataframe_experiment_data.equals(DATAFRAME) -@pytest.mark.parametrize("repository_class", REPOSITORIES_TO_TEST) -def test_read_write_regression( - artifact_project_json, - artifact_experiment_json, - dataframe_project_json, - dataframe_experiment_json, - experiment_json, - feature_json, - metric_json, - parameter_json, - project_json, - repository_class, +def _test_read_write_additional_tags_and_comments( + repository, project_name, **entity_identification_kwargs ): - """Tests that `rubicon_ml` can read each domain entity that it wrote.""" + is_passing = True + + repository.add_tags( + project_name, + TAGS_TO_ADD, + 
**entity_identification_kwargs, + ) + repository.remove_tags( + project_name, + TAGS_TO_REMOVE, + **entity_identification_kwargs, + ) + additional_tags = repository.get_tags( + project_name, + **entity_identification_kwargs, + ) + + for tags in additional_tags: + if "added_tags" in tags: + is_passing &= tags["added_tags"] == TAGS_TO_ADD + if "removed_tags" in tags: + is_passing &= tags["removed_tags"] == TAGS_TO_REMOVE + + repository.add_comments( + project_name, + COMMENTS_TO_ADD, + **entity_identification_kwargs, + ) + repository.remove_comments( + project_name, + COMMENTS_TO_REMOVE, + **entity_identification_kwargs, + ) + additional_comments = repository.get_comments( + project_name, + **entity_identification_kwargs, + ) + + for comments in additional_comments: + if "added_comments" in comments: + is_passing &= comments["added_comments"] == COMMENTS_TO_ADD + if "removed_tags" in comments: + is_passing &= comments["removed_comments"] == COMMENTS_TO_REMOVE + + return is_passing + + +@pytest.mark.parametrize("repository_class", REPOSITORIES_TO_TEST) +def test_read_write_project_regression(project_json, repository_class): + """Tests that `rubicon_ml` can read the project domain entity that it wrote.""" if repository_class == LocalRepository: temp_dir_context = tempfile.TemporaryDirectory() else: - temp_dir_context = contextlib.nullcontext(enter_result="./test_read_write_regression/") + temp_dir_context = contextlib.nullcontext( + enter_result="./test_read_write_project_regression/" + ) with temp_dir_context as temp_dir_name: root_dir = os.path.join(temp_dir_name, "test-rubicon-ml") repository = repository_class(root_dir=root_dir) - def __test_additional_tags_and_comments(project_name, **entity_identification_kwargs): - is_passing = True - - repository.add_tags( - project_name, - TAGS_TO_ADD, - **entity_identification_kwargs, - ) - repository.remove_tags( - project_name, - TAGS_TO_REMOVE, - **entity_identification_kwargs, - ) - additional_tags = repository.get_tags( 
- project_name, - **entity_identification_kwargs, - ) + repository.create_project(domain.Project(**project_json)) + project = repository.get_project(project_json["name"]).__dict__ - for tags in additional_tags: - if "added_tags" in tags: - is_passing &= tags["added_tags"] == TAGS_TO_ADD - if "removed_tags" in tags: - is_passing &= tags["removed_tags"] == TAGS_TO_REMOVE + assert project == project_json - repository.add_comments( - project_name, - COMMENTS_TO_ADD, - **entity_identification_kwargs, - ) - repository.remove_comments( - project_name, - COMMENTS_TO_REMOVE, - **entity_identification_kwargs, - ) - additional_comments = repository.get_comments( - project_name, - **entity_identification_kwargs, - ) - for comments in additional_comments: - if "added_comments" in comments: - is_passing &= comments["added_comments"] == COMMENTS_TO_ADD - if "removed_tags" in comments: - is_passing &= comments["removed_comments"] == COMMENTS_TO_REMOVE +@pytest.mark.parametrize("repository_class", REPOSITORIES_TO_TEST) +def test_read_write_experiment_regression(experiment_json, project_json, repository_class): + """Tests that `rubicon_ml` can read the experiment domain entity that it wrote.""" + if repository_class == LocalRepository: + temp_dir_context = tempfile.TemporaryDirectory() + else: + temp_dir_context = contextlib.nullcontext( + enter_result="./test_read_write_experiment_regression/" + ) - return is_passing + with temp_dir_context as temp_dir_name: + root_dir = os.path.join(temp_dir_name, "test-rubicon-ml") + repository = repository_class(root_dir=root_dir) repository.create_project(domain.Project(**project_json)) - project = repository.get_project(project_json["name"]).__dict__ - - assert project == project_json - repository.create_experiment(domain.Experiment(**experiment_json)) experiment = repository.get_experiment( project_json["name"], @@ -620,13 +630,36 @@ def __test_additional_tags_and_comments(project_name, **entity_identification_kw ).__dict__ assert experiment 
== experiment_json - assert __test_additional_tags_and_comments( + assert _test_read_write_additional_tags_and_comments( + repository, project_json["name"], experiment_id=experiment_json["id"], entity_identifier=experiment_json["id"], entity_type="Experiment", ) + +@pytest.mark.parametrize("repository_class", REPOSITORIES_TO_TEST) +def test_read_write_feature_regression( + experiment_json, + feature_json, + project_json, + repository_class, +): + """Tests that `rubicon_ml` can read the feature domain entity that it wrote.""" + if repository_class == LocalRepository: + temp_dir_context = tempfile.TemporaryDirectory() + else: + temp_dir_context = contextlib.nullcontext( + enter_result="./test_read_write_feature_regression/" + ) + + with temp_dir_context as temp_dir_name: + root_dir = os.path.join(temp_dir_name, "test-rubicon-ml") + repository = repository_class(root_dir=root_dir) + + repository.create_project(domain.Project(**project_json)) + repository.create_experiment(domain.Experiment(**experiment_json)) repository.create_feature( domain.Feature(**feature_json), project_json["name"], @@ -639,13 +672,36 @@ def __test_additional_tags_and_comments(project_name, **entity_identification_kw ).__dict__ assert feature == feature_json - assert __test_additional_tags_and_comments( + assert _test_read_write_additional_tags_and_comments( + repository, project_json["name"], experiment_id=experiment_json["id"], entity_identifier=feature_json["name"], entity_type="Feature", ) + +@pytest.mark.parametrize("repository_class", REPOSITORIES_TO_TEST) +def test_read_write_metric_regression( + experiment_json, + metric_json, + project_json, + repository_class, +): + """Tests that `rubicon_ml` can read the metric domain entity that it wrote.""" + if repository_class == LocalRepository: + temp_dir_context = tempfile.TemporaryDirectory() + else: + temp_dir_context = contextlib.nullcontext( + enter_result="./test_read_write_metric_regression/" + ) + + with temp_dir_context as 
temp_dir_name: + root_dir = os.path.join(temp_dir_name, "test-rubicon-ml") + repository = repository_class(root_dir=root_dir) + + repository.create_project(domain.Project(**project_json)) + repository.create_experiment(domain.Experiment(**experiment_json)) repository.create_metric( domain.Metric(**metric_json), project_json["name"], @@ -658,13 +714,36 @@ def __test_additional_tags_and_comments(project_name, **entity_identification_kw ).__dict__ assert metric == metric_json - assert __test_additional_tags_and_comments( + assert _test_read_write_additional_tags_and_comments( + repository, project_json["name"], experiment_id=experiment_json["id"], entity_identifier=metric_json["name"], entity_type="Metric", ) + +@pytest.mark.parametrize("repository_class", REPOSITORIES_TO_TEST) +def test_read_write_parameter_regression( + experiment_json, + parameter_json, + project_json, + repository_class, +): + """Tests that `rubicon_ml` can read the parameter domain entity that it wrote.""" + if repository_class == LocalRepository: + temp_dir_context = tempfile.TemporaryDirectory() + else: + temp_dir_context = contextlib.nullcontext( + enter_result="./test_read_write_parameter_regression/" + ) + + with temp_dir_context as temp_dir_name: + root_dir = os.path.join(temp_dir_name, "test-rubicon-ml") + repository = repository_class(root_dir=root_dir) + + repository.create_project(domain.Project(**project_json)) + repository.create_experiment(domain.Experiment(**experiment_json)) repository.create_parameter( domain.Parameter(**parameter_json), project_json["name"], @@ -677,13 +756,34 @@ def __test_additional_tags_and_comments(project_name, **entity_identification_kw ).__dict__ assert parameter == parameter_json - assert __test_additional_tags_and_comments( + assert _test_read_write_additional_tags_and_comments( + repository, project_json["name"], experiment_id=experiment_json["id"], entity_identifier=parameter_json["name"], entity_type="Parameter", ) + 
+@pytest.mark.parametrize("repository_class", REPOSITORIES_TO_TEST) +def test_read_write_artifact_project_regression( + artifact_project_json, + project_json, + repository_class, +): + """Tests that `rubicon_ml` can read the artifact (project) domain entity that it wrote.""" + if repository_class == LocalRepository: + temp_dir_context = tempfile.TemporaryDirectory() + else: + temp_dir_context = contextlib.nullcontext( + enter_result="./test_read_write_artifact_project_regression/" + ) + + with temp_dir_context as temp_dir_name: + root_dir = os.path.join(temp_dir_name, "test-rubicon-ml") + repository = repository_class(root_dir=root_dir) + + repository.create_project(domain.Project(**project_json)) repository.create_artifact( domain.Artifact(**artifact_project_json), ARTIFACT_BINARY, @@ -700,12 +800,35 @@ def __test_additional_tags_and_comments(project_name, **entity_identification_kw assert artifact_project == artifact_project_json assert artifact_project_data == ARTIFACT_BINARY - assert __test_additional_tags_and_comments( + assert _test_read_write_additional_tags_and_comments( + repository, project_json["name"], entity_identifier=artifact_project_json["id"], entity_type="Artifact", ) + +@pytest.mark.parametrize("repository_class", REPOSITORIES_TO_TEST) +def test_read_write_artifact_experiment_regression( + artifact_experiment_json, + experiment_json, + project_json, + repository_class, +): + """Tests that `rubicon_ml` can read the artifact (experiment) domain entity that it wrote.""" + if repository_class == LocalRepository: + temp_dir_context = tempfile.TemporaryDirectory() + else: + temp_dir_context = contextlib.nullcontext( + enter_result="./test_read_write_artifact_experiment_regression/" + ) + + with temp_dir_context as temp_dir_name: + root_dir = os.path.join(temp_dir_name, "test-rubicon-ml") + repository = repository_class(root_dir=root_dir) + + repository.create_project(domain.Project(**project_json)) + 
repository.create_experiment(domain.Experiment(**experiment_json)) repository.create_artifact( domain.Artifact(**artifact_experiment_json), ARTIFACT_BINARY, @@ -725,13 +848,34 @@ def __test_additional_tags_and_comments(project_name, **entity_identification_kw assert artifact_experiment == artifact_experiment_json assert artifact_experiment_data == ARTIFACT_BINARY - assert __test_additional_tags_and_comments( + assert _test_read_write_additional_tags_and_comments( + repository, project_json["name"], experiment_id=experiment_json["id"], entity_identifier=artifact_experiment_json["id"], entity_type="Artifact", ) + +@pytest.mark.parametrize("repository_class", REPOSITORIES_TO_TEST) +def test_read_write_dataframe_project_regression( + dataframe_project_json, + project_json, + repository_class, +): + """Tests that `rubicon_ml` can read the dataframe (project) domain entity that it wrote.""" + if repository_class == LocalRepository: + temp_dir_context = tempfile.TemporaryDirectory() + else: + temp_dir_context = contextlib.nullcontext( + enter_result="./test_read_write_dataframe_project_regression/" + ) + + with temp_dir_context as temp_dir_name: + root_dir = os.path.join(temp_dir_name, "test-rubicon-ml") + repository = repository_class(root_dir=root_dir) + + repository.create_project(domain.Project(**project_json)) repository.create_dataframe( domain.Dataframe(**dataframe_project_json), DATAFRAME, @@ -748,12 +892,35 @@ def __test_additional_tags_and_comments(project_name, **entity_identification_kw assert dataframe_project == dataframe_project_json assert dataframe_project_data.equals(DATAFRAME) - assert __test_additional_tags_and_comments( + assert _test_read_write_additional_tags_and_comments( + repository, project_json["name"], entity_identifier=dataframe_project_json["id"], entity_type="Dataframe", ) + +@pytest.mark.parametrize("repository_class", REPOSITORIES_TO_TEST) +def test_read_write_dataframe_experiment_regression( + dataframe_experiment_json, + 
experiment_json, + project_json, + repository_class, +): + """Tests that `rubicon_ml` can read the dataframe (experiment) domain entity that it wrote.""" + if repository_class == LocalRepository: + temp_dir_context = tempfile.TemporaryDirectory() + else: + temp_dir_context = contextlib.nullcontext( + enter_result="./test_read_write_dataframe_experiment_regression/" + ) + + with temp_dir_context as temp_dir_name: + root_dir = os.path.join(temp_dir_name, "test-rubicon-ml") + repository = repository_class(root_dir=root_dir) + + repository.create_project(domain.Project(**project_json)) + repository.create_experiment(domain.Experiment(**experiment_json)) repository.create_dataframe( domain.Dataframe(**dataframe_experiment_json), DATAFRAME, @@ -773,7 +940,8 @@ def __test_additional_tags_and_comments(project_name, **entity_identification_kw assert dataframe_experiment == dataframe_experiment_json assert dataframe_experiment_data.equals(DATAFRAME) - assert __test_additional_tags_and_comments( + assert _test_read_write_additional_tags_and_comments( + repository, project_json["name"], experiment_id=experiment_json["id"], entity_identifier=dataframe_experiment_json["id"], From e96cb967323c51a71fb73b592097ba77536c0573 Mon Sep 17 00:00:00 2001 From: Ryan Soley Date: Fri, 6 Sep 2024 14:01:28 -0400 Subject: [PATCH 16/16] split write & delete regressions into individual entity tests --- .../regression/test_repository_read_write.py | 527 +++++++++++++----- 1 file changed, 389 insertions(+), 138 deletions(-) diff --git a/tests/regression/test_repository_read_write.py b/tests/regression/test_repository_read_write.py index a7568b8e..8f6cb70f 100644 --- a/tests/regression/test_repository_read_write.py +++ b/tests/regression/test_repository_read_write.py @@ -949,85 +949,73 @@ def test_read_write_dataframe_experiment_regression( ) +def _test_write_additional_tags_and_comments( + repository, tag_dir, project_name, **entity_identification_kwargs +): + is_passing = True + + 
repository.add_tags( + project_name, + TAGS_TO_ADD, + **entity_identification_kwargs, + ) + repository.remove_tags( + project_name, + TAGS_TO_REMOVE, + **entity_identification_kwargs, + ) + + tag_path = os.path.join(tag_dir, "tags_*.json") + tag_files = repository.filesystem.glob(tag_path, detail=True) + for tag_file in tag_files: + with repository.filesystem.open(tag_file, "r") as file: + tags = json.loads(file.read()) + + if "added_tags" in tags: + is_passing &= tags["added_tags"] == TAGS_TO_ADD + if "removed_tags" in tags: + is_passing &= tags["removed_tags"] == TAGS_TO_REMOVE + + repository.add_comments( + project_name, + COMMENTS_TO_ADD, + **entity_identification_kwargs, + ) + repository.remove_comments( + project_name, + COMMENTS_TO_REMOVE, + **entity_identification_kwargs, + ) + + comment_path = os.path.join(tag_dir, "comments_*.json") + comment_files = repository.filesystem.glob(comment_path, detail=True) + for comment_file in comment_files: + with repository.filesystem.open(comment_file, "r") as file: + comments = json.loads(file.read()) + + if "added_comments" in comments: + is_passing &= comments["added_comments"] == COMMENTS_TO_ADD + if "removed_tags" in comments: + is_passing &= comments["removed_comments"] == COMMENTS_TO_REMOVE + + return is_passing + + @pytest.mark.parametrize("repository_class", REPOSITORIES_TO_TEST) -def test_write_regression( - artifact_project_json, - artifact_experiment_json, - dataframe_project_json, - dataframe_experiment_json, - experiment_json, - feature_json, - metric_json, - parameter_json, +def test_write_project_regression( project_json, repository_class, ): - """Tests that `rubicon_ml` can write each domain entity to the filesystem. - - The `MemoryRepository` skips dataframe tests as the `pandas` API can not be - used to read directly from memory. Dataframe regression tests are covered by - `test_read_write_regression`. 
- """ + """Tests that `rubicon_ml` can write a project domain entity to the filesystem.""" if repository_class == LocalRepository: temp_dir_context = tempfile.TemporaryDirectory() else: - temp_dir_context = contextlib.nullcontext(enter_result="./test_write_regression/") + temp_dir_context = contextlib.nullcontext(enter_result="./test_write_project_regression/") with temp_dir_context as temp_dir_name: root_dir = os.path.join(temp_dir_name, "test-rubicon-ml") repository = repository_class(root_dir=root_dir) - def __test_additional_tags_and_comments( - tag_dir, project_name, **entity_identification_kwargs - ): - is_passing = True - - repository.add_tags( - project_name, - TAGS_TO_ADD, - **entity_identification_kwargs, - ) - repository.remove_tags( - project_name, - TAGS_TO_REMOVE, - **entity_identification_kwargs, - ) - - tag_path = os.path.join(tag_dir, "tags_*.json") - tag_files = repository.filesystem.glob(tag_path, detail=True) - for tag_file in tag_files: - with repository.filesystem.open(tag_file, "r") as file: - tags = json.loads(file.read()) - - if "added_tags" in tags: - is_passing &= tags["added_tags"] == TAGS_TO_ADD - if "removed_tags" in tags: - is_passing &= tags["removed_tags"] == TAGS_TO_REMOVE - - repository.add_comments( - project_name, - COMMENTS_TO_ADD, - **entity_identification_kwargs, - ) - repository.remove_comments( - project_name, - COMMENTS_TO_REMOVE, - **entity_identification_kwargs, - ) - - comment_path = os.path.join(tag_dir, "comments_*.json") - comment_files = repository.filesystem.glob(comment_path, detail=True) - for comment_file in comment_files: - with repository.filesystem.open(comment_file, "r") as file: - comments = json.loads(file.read()) - - if "added_comments" in comments: - is_passing &= comments["added_comments"] == COMMENTS_TO_ADD - if "removed_tags" in comments: - is_passing &= comments["removed_comments"] == COMMENTS_TO_REMOVE - - return is_passing - repository.create_project(domain.Project(**project_json)) 
expected_project_dir = os.path.join(root_dir, slugify(project_json["name"])) @@ -1038,10 +1026,31 @@ def __test_additional_tags_and_comments( assert project == project_json + +@pytest.mark.parametrize("repository_class", REPOSITORIES_TO_TEST) +def test_write_experiment_regression( + experiment_json, + project_json, + repository_class, +): + """Tests that `rubicon_ml` can write an experiment domain entity to the filesystem.""" + if repository_class == LocalRepository: + temp_dir_context = tempfile.TemporaryDirectory() + else: + temp_dir_context = contextlib.nullcontext( + enter_result="./test_write_experiment_regression/" + ) + + with temp_dir_context as temp_dir_name: + root_dir = os.path.join(temp_dir_name, "test-rubicon-ml") + repository = repository_class(root_dir=root_dir) + + repository.create_project(domain.Project(**project_json)) repository.create_experiment(domain.Experiment(**experiment_json)) expected_experiment_dir = os.path.join( - expected_project_dir, + root_dir, + slugify(project_json["name"]), "experiments", experiment_json["id"], ) @@ -1051,7 +1060,8 @@ def __test_additional_tags_and_comments( experiment = json.loads(file.read()) assert experiment == experiment_json - assert __test_additional_tags_and_comments( + assert _test_write_additional_tags_and_comments( + repository, expected_experiment_dir, project_json["name"], experiment_id=experiment_json["id"], @@ -1059,6 +1069,26 @@ def __test_additional_tags_and_comments( entity_type="Experiment", ) + +@pytest.mark.parametrize("repository_class", REPOSITORIES_TO_TEST) +def test_write_feature_regression( + experiment_json, + feature_json, + project_json, + repository_class, +): + """Tests that `rubicon_ml` can write a feature domain entity to the filesystem.""" + if repository_class == LocalRepository: + temp_dir_context = tempfile.TemporaryDirectory() + else: + temp_dir_context = contextlib.nullcontext(enter_result="./test_write_feature_regression/") + + with temp_dir_context as temp_dir_name: + 
root_dir = os.path.join(temp_dir_name, "test-rubicon-ml") + repository = repository_class(root_dir=root_dir) + + repository.create_project(domain.Project(**project_json)) + repository.create_experiment(domain.Experiment(**experiment_json)) repository.create_feature( domain.Feature(**feature_json), project_json["name"], @@ -1066,7 +1096,10 @@ def __test_additional_tags_and_comments( ) expected_feature_dir = os.path.join( - expected_experiment_dir, + root_dir, + slugify(project_json["name"]), + "experiments", + experiment_json["id"], "features", slugify(feature_json["name"]), ) @@ -1076,7 +1109,8 @@ def __test_additional_tags_and_comments( feature = json.loads(file.read()) assert feature == feature_json - assert __test_additional_tags_and_comments( + assert _test_write_additional_tags_and_comments( + repository, expected_feature_dir, project_json["name"], experiment_id=experiment_json["id"], @@ -1084,6 +1118,26 @@ def __test_additional_tags_and_comments( entity_type="Feature", ) + +@pytest.mark.parametrize("repository_class", REPOSITORIES_TO_TEST) +def test_write_metric_regression( + experiment_json, + metric_json, + project_json, + repository_class, +): + """Tests that `rubicon_ml` can write a metric domain entity to the filesystem.""" + if repository_class == LocalRepository: + temp_dir_context = tempfile.TemporaryDirectory() + else: + temp_dir_context = contextlib.nullcontext(enter_result="./test_write_metric_regression/") + + with temp_dir_context as temp_dir_name: + root_dir = os.path.join(temp_dir_name, "test-rubicon-ml") + repository = repository_class(root_dir=root_dir) + + repository.create_project(domain.Project(**project_json)) + repository.create_experiment(domain.Experiment(**experiment_json)) repository.create_metric( domain.Metric(**metric_json), project_json["name"], @@ -1091,7 +1145,10 @@ def __test_additional_tags_and_comments( ) expected_metric_dir = os.path.join( - expected_experiment_dir, + root_dir, + slugify(project_json["name"]), + 
"experiments", + experiment_json["id"], "metrics", slugify(metric_json["name"]), ) @@ -1101,7 +1158,8 @@ def __test_additional_tags_and_comments( metric = json.loads(file.read()) assert metric == metric_json - assert __test_additional_tags_and_comments( + assert _test_write_additional_tags_and_comments( + repository, expected_metric_dir, project_json["name"], experiment_id=experiment_json["id"], @@ -1109,6 +1167,26 @@ def __test_additional_tags_and_comments( entity_type="Metric", ) + +@pytest.mark.parametrize("repository_class", REPOSITORIES_TO_TEST) +def test_write_parameter_regression( + experiment_json, + parameter_json, + project_json, + repository_class, +): + """Tests that `rubicon_ml` can write a parameter domain entity to the filesystem.""" + if repository_class == LocalRepository: + temp_dir_context = tempfile.TemporaryDirectory() + else: + temp_dir_context = contextlib.nullcontext(enter_result="./test_write_parameter_regression/") + + with temp_dir_context as temp_dir_name: + root_dir = os.path.join(temp_dir_name, "test-rubicon-ml") + repository = repository_class(root_dir=root_dir) + + repository.create_project(domain.Project(**project_json)) + repository.create_experiment(domain.Experiment(**experiment_json)) repository.create_parameter( domain.Parameter(**parameter_json), project_json["name"], @@ -1116,7 +1194,10 @@ def __test_additional_tags_and_comments( ) expected_parameter_dir = os.path.join( - expected_experiment_dir, + root_dir, + slugify(project_json["name"]), + "experiments", + experiment_json["id"], "parameters", slugify(parameter_json["name"]), ) @@ -1126,7 +1207,8 @@ def __test_additional_tags_and_comments( parameter = json.loads(file.read()) assert parameter == parameter_json - assert __test_additional_tags_and_comments( + assert _test_write_additional_tags_and_comments( + repository, expected_parameter_dir, project_json["name"], experiment_id=experiment_json["id"], @@ -1134,6 +1216,26 @@ def __test_additional_tags_and_comments( 
entity_type="Parameter", ) + +@pytest.mark.parametrize("repository_class", REPOSITORIES_TO_TEST) +def test_write_artifact_project_regression( + artifact_project_json, + project_json, + repository_class, +): + """Tests that `rubicon_ml` can write an artifact (project) domain entity to the filesystem.""" + if repository_class == LocalRepository: + temp_dir_context = tempfile.TemporaryDirectory() + else: + temp_dir_context = contextlib.nullcontext( + enter_result="./test_write_artifact_project_regression/" + ) + + with temp_dir_context as temp_dir_name: + root_dir = os.path.join(temp_dir_name, "test-rubicon-ml") + repository = repository_class(root_dir=root_dir) + + repository.create_project(domain.Project(**project_json)) repository.create_artifact( domain.Artifact(**artifact_project_json), ARTIFACT_BINARY, @@ -1141,7 +1243,8 @@ def __test_additional_tags_and_comments( ) expected_artifact_project_dir = os.path.join( - expected_project_dir, + root_dir, + slugify(project_json["name"]), "artifacts", artifact_project_json["id"], ) @@ -1157,13 +1260,36 @@ def __test_additional_tags_and_comments( assert artifact_project == artifact_project_json assert artifact_project_data == ARTIFACT_BINARY - assert __test_additional_tags_and_comments( + assert _test_write_additional_tags_and_comments( + repository, expected_artifact_project_dir, project_json["name"], entity_identifier=artifact_project_json["id"], entity_type="Artifact", ) + +@pytest.mark.parametrize("repository_class", REPOSITORIES_TO_TEST) +def test_write_artifact_experiment_regression( + artifact_experiment_json, + experiment_json, + project_json, + repository_class, +): + """Tests that `rubicon_ml` can write an artifact (experiment) domain entity to the filesystem.""" + if repository_class == LocalRepository: + temp_dir_context = tempfile.TemporaryDirectory() + else: + temp_dir_context = contextlib.nullcontext( + enter_result="./test_write_artifact_experiment_regression/" + ) + + with temp_dir_context as 
temp_dir_name: + root_dir = os.path.join(temp_dir_name, "test-rubicon-ml") + repository = repository_class(root_dir=root_dir) + + repository.create_project(domain.Project(**project_json)) + repository.create_experiment(domain.Experiment(**experiment_json)) repository.create_artifact( domain.Artifact(**artifact_experiment_json), ARTIFACT_BINARY, @@ -1172,7 +1298,10 @@ def __test_additional_tags_and_comments( ) expected_artifact_experiment_dir = os.path.join( - expected_experiment_dir, + root_dir, + slugify(project_json["name"]), + "experiments", + experiment_json["id"], "artifacts", artifact_experiment_json["id"], ) @@ -1190,7 +1319,8 @@ def __test_additional_tags_and_comments( assert artifact_experiment == artifact_experiment_json assert artifact_experiment_data == ARTIFACT_BINARY - assert __test_additional_tags_and_comments( + assert _test_write_additional_tags_and_comments( + repository, expected_artifact_experiment_dir, project_json["name"], experiment_id=experiment_json["id"], @@ -1198,87 +1328,147 @@ def __test_additional_tags_and_comments( entity_type="Artifact", ) - if repository_class != MemoryRepository: - repository.create_dataframe( - domain.Dataframe(**dataframe_project_json), - DATAFRAME, - project_json["name"], - ) - expected_dataframe_project_dir = os.path.join( - expected_project_dir, - "dataframes", - dataframe_project_json["id"], - ) - expected_dataframe_project_path = os.path.join( - expected_dataframe_project_dir, "metadata.json" - ) +@pytest.mark.parametrize("repository_class", REPOSITORIES_TO_TEST) +def test_write_dataframe_project_regression( + dataframe_project_json, + project_json, + repository_class, +): + """Tests that `rubicon_ml` can write a dataframe (project) domain entity to the filesystem. + + The `MemoryRepository` skips dataframe data as the `pandas` API can not be used to read directly + from memory. Dataframe data regression tests are covered by `test_read_write_regression`. 
+ """ + if repository_class == LocalRepository: + temp_dir_context = tempfile.TemporaryDirectory() + else: + temp_dir_context = contextlib.nullcontext( + enter_result="./test_write_dataframe_project_regression/" + ) + + with temp_dir_context as temp_dir_name: + root_dir = os.path.join(temp_dir_name, "test-rubicon-ml") + repository = repository_class(root_dir=root_dir) + + repository.create_project(domain.Project(**project_json)) + repository.create_dataframe( + domain.Dataframe(**dataframe_project_json), + DATAFRAME, + project_json["name"], + ) + + expected_dataframe_project_dir = os.path.join( + root_dir, + slugify(project_json["name"]), + "dataframes", + dataframe_project_json["id"], + ) + expected_dataframe_project_path = os.path.join( + expected_dataframe_project_dir, "metadata.json" + ) + with repository.filesystem.open(expected_dataframe_project_path, "r") as file: + dataframe_project = json.loads(file.read()) + + assert dataframe_project == dataframe_project_json + assert _test_write_additional_tags_and_comments( + repository, + expected_dataframe_project_dir, + project_json["name"], + entity_identifier=dataframe_project_json["id"], + entity_type="Dataframe", + ) + + if repository_class != MemoryRepository: expected_dataframe_project_data_path = os.path.join( expected_dataframe_project_dir, "data", "data.parquet" ) - with repository.filesystem.open(expected_dataframe_project_path, "r") as file: - dataframe_project = json.loads(file.read()) dataframe_project_data = pd.read_parquet(expected_dataframe_project_data_path) - assert dataframe_project == dataframe_project_json assert dataframe_project_data.equals(DATAFRAME) - assert __test_additional_tags_and_comments( - expected_dataframe_project_dir, - project_json["name"], - entity_identifier=dataframe_project_json["id"], - entity_type="Dataframe", - ) - repository.create_dataframe( - domain.Dataframe(**dataframe_experiment_json), - DATAFRAME, - project_json["name"], - experiment_json["id"], - ) - 
expected_dataframe_experiment_dir = os.path.join( - expected_experiment_dir, - "dataframes", - dataframe_experiment_json["id"], - ) - expected_dataframe_experiment_path = os.path.join( - expected_dataframe_experiment_dir, "metadata.json" - ) +@pytest.mark.parametrize("repository_class", REPOSITORIES_TO_TEST) +def test_write_dataframe_experiment_regression( + dataframe_experiment_json, + experiment_json, + project_json, + repository_class, +): + """Tests that `rubicon_ml` can write a dataframe (experiment) domain entity to the filesystem. + + The `MemoryRepository` skips dataframe data as the `pandas` API can not be used to read directly + from memory. Dataframe data regression tests are covered by `test_read_write_regression`. + """ + if repository_class == LocalRepository: + temp_dir_context = tempfile.TemporaryDirectory() + else: + temp_dir_context = contextlib.nullcontext( + enter_result="./test_write_dataframe_experiment_regression/" + ) + + with temp_dir_context as temp_dir_name: + root_dir = os.path.join(temp_dir_name, "test-rubicon-ml") + repository = repository_class(root_dir=root_dir) + + repository.create_project(domain.Project(**project_json)) + repository.create_experiment(domain.Experiment(**experiment_json)) + repository.create_dataframe( + domain.Dataframe(**dataframe_experiment_json), + DATAFRAME, + project_json["name"], + experiment_json["id"], + ) + + expected_dataframe_experiment_dir = os.path.join( + root_dir, + slugify(project_json["name"]), + "experiments", + experiment_json["id"], + "dataframes", + dataframe_experiment_json["id"], + ) + expected_dataframe_experiment_path = os.path.join( + expected_dataframe_experiment_dir, "metadata.json" + ) + + with repository.filesystem.open(expected_dataframe_experiment_path, "r") as file: + dataframe_experiment = json.loads(file.read()) + + assert dataframe_experiment == dataframe_experiment_json + assert _test_write_additional_tags_and_comments( + repository, + expected_dataframe_experiment_dir, + 
project_json["name"], + experiment_id=experiment_json["id"], + entity_identifier=dataframe_experiment_json["id"], + entity_type="Dataframe", + ) + + if repository_class != MemoryRepository: expected_dataframe_experiment_data_path = os.path.join( expected_dataframe_experiment_dir, "data", "data.parquet" ) - with repository.filesystem.open(expected_dataframe_experiment_path, "r") as file: - dataframe_experiment = json.loads(file.read()) dataframe_experiment_data = pd.read_parquet(expected_dataframe_experiment_data_path) - assert dataframe_experiment == dataframe_experiment_json assert dataframe_experiment_data.equals(DATAFRAME) - assert __test_additional_tags_and_comments( - expected_dataframe_experiment_dir, - project_json["name"], - experiment_id=experiment_json["id"], - entity_identifier=dataframe_experiment_json["id"], - entity_type="Dataframe", - ) @pytest.mark.parametrize("repository_class", REPOSITORIES_TO_TEST) -def test_delete_regression( +def test_delete_artifact_project_regression( artifact_project_json, - artifact_experiment_json, - dataframe_project_json, - dataframe_experiment_json, - experiment_json, project_json, repository_class, ): - """Tests that `rubicon_ml` can delete artifacts and dataframes from the filesystem.""" + """Tests that `rubicon_ml` can delete an artifact (project) domain from the filesystem.""" if repository_class == LocalRepository: temp_dir_context = tempfile.TemporaryDirectory() else: - temp_dir_context = contextlib.nullcontext(enter_result="./test_delete_regression/") + temp_dir_context = contextlib.nullcontext( + enter_result="./test_delete_artifact_project_regression/" + ) with temp_dir_context as temp_dir_name: root_dir = os.path.join(temp_dir_name, "test-rubicon-ml") @@ -1290,9 +1480,9 @@ def test_delete_regression( project_json["name"], ) - expected_project_dir = os.path.join(root_dir, slugify(project_json["name"])) expected_artifact_project_path = os.path.join( - expected_project_dir, + root_dir, + 
slugify(project_json["name"]), "artifacts", artifact_project_json["id"], "metadata.json", @@ -1307,6 +1497,25 @@ def test_delete_regression( assert not repository.filesystem.exists(expected_artifact_project_path) + +@pytest.mark.parametrize("repository_class", REPOSITORIES_TO_TEST) +def test_delete_artifact_experiment_regression( + artifact_experiment_json, + experiment_json, + project_json, + repository_class, +): + """Tests that `rubicon_ml` can delete an artifact (experiment) domain from the filesystem.""" + if repository_class == LocalRepository: + temp_dir_context = tempfile.TemporaryDirectory() + else: + temp_dir_context = contextlib.nullcontext( + enter_result="./test_delete_artifact_experiment_regression/" + ) + + with temp_dir_context as temp_dir_name: + root_dir = os.path.join(temp_dir_name, "test-rubicon-ml") + repository = repository_class(root_dir=root_dir) repository.create_artifact( domain.Artifact(**artifact_experiment_json), ARTIFACT_BINARY, @@ -1315,7 +1524,8 @@ def test_delete_regression( ) expected_experiment_dir = os.path.join( - expected_project_dir, + root_dir, + slugify(project_json["name"]), "experiments", experiment_json["id"], ) @@ -1336,6 +1546,24 @@ def test_delete_regression( assert not repository.filesystem.exists(expected_artifact_experiment_path) + +@pytest.mark.parametrize("repository_class", REPOSITORIES_TO_TEST) +def test_delete_dataframe_project_regression( + dataframe_project_json, + project_json, + repository_class, +): + """Tests that `rubicon_ml` can delete a dataframe (project) domain from the filesystem.""" + if repository_class == LocalRepository: + temp_dir_context = tempfile.TemporaryDirectory() + else: + temp_dir_context = contextlib.nullcontext( + enter_result="./test_delete_dataframe_project_regression/" + ) + + with temp_dir_context as temp_dir_name: + root_dir = os.path.join(temp_dir_name, "test-rubicon-ml") + repository = repository_class(root_dir=root_dir) repository.create_dataframe( 
domain.Dataframe(**dataframe_project_json), DATAFRAME, @@ -1343,7 +1571,8 @@ def test_delete_regression( ) expected_dataframe_project_path = os.path.join( - expected_project_dir, + root_dir, + slugify(project_json["name"]), "dataframes", dataframe_project_json["id"], "metadata.json", @@ -1358,6 +1587,25 @@ def test_delete_regression( assert not repository.filesystem.exists(expected_dataframe_project_path) + +@pytest.mark.parametrize("repository_class", REPOSITORIES_TO_TEST) +def test_delete_dataframe_experiment_regression( + dataframe_experiment_json, + experiment_json, + project_json, + repository_class, +): + """Tests that `rubicon_ml` can delete a dataframe (experiment) domain from the filesystem.""" + if repository_class == LocalRepository: + temp_dir_context = tempfile.TemporaryDirectory() + else: + temp_dir_context = contextlib.nullcontext( + enter_result="./test_delete_dataframe_experiment_regression/" + ) + + with temp_dir_context as temp_dir_name: + root_dir = os.path.join(temp_dir_name, "test-rubicon-ml") + repository = repository_class(root_dir=root_dir) repository.create_dataframe( domain.Dataframe(**dataframe_experiment_json), DATAFRAME, @@ -1366,7 +1614,10 @@ def test_delete_regression( ) expected_dataframe_experiment_path = os.path.join( - expected_experiment_dir, + root_dir, + slugify(project_json["name"]), + "experiments", + experiment_json["id"], "dataframes", dataframe_experiment_json["id"], "metadata.json",