From 265b1efa8007212bb60581447a32012795243435 Mon Sep 17 00:00:00 2001 From: Sonaal Thaker Date: Tue, 6 Feb 2024 15:04:57 -0800 Subject: [PATCH 1/4] adding commentmixin to domain layer, experiment domain, base repo + test --- rubicon_ml/domain/experiment.py | 5 +- rubicon_ml/domain/mixin.py | 15 ++++++ rubicon_ml/repository/base.py | 61 +++++++++++++++++++++++++ tests/unit/domain/test_mixin.py | 14 ++++-- tests/unit/repository/test_base_repo.py | 22 +++++++++ 5 files changed, 112 insertions(+), 5 deletions(-) diff --git a/rubicon_ml/domain/experiment.py b/rubicon_ml/domain/experiment.py index d5118b37..7912cf2f 100644 --- a/rubicon_ml/domain/experiment.py +++ b/rubicon_ml/domain/experiment.py @@ -4,12 +4,12 @@ from datetime import datetime from typing import List, Optional -from rubicon_ml.domain.mixin import TagMixin +from rubicon_ml.domain.mixin import TagMixin, CommentMixin from rubicon_ml.domain.utils import TrainingMetadata, uuid @dataclass -class Experiment(TagMixin): +class Experiment(TagMixin, CommentMixin): project_name: str id: str = field(default_factory=uuid.uuid4) @@ -20,4 +20,5 @@ class Experiment(TagMixin): commit_hash: Optional[str] = None training_metadata: Optional[TrainingMetadata] = None tags: List[str] = field(default_factory=list) + comments: List[str] = field(default_factory=list) created_at: datetime = field(default_factory=datetime.utcnow) diff --git a/rubicon_ml/domain/mixin.py b/rubicon_ml/domain/mixin.py index 974f3bdb..829a5922 100644 --- a/rubicon_ml/domain/mixin.py +++ b/rubicon_ml/domain/mixin.py @@ -25,3 +25,18 @@ def remove_tags(self, tags: List[str]): A list of string tags to remove from this domain model. """ self.tags = list(set(self.tags).difference(set(tags))) + + +class CommentMixin: + """Adds comment support to a domain model.""" + + def add_comments(self, comments: List[str]): + """ + Add new comments to this model. + + Parameters + ---------- + comments : List[str] + A list of string comments to add to the domain model. + """ + self.comments = list(set(self.comments).union(set(comments))) diff --git a/rubicon_ml/repository/base.py b/rubicon_ml/repository/base.py index 950cefd7..1aa5d299 100644 --- a/rubicon_ml/repository/base.py +++ b/rubicon_ml/repository/base.py @@ -1212,3 +1212,64 @@ def get_tags(self, project_name, experiment_id=None, entity_identifier=None, ent sorted_tag_data = [json.loads(tag_data[p]) for _, p in sorted_tag_paths] return sorted_tag_data + + # ---------- Comments ---------- + + def _get_comment_metadata_root( + self, project_name, experiment_id=None, entity_identifier=None, entity_type=None + ): + """Returns the directory to write comments to.""" + get_metadata_root_lookup = { + "Artifact": self._get_artifact_metadata_root, + "Dataframe": self._get_dataframe_metadata_root, + "Experiment": self._get_experiment_metadata_root, + "Metric": self._get_metric_metadata_root, + "Feature": self._get_feature_metadata_root, + "Parameter": self._get_parameter_metadata_root, + } + + try: + get_metadata_root = get_metadata_root_lookup[entity_type] + except KeyError: + raise ValueError("`experiment_id` and `entity_identifier` can not both be `None`.") + + if entity_type == "Experiment": + experiment_metadata_root = get_metadata_root(project_name) + + return f"{experiment_metadata_root}/{experiment_id}" + else: + entity_metadata_root = get_metadata_root(project_name, experiment_id) + + # We want to slugify the names of Metrics, Features, and Parameters- not Artifacts, Dataframes, or Experiments + if entity_type in ["Metric", "Feature", "Parameter"]: + entity_identifier = slugify(entity_identifier) + return f"{entity_metadata_root}/{entity_identifier}" + + def add_comments( + self, project_name, comments, experiment_id=None, entity_identifier=None, entity_type=None + ): + """Persist comments to the configured filesystem. + + Parameters + ---------- + project_name : str + The name of the project the object to comment + belongs to. + comments : list of str + The comment values to persist. + experiment_id : str, optional + The ID of the experiment to apply the comments + `comments` to. + entity_identifier : str, optional + The ID or name of the entity to apply the comments + `comments` to. + entity_type : str, optional + The name of the entity's type as returned by + `entity_cls.__class__.__name__`. + """ + comment_metadata_root = self._get_comment_metadata_root( + project_name, experiment_id, entity_identifier, entity_type + ) + comment_metadata_path = f"{comment_metadata_root}/comments_{domain.utils.uuid.uuid4()}.json" + + self._persist_domain({"added_comments": comments}, comment_metadata_path) diff --git a/tests/unit/domain/test_mixin.py b/tests/unit/domain/test_mixin.py index 3d84792b..74b278a8 100644 --- a/tests/unit/domain/test_mixin.py +++ b/tests/unit/domain/test_mixin.py @@ -1,9 +1,10 @@ -from rubicon_ml.domain.mixin import TagMixin +from rubicon_ml.domain.mixin import TagMixin, CommentMixin -class Taggable(TagMixin): - def __init__(self, tags=[]): +class Taggable(TagMixin, CommentMixin): + def __init__(self, tags=[], comments=[]): self.tags = tags + self.comments = comments def test_add_tags(): @@ -25,3 +26,10 @@ def test_remove_tags(): taggable.remove_tags(["x"]) assert taggable.tags == ["y"] + + +def test_add_comments(): + taggable = Taggable() + taggable.add_comments(["x"]) + + assert taggable.comments == ["x"] diff --git a/tests/unit/repository/test_base_repo.py b/tests/unit/repository/test_base_repo.py index 4ceba57b..de327cdf 100644 --- a/tests/unit/repository/test_base_repo.py +++ b/tests/unit/repository/test_base_repo.py @@ -980,3 +980,25 @@ def test_get_tags_with_no_results(memory_repository): ) assert tags == [] + + +def test_add_comments(memory_repository): + repository = memory_repository + experiment = _create_experiment(repository) + repository.add_comments( + experiment.project_name, + ["wow"], + experiment_id=experiment.id, + entity_type=experiment.__class__.__name__, + ) + + comments_glob = f"{repository.root_dir}/{slugify(experiment.project_name)}/experiments/{experiment.id}/comments_*.json" + comments_files = repository.filesystem.glob(comments_glob) + + assert len(comments_files) == 1 + + open_file = repository.filesystem.open(comments_files[0]) + with open_file as f: + comments_json = json.load(f) + + assert ["wow"] == comments_json["added_comments"] From 6fd8ee7be065d51cb18f3baac39f6375c91cb163 Mon Sep 17 00:00:00 2001 From: sonaalthaker Date: Wed, 14 Feb 2024 14:27:24 -0800 Subject: [PATCH 2/4] addressing comments --- rubicon_ml/domain/mixin.py | 2 +- rubicon_ml/repository/base.py | 29 ++++--------------------- tests/unit/repository/test_base_repo.py | 4 ++-- 3 files changed, 7 insertions(+), 28 deletions(-) diff --git a/rubicon_ml/domain/mixin.py b/rubicon_ml/domain/mixin.py index 829a5922..6485f50f 100644 --- a/rubicon_ml/domain/mixin.py +++ b/rubicon_ml/domain/mixin.py @@ -39,4 +39,4 @@ def add_comments(self, comments: List[str]): comments : List[str] A list of string comments to add to the domain model. """ - self.comments = list(set(self.comments).union(set(comments))) + self.comments.extend(comments) diff --git a/rubicon_ml/repository/base.py b/rubicon_ml/repository/base.py index 1aa5d299..93b28303 100644 --- a/rubicon_ml/repository/base.py +++ b/rubicon_ml/repository/base.py @@ -1219,31 +1219,10 @@ def _get_comment_metadata_root( self, project_name, experiment_id=None, entity_identifier=None, entity_type=None ): """Returns the directory to write comments to.""" - get_metadata_root_lookup = { - "Artifact": self._get_artifact_metadata_root, - "Dataframe": self._get_dataframe_metadata_root, - "Experiment": self._get_experiment_metadata_root, - "Metric": self._get_metric_metadata_root, - "Feature": self._get_feature_metadata_root, - "Parameter": self._get_parameter_metadata_root, - } - - try: - get_metadata_root = get_metadata_root_lookup[entity_type] - except KeyError: - raise ValueError("`experiment_id` and `entity_identifier` can not both be `None`.") - - if entity_type == "Experiment": - experiment_metadata_root = get_metadata_root(project_name) - - return f"{experiment_metadata_root}/{experiment_id}" - else: - entity_metadata_root = get_metadata_root(project_name, experiment_id) - - # We want to slugify the names of Metrics, Features, and Parameters- not Artifacts, Dataframes, or Experiments - if entity_type in ["Metric", "Feature", "Parameter"]: - entity_identifier = slugify(entity_identifier) - return f"{entity_metadata_root}/{entity_identifier}" + # comments and tags are currently written to the same root with a different filename + return self._get_tag_metadata_root( + project_name, experiment_id, entity_identifier, entity_type + ) def add_comments( self, project_name, comments, experiment_id=None, entity_identifier=None, entity_type=None diff --git a/tests/unit/repository/test_base_repo.py b/tests/unit/repository/test_base_repo.py index de327cdf..2405a0ae 100644 --- a/tests/unit/repository/test_base_repo.py +++ b/tests/unit/repository/test_base_repo.py @@ -987,7 +987,7 @@ def test_add_comments(memory_repository): experiment = _create_experiment(repository) repository.add_comments( experiment.project_name, - ["wow"], + ["this is a comment"], experiment_id=experiment.id, entity_type=experiment.__class__.__name__, ) @@ -1001,4 +1001,4 @@ def test_add_comments(memory_repository): with open_file as f: comments_json = json.load(f) - assert ["wow"] == comments_json["added_comments"] + assert ["this is a comment"] == comments_json["added_comments"] From 3b996869244c3efb13c220c8e36639a33b4dedb2 Mon Sep 17 00:00:00 2001 From: sonaalthaker Date: Thu, 15 Feb 2024 10:24:06 -0800 Subject: [PATCH 3/4] isort version updates --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d90a63ad..348a6b5c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -6,7 +6,7 @@ repos: exclude: (versioneer.py|_version.py) - repo: https://github.com/timothycrosley/isort - rev: 5.12.0 + rev: 5.13.2 hooks: - id: isort From 3a8e0ca878ce8756deaa8fab75e8067d5935ac53 Mon Sep 17 00:00:00 2001 From: sonaalthaker Date: Thu, 15 Feb 2024 10:29:02 -0800 Subject: [PATCH 4/4] isort fixes --- rubicon_ml/domain/experiment.py | 2 +- tests/unit/domain/test_mixin.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/rubicon_ml/domain/experiment.py b/rubicon_ml/domain/experiment.py index 7912cf2f..e0dd4b38 100644 --- a/rubicon_ml/domain/experiment.py +++ b/rubicon_ml/domain/experiment.py @@ -4,7 +4,7 @@ from datetime import datetime from typing import List, Optional -from rubicon_ml.domain.mixin import TagMixin, CommentMixin +from rubicon_ml.domain.mixin import CommentMixin, TagMixin from rubicon_ml.domain.utils import TrainingMetadata, uuid diff --git a/tests/unit/domain/test_mixin.py b/tests/unit/domain/test_mixin.py index 74b278a8..3b2ea6f1 100644 --- a/tests/unit/domain/test_mixin.py +++ b/tests/unit/domain/test_mixin.py @@ -1,4 +1,4 @@ -from rubicon_ml.domain.mixin import TagMixin, CommentMixin +from rubicon_ml.domain.mixin import CommentMixin, TagMixin class Taggable(TagMixin, CommentMixin):