diff --git a/src/ms3/cli.py b/src/ms3/cli.py
index 52d67150..2b5c30ff 100644
--- a/src/ms3/cli.py
+++ b/src/ms3/cli.py
@@ -17,6 +17,7 @@
     Parse,
     compute_path_from_file,
     get_git_repo,
+    get_git_version_info,
     make_coloring_reports_and_warnings,
 )
 from ms3._version import __version__
@@ -284,6 +285,19 @@ def transform_cmd(args):
             "-X (expanded), -F (form_labels), -E (events), -C (chords), -D (metadata)"
         )
         return
+
+    repo = get_git_repo(args.dir)
+    if repo is None:
+        version_info = None
+    elif repo.is_dirty():
+        print(
+            "The repository is dirty. Please commit or stash your changes before running ms3 transform. This is "
+            "important because the version information in the JSON descriptor(s) needs to be consistent with the "
+            "repository state."
+        )
+        return
+    else:
+        version_info = get_git_version_info(repo=repo)
     parse_obj = make_parse_obj(args, parse_tsv=True, facets=params)
     filename = os.path.basename(args.dir)
     func = transform_to_resources if args.resources else transform_to_package
@@ -300,6 +314,7 @@ def transform_cmd(args):
         zipped=not args.uncompressed,
         overwrite=args.safe,
         log_level=args.level,
+        custom_metadata=version_info,
     )
 
 
diff --git a/src/ms3/corpus.py b/src/ms3/corpus.py
index 0c4785b6..6425e22d 100644
--- a/src/ms3/corpus.py
+++ b/src/ms3/corpus.py
@@ -24,6 +24,7 @@
 from ms3.utils.functions import (
     compute_path_from_file,
     get_git_repo,
+    get_git_version_info,
     get_name_of_highest_version_tag,
 )
 
@@ -135,9 +136,6 @@ def __init__(
         assert os.path.isdir(directory), f"{directory} is not an existing directory."
         self.corpus_path: str = directory
         """Path where the corpus is located."""
-        self.repo: Optional[git.Repo] = None
-        """If the corpus is part of a git repository, this attribute holds the corresponding :obj:`git.Repo` object."""
-        self.repo = get_git_repo(directory, logger=self.logger)
         self.name = os.path.basename(directory).strip(r"\/")
         """Folder name of the corpus."""
         if (
@@ -150,6 +148,10 @@ def __init__(
         # logger_cfg['level'] = 'w'
         super().__init__(subclass="Corpus", logger_cfg=logger_cfg)
 
+        self.repo: Optional[git.Repo] = None
+        """If the corpus is part of a git repository, this attribute holds the corresponding :obj:`git.Repo` object."""
+        self.repo = get_git_repo(directory, logger=self.logger)
+
         self.files: List[File] = []
         """
         ``[File]`` list of :obj:`File` data objects containing information on the file location
@@ -1386,6 +1388,22 @@ def get_present_facets(self, view_name: Optional[str] = None) -> List[str]:
             result.update(detected_facets.keys())
         return list(result)
 
+    def get_version_info(self, only_if_clean: bool = True) -> Dict[str, str]:
+        """Return the git revision and tag of the repository that the corpus is part of.
+
+        An empty dict is returned if the corpus is not part of a git repository, or if ``only_if_clean`` is True
+        and the repository is dirty.
+        """
+        if self.repo is None:
+            self.logger.debug("No git repo, no version info.")
+            return {}
+        version_info = get_git_version_info(self.repo, only_if_clean=only_if_clean)
+        if only_if_clean and not version_info:
+            self.logger.info(
+                "Git repo is dirty and only_if_clean=True. Returning empty version info."
+            )
+        return version_info
+
     def get_view(self, view_name: Optional[str] = None, **config) -> View:
         """Retrieve an existing or create a new View object, potentially while updating the config."""
         if view_name in self._views:
@@ -3143,6 +3161,10 @@ def store_extracted_facets(
         self.logger.info(
             f"Extracting {len(facets)} facets from {n_scores} of the {self.n_parsed_scores} parsed scores."
         )
+        if frictionless:
+            version_info = self.get_version_info()
+        else:
+            version_info = None
         if target > 0:
             for piece, piece_obj in self.iter_pieces(view_name=view_name):
                 for file, facet2dataframe in piece_obj.iter_extracted_facets(
@@ -3181,6 +3203,7 @@ def store_extracted_facets(
                             raise_exception=False,
                             write_or_remove_errors_file=True,
                             logger=self.logger,
+                            custom_metadata=version_info,
                         )
                         paths.append(descriptor_or_resource_path)
         if output_metadata:
diff --git a/src/ms3/operations.py b/src/ms3/operations.py
index 8dbc8f89..035f79b0 100644
--- a/src/ms3/operations.py
+++ b/src/ms3/operations.py
@@ -5,27 +5,25 @@
 from typing import Dict, Iterator, List, Literal, Optional, Tuple, Union
 
 import pandas as pd
-from ms3 import (
-    Corpus,
-    Parse,
-    make_valid_frictionless_name,
-    resolve_facets_param,
-    store_dataframe_resource,
-    store_dataframes_package,
-)
 from ms3._typing import AnnotationsFacet, TSVtype, TSVtypes
+from ms3.corpus import Corpus
 from ms3.logger import (
     MessageType,
     get_ignored_warning_ids,
     get_logger,
     temporarily_suppress_warnings,
 )
+from ms3.parse import Parse
 from ms3.utils import (
     capture_parse_logs,
     check_argument_against_literal_type,
     compute_path_from_file,
     fifths2name,
+    make_valid_frictionless_name,
     pretty_dict,
+    resolve_facets_param,
+    store_dataframe_resource,
+    store_dataframes_package,
     tpc2scale_degree,
     write_tsv,
 )
@@ -504,6 +502,7 @@ def transform_to_resources(
     raise_exception: bool = False,
     write_or_remove_errors_file: bool = True,
     log_level="i",
+    custom_metadata: Optional[dict] = None,
 ):
     logger = get_logger("ms3.transform", level=log_level)
     for df, facet, output_folder, prefix, msg in _transform(
@@ -530,6 +529,7 @@ def transform_to_resources(
             descriptor_extension="json",
             raise_exception=raise_exception,
             write_or_remove_errors_file=write_or_remove_errors_file,
+            custom_metadata=custom_metadata,
         )
         logger.info(msg)
 
@@ -548,6 +548,7 @@ def transform_to_package(
     raise_exception: bool = False,
     write_or_remove_errors_file: bool = True,
     log_level="i",
+    custom_metadata: Optional[dict] = None,
 ):
     logger = get_logger("ms3.transform", level=log_level)
     dfs, returned_facets = [], []
@@ -581,6 +582,7 @@ def transform_to_package(
         raise_exception=raise_exception,
         write_or_remove_errors_file=write_or_remove_errors_file,
         logger=logger,
+        custom_metadata=custom_metadata,
     )
 
 
diff --git a/src/ms3/piece.py b/src/ms3/piece.py
index 95f0b273..2ac16987 100644
--- a/src/ms3/piece.py
+++ b/src/ms3/piece.py
@@ -40,6 +40,7 @@
     compute_path_from_file,
     disambiguate_files,
     files2disambiguation_dict,
+    get_git_version_info,
     get_musescore,
     infer_tsv_type,
     load_tsv,
@@ -1787,6 +1788,15 @@ def store_extracted_facet(
         if unfold:
             piece_name += "_unfolded"
         directory = compute_path_from_file(file, root_dir=root_dir, folder=folder)
+        version_info = None
+        if create_descriptor:
+            try:
+                version_info = get_git_version_info(
+                    repo_path=file.directory,
+                    only_if_clean=True,
+                )
+            except AssertionError as e:
+                self.logger.debug(f"No git version info added to the descriptor: {e}")
         store_dataframe_resource(
             df=df,
             directory=directory,
@@ -1797,6 +1807,7 @@ def store_extracted_facet(
             raise_exception=raise_exception,
             write_or_remove_errors_file=write_or_remove_errors_file,
             logger=self.logger,
+            custom_metadata=version_info,
         )
 
     # def store_parsed_scores(self,
diff --git a/src/ms3/utils/frictionless_helpers.py b/src/ms3/utils/frictionless_helpers.py
index 697963ed..763348ac 100644
--- a/src/ms3/utils/frictionless_helpers.py
+++ b/src/ms3/utils/frictionless_helpers.py
@@ -628,6 +628,7 @@ def store_dataframe_resource(
     raise_exception: bool = True,
     write_or_remove_errors_file: bool = True,
     logger=None,
+    custom_metadata: Optional[dict] = None,
     **kwargs,
 ) -> Optional[str]:
     """Write a DataFrame to a TSV or CSV file together with its frictionless resource descriptor.
@@ -699,6 +700,8 @@ def store_dataframe_resource(
     if not frictionless:
         return resource_path
     try:
+        if custom_metadata is None:
+            custom_metadata = {}
         descriptor_path = make_and_store_resource_descriptor(
             df=df,
             directory=directory,
@@ -710,6 +713,7 @@ def store_dataframe_resource(
             include_index_levels=include_index_levels,
             creator=DEFAULT_CREATOR_METADATA,  # custom metadata field for descriptor, passed as kwarg
             logger=logger,
+            **custom_metadata,
         )
     except ValueError as e:
         descriptor_path = None
@@ -738,6 +742,7 @@ def store_dataframes_package(
     raise_exception: bool = True,
     write_or_remove_errors_file: bool = True,
     logger=None,
+    custom_metadata: Optional[dict] = None,
 ):
     """Write a DataFrame to a TSV or CSV file together with its frictionless resource descriptor.
     Uses: :py:func:`write_tsv`
@@ -774,6 +779,7 @@ def store_dataframes_package(
         name=piece_name,
         resources=[],
     )
+
     for df, facet in zip(dataframes, facets):
         resource_path = store_dataframe_resource(
             df=df,
@@ -797,6 +803,8 @@ def store_dataframes_package(
     if not frictionless:
         return
     package_descriptor["creator"] = DEFAULT_CREATOR_METADATA  # custom metadata field
+    if custom_metadata is not None:
+        package_descriptor.update(custom_metadata)
     package_descriptor_filepath = f"{piece_name}.datapackage.{descriptor_extension}"
     package_descriptor_path = os.path.join(directory, package_descriptor_filepath)
     store_as_json_or_yaml(
diff --git a/src/ms3/utils/functions.py b/src/ms3/utils/functions.py
index 675502aa..500cc9d9 100644
--- a/src/ms3/utils/functions.py
+++ b/src/ms3/utils/functions.py
@@ -1856,6 +1856,116 @@ def get_git_repo(
     return repo
 
 
+def _resolve_git_repo(
+    repo: Optional[git.Repo] = None,
+    repo_path: Optional[str] = None,
+) -> git.Repo:
+    """Return ``repo`` if it is given, otherwise the result of calling :func:`get_git_repo` on ``repo_path``.
+
+    Raises:
+        AssertionError:
+            If both arguments are specified or if no repository could be obtained. It is raised explicitly rather
+            than via ``assert`` so that the check is not stripped when Python runs with the ``-O`` flag.
+    """
+    if repo is not None:
+        if repo_path is not None:
+            raise AssertionError("Pass either repo or repo_path, not both.")
+        return repo
+    repo = get_git_repo(repo_path)
+    if repo is None:
+        raise AssertionError("No git repo to get the version from.")
+    return repo
+
+
+def get_git_revision(
+    repo: Optional[git.Repo] = None,
+    repo_path: Optional[str] = None,
+) -> str:
+    """Return the full hexadecimal hash of the commit that HEAD points to.
+
+    Pass either ``repo`` or ``repo_path``; an AssertionError is raised for both or if no repository is found.
+    """
+    return _resolve_git_repo(repo, repo_path).head.commit.hexsha
+
+
+@overload
+def get_git_tag(
+    repo: Optional[git.Repo] = ...,
+    repo_path: Optional[str] = ...,
+    always: Literal[True] = ...,
+) -> str:
+    ...
+
+
+@overload
+def get_git_tag(
+    repo: Optional[git.Repo] = ...,
+    repo_path: Optional[str] = ...,
+    *,
+    always: Literal[False],
+) -> Optional[str]:
+    ...
+
+
+@overload
+def get_git_tag(
+    repo: Optional[git.Repo], repo_path: Optional[str], always: Literal[False]
+) -> Optional[str]:
+    ...
+
+
+def get_git_tag(
+    repo: Optional[git.Repo] = None,
+    repo_path: Optional[str] = None,
+    always: bool = True,
+) -> Optional[str]:
+    """Return the output of ``git describe --tags`` for the repository.
+
+    If always is set to True and no tags are found, the commit short hash is returned instead. If always is set
+    to False and ``git describe`` fails (e.g. because no tags are found), None is returned.
+
+    Pass either ``repo`` or ``repo_path``; an AssertionError is raised for both or if no repository is found.
+    """
+    repo = _resolve_git_repo(repo, repo_path)
+    if always:
+        return repo.git.describe(tags=True, always=True)
+    try:
+        return repo.git.describe(tags=True)
+    except git.GitCommandError:
+        # git exits with an error if it cannot describe HEAD; anything else is not swallowed
+        return None
+
+
+def get_git_version_info(
+    repo: Optional[git.Repo] = None,
+    repo_path: Optional[str] = None,
+    only_if_clean: bool = True,
+) -> dict:
+    """Return the keys ``git_revision`` and ``git_tag`` describing the current state of the repository.
+
+    An empty dict is returned if ``only_if_clean`` is True and ``repo.is_dirty()`` reports uncommitted changes.
+    Pass either ``repo`` or ``repo_path``; an AssertionError is raised for both or if no repository is found.
+    """
+    repo = _resolve_git_repo(repo, repo_path)
+    if only_if_clean and repo.is_dirty():
+        return {}
+    return dict(
+        git_revision=get_git_revision(repo=repo),
+        git_tag=get_git_tag(repo=repo, always=True),
+    )
+
+
+def git_repo_is_clean(
+    repo: Optional[git.Repo] = None,
+    repo_path: Optional[str] = None,
+) -> bool:
+    """Return True if ``repo.is_dirty()`` reports no uncommitted changes.
+
+    Pass either ``repo`` or ``repo_path``; an AssertionError is raised for both or if no repository is found.
+    """
+    return not _resolve_git_repo(repo, repo_path).is_dirty()
+
+
 def get_ms_version(mscx_file):
     with open(mscx_file, encoding="utf-8") as file:
         for i, l in enumerate(file):
diff --git a/tests/test_metarepo_files/debugging.py b/tests/test_metarepo_files/debugging.py
index 9ec0aef2..a14724f3 100644
--- a/tests/test_metarepo_files/debugging.py
+++ b/tests/test_metarepo_files/debugging.py
@@ -8,7 +8,7 @@
 """
 import os.path
 
-from ms3 import Parse, Score
+from ms3 import Parse
 from ms3.logger import get_logger
 from ms3.operations import transform_to_resources
 