Skip to content

Commit

Permalink
adds 'git_revision' and 'git_tag' to JSON descriptors whenever the git repo is clean
Browse files Browse the repository at this point in the history
  • Loading branch information
johentsch committed Oct 20, 2023
1 parent fd146f6 commit 5b76a81
Show file tree
Hide file tree
Showing 7 changed files with 141 additions and 12 deletions.
15 changes: 15 additions & 0 deletions src/ms3/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
Parse,
compute_path_from_file,
get_git_repo,
get_git_version_info,
make_coloring_reports_and_warnings,
)
from ms3._version import __version__
Expand Down Expand Up @@ -284,6 +285,19 @@ def transform_cmd(args):
"-X (expanded), -F (form_labels), -E (events), -C (chords), -D (metadata)"
)
return

repo = get_git_repo(args.dir)
if repo is None:
version_info = None
elif repo.is_dirty():
print(
"The repository is dirty. Please commit or stash your changes before running ms3 transform. This is "
"important because the version information in the JSON descriptor(s) needs to be consistent with the "
"repository state."
)
return
else:
version_info = get_git_version_info(repo=repo)
parse_obj = make_parse_obj(args, parse_tsv=True, facets=params)
filename = os.path.basename(args.dir)
func = transform_to_resources if args.resources else transform_to_package
Expand All @@ -300,6 +314,7 @@ def transform_cmd(args):
zipped=not args.uncompressed,
overwrite=args.safe,
log_level=args.level,
custom_metadata=version_info,
)


Expand Down
24 changes: 21 additions & 3 deletions src/ms3/corpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from ms3.utils.functions import (
compute_path_from_file,
get_git_repo,
get_git_version_info,
get_name_of_highest_version_tag,
)

Expand Down Expand Up @@ -135,9 +136,6 @@ def __init__(
assert os.path.isdir(directory), f"{directory} is not an existing directory."
self.corpus_path: str = directory
"""Path where the corpus is located."""
self.repo: Optional[git.Repo] = None
"""If the corpus is part of a git repository, this attribute holds the corresponding :obj:`git.Repo` object."""
self.repo = get_git_repo(directory, logger=self.logger)
self.name = os.path.basename(directory).strip(r"\/")
"""Folder name of the corpus."""
if (
Expand All @@ -150,6 +148,10 @@ def __init__(
# logger_cfg['level'] = 'w'
super().__init__(subclass="Corpus", logger_cfg=logger_cfg)

self.repo: Optional[git.Repo] = None
"""If the corpus is part of a git repository, this attribute holds the corresponding :obj:`git.Repo` object."""
self.repo = get_git_repo(directory, logger=self.logger)

self.files: List[File] = []
"""
``[File]`` list of :obj:`File` data objects containing information on the file location
Expand Down Expand Up @@ -1386,6 +1388,17 @@ def get_present_facets(self, view_name: Optional[str] = None) -> List[str]:
result.update(detected_facets.keys())
return list(result)

def get_version_info(self, only_if_clean: bool = True) -> Dict[str, str]:
    """Return the git version information of the repository this corpus belongs to.

    Args:
        only_if_clean:
            Forwarded to :func:`get_git_version_info`. When True (default), nothing is
            reported for a repository that has uncommitted changes.

    Returns:
        Whatever :func:`get_git_version_info` returns for ``self.repo``, or an empty dict
        if ``self.repo`` is None.
    """
    repo = self.repo
    if repo is None:
        self.logger.debug("No git repo, no version info.")
        return {}
    info = get_git_version_info(repo, only_if_clean=only_if_clean)
    if info or not only_if_clean:
        return info
    # An empty result while only_if_clean is set is reported as a dirty repository.
    self.logger.info(
        "Git repo is dirty and only_if_clean=True. Returning empty version info."
    )
    return info

def get_view(self, view_name: Optional[str] = None, **config) -> View:
"""Retrieve an existing or create a new View object, potentially while updating the config."""
if view_name in self._views:
Expand Down Expand Up @@ -3143,6 +3156,10 @@ def store_extracted_facets(
self.logger.info(
f"Extracting {len(facets)} facets from {n_scores} of the {self.n_parsed_scores} parsed scores."
)
if frictionless:
version_info = self.get_version_info()
else:
version_info = None
if target > 0:
for piece, piece_obj in self.iter_pieces(view_name=view_name):
for file, facet2dataframe in piece_obj.iter_extracted_facets(
Expand Down Expand Up @@ -3181,6 +3198,7 @@ def store_extracted_facets(
raise_exception=False,
write_or_remove_errors_file=True,
logger=self.logger,
custom_metadata=version_info,
)
paths.append(descriptor_or_resource_path)
if output_metadata:
Expand Down
18 changes: 10 additions & 8 deletions src/ms3/operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,27 +5,25 @@
from typing import Dict, Iterator, List, Literal, Optional, Tuple, Union

import pandas as pd
from ms3 import (
Corpus,
Parse,
make_valid_frictionless_name,
resolve_facets_param,
store_dataframe_resource,
store_dataframes_package,
)
from ms3._typing import AnnotationsFacet, TSVtype, TSVtypes
from ms3.corpus import Corpus
from ms3.logger import (
MessageType,
get_ignored_warning_ids,
get_logger,
temporarily_suppress_warnings,
)
from ms3.parse import Parse
from ms3.utils import (
capture_parse_logs,
check_argument_against_literal_type,
compute_path_from_file,
fifths2name,
make_valid_frictionless_name,
pretty_dict,
resolve_facets_param,
store_dataframe_resource,
store_dataframes_package,
tpc2scale_degree,
write_tsv,
)
Expand Down Expand Up @@ -504,6 +502,7 @@ def transform_to_resources(
raise_exception: bool = False,
write_or_remove_errors_file: bool = True,
log_level="i",
custom_metadata: Optional[dict] = None,
):
logger = get_logger("ms3.transform", level=log_level)
for df, facet, output_folder, prefix, msg in _transform(
Expand All @@ -530,6 +529,7 @@ def transform_to_resources(
descriptor_extension="json",
raise_exception=raise_exception,
write_or_remove_errors_file=write_or_remove_errors_file,
custom_metadata=custom_metadata,
)
logger.info(msg)

Expand All @@ -548,6 +548,7 @@ def transform_to_package(
raise_exception: bool = False,
write_or_remove_errors_file: bool = True,
log_level="i",
custom_metadata: Optional[dict] = None,
):
logger = get_logger("ms3.transform", level=log_level)
dfs, returned_facets = [], []
Expand Down Expand Up @@ -581,6 +582,7 @@ def transform_to_package(
raise_exception=raise_exception,
write_or_remove_errors_file=write_or_remove_errors_file,
logger=logger,
custom_metadata=custom_metadata,
)


Expand Down
11 changes: 11 additions & 0 deletions src/ms3/piece.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
compute_path_from_file,
disambiguate_files,
files2disambiguation_dict,
get_git_version_info,
get_musescore,
infer_tsv_type,
load_tsv,
Expand Down Expand Up @@ -1787,6 +1788,15 @@ def store_extracted_facet(
if unfold:
piece_name += "_unfolded"
directory = compute_path_from_file(file, root_dir=root_dir, folder=folder)
version_info = None
if create_descriptor:
try:
version_info = get_git_version_info(
repo_path=file.directory,
only_if_clean=True,
)
except AssertionError:
pass
store_dataframe_resource(
df=df,
directory=directory,
Expand All @@ -1797,6 +1807,7 @@ def store_extracted_facet(
raise_exception=raise_exception,
write_or_remove_errors_file=write_or_remove_errors_file,
logger=self.logger,
custom_metadata=version_info,
)

# def store_parsed_scores(self,
Expand Down
8 changes: 8 additions & 0 deletions src/ms3/utils/frictionless_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -628,6 +628,7 @@ def store_dataframe_resource(
raise_exception: bool = True,
write_or_remove_errors_file: bool = True,
logger=None,
custom_metadata: dict = None,
**kwargs,
) -> Optional[str]:
"""Write a DataFrame to a TSV or CSV file together with its frictionless resource descriptor.
Expand Down Expand Up @@ -699,6 +700,8 @@ def store_dataframe_resource(
if not frictionless:
return resource_path
try:
if custom_metadata is None:
custom_metadata = {}
descriptor_path = make_and_store_resource_descriptor(
df=df,
directory=directory,
Expand All @@ -710,6 +713,7 @@ def store_dataframe_resource(
include_index_levels=include_index_levels,
creator=DEFAULT_CREATOR_METADATA, # custom metadata field for descriptor, passed as kwarg
logger=logger,
**custom_metadata,
)
except ValueError as e:
descriptor_path = None
Expand Down Expand Up @@ -738,6 +742,7 @@ def store_dataframes_package(
raise_exception: bool = True,
write_or_remove_errors_file: bool = True,
logger=None,
custom_metadata: Optional[dict] = None,
):
"""Write a DataFrame to a TSV or CSV file together with its frictionless resource descriptor.
Uses: :py:func:`write_tsv`
Expand Down Expand Up @@ -774,6 +779,7 @@ def store_dataframes_package(
name=piece_name,
resources=[],
)

for df, facet in zip(dataframes, facets):
resource_path = store_dataframe_resource(
df=df,
Expand All @@ -797,6 +803,8 @@ def store_dataframes_package(
if not frictionless:
return
package_descriptor["creator"] = DEFAULT_CREATOR_METADATA # custom metadata field
if custom_metadata is not None:
package_descriptor.update(custom_metadata)
package_descriptor_filepath = f"{piece_name}.datapackage.{descriptor_extension}"
package_descriptor_path = os.path.join(directory, package_descriptor_filepath)
store_as_json_or_yaml(
Expand Down
75 changes: 75 additions & 0 deletions src/ms3/utils/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -1856,6 +1856,81 @@ def get_git_repo(
return repo


def get_git_revision(
    repo: Optional[git.Repo] = None,
    repo_path: Optional[str] = None,
) -> str:
    """Return the ``hexsha`` of the commit that the repository's HEAD points to.

    Args:
        repo: An already instantiated repository object. Mutually exclusive with ``repo_path``.
        repo_path: Path handed to :func:`get_git_repo` when ``repo`` is not given.

    Returns:
        ``repo.head.commit.hexsha``

    Raises:
        AssertionError:
            If both ``repo`` and ``repo_path`` are passed, or if no repository could be
            obtained from ``repo_path``.
    """
    if repo is not None:
        assert repo_path is None, "Pass either repo or repo_path, not both."
    else:
        repo = get_git_repo(repo_path)
        assert repo is not None, "No git repo to get the version from."
    head_commit = repo.head.commit
    return head_commit.hexsha


@overload
def get_git_tag(
    repo: Optional[git.Repo] = ...,
    repo_path: Optional[str] = ...,
    always: Literal[True] = ...,
) -> str:
    ...


@overload
def get_git_tag(
    repo: Optional[git.Repo] = ...,
    repo_path: Optional[str] = ...,
    always: bool = ...,
) -> Optional[str]:
    ...


def get_git_tag(
    repo: Optional[git.Repo] = None,
    repo_path: Optional[str] = None,
    always: bool = True,
) -> Optional[str]:
    """Return the output of ``git describe --tags`` for the given repository.

    If always is set to True and no tags are found, the commit short hash is returned instead.

    Args:
        repo: An already instantiated repository object. Mutually exclusive with ``repo_path``.
        repo_path: Path handed to :func:`get_git_repo` when ``repo`` is not given.
        always:
            When True (default), ``--always`` is added to the ``git describe`` call and any
            error it raises is propagated. When False, a failing call results in None.

    Returns:
        The string returned by ``git describe``, or None if ``always`` is False and the
        command failed.

    Raises:
        AssertionError:
            If both ``repo`` and ``repo_path`` are passed, or if no repository could be
            obtained from ``repo_path``.
    """
    if repo is None:
        repo = get_git_repo(repo_path)
        assert repo is not None, "No git repo to get the version from."
    else:
        assert repo_path is None, "Pass either repo or repo_path, not both."
    if always:
        return repo.git.describe(tags=True, always=always)
    try:
        return repo.git.describe(tags=True)
    except Exception:
        # Deliberate best effort: without --always, any failure of `git describe`
        # is reported to the caller as "no tag available".
        return None


def get_git_version_info(
    repo: Optional[git.Repo] = None,
    repo_path: Optional[str] = None,
    only_if_clean: bool = True,
):
    """Collect the current revision and tag description of a git repository.

    Args:
        repo: An already instantiated repository object. Mutually exclusive with ``repo_path``.
        repo_path: Path handed to :func:`get_git_repo` when ``repo`` is not given.
        only_if_clean:
            When True (default), an empty dict is returned if ``repo.is_dirty()`` is true.

    Returns:
        A dict with the keys ``git_revision`` (from :func:`get_git_revision`) and ``git_tag``
        (from :func:`get_git_tag` called with ``always=True``), or an empty dict for a dirty
        repository when ``only_if_clean`` is set.

    Raises:
        AssertionError:
            If both ``repo`` and ``repo_path`` are passed, or if no repository could be
            obtained from ``repo_path``.
    """
    if repo is not None:
        assert repo_path is None, "Pass either repo or repo_path, not both."
    else:
        repo = get_git_repo(repo_path)
        assert repo is not None, "No git repo to get the version from."
    if only_if_clean and repo.is_dirty():
        return {}
    revision = get_git_revision(repo=repo)
    tag = get_git_tag(repo=repo, always=True)
    return {"git_revision": revision, "git_tag": tag}


def git_repo_is_clean(
    repo: Optional[git.Repo] = None,
    repo_path: Optional[str] = None,
) -> bool:
    """Tell whether a git repository is clean, i.e. ``repo.is_dirty()`` is false.

    Args:
        repo: An already instantiated repository object. Mutually exclusive with ``repo_path``.
        repo_path: Path handed to :func:`get_git_repo` when ``repo`` is not given.

    Returns:
        The negation of ``repo.is_dirty()``.

    Raises:
        AssertionError:
            If both ``repo`` and ``repo_path`` are passed, or if no repository could be
            obtained from ``repo_path``.
    """
    if repo is not None:
        assert repo_path is None, "Pass either repo or repo_path, not both."
    else:
        repo = get_git_repo(repo_path)
        assert repo is not None, "No git repo to get the version from."
    dirty = repo.is_dirty()
    return not dirty


def get_ms_version(mscx_file):
with open(mscx_file, encoding="utf-8") as file:
for i, l in enumerate(file):
Expand Down
2 changes: 1 addition & 1 deletion tests/test_metarepo_files/debugging.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
"""
import os.path

from ms3 import Parse, Score
from ms3 import Parse
from ms3.logger import get_logger
from ms3.operations import transform_to_resources

Expand Down

0 comments on commit 5b76a81

Please sign in to comment.