Skip to content

Commit

Permalink
Merge 0.4.3
Browse files Browse the repository at this point in the history
  • Loading branch information
johentsch authored Nov 23, 2020
2 parents 8ba6f0d + 2c70714 commit 64d64b0
Show file tree
Hide file tree
Showing 22 changed files with 13,291 additions and 1,088 deletions.
8 changes: 8 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,14 @@
Changelog
=========

Version 0.4.3
=============

* added 'ms3 check' command
* support of coloured labels
* write coloured labels to score comparing attached and detached labels to each other


Version 0.4.2
=============

Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ package_dir =
# DON'T CHANGE THE FOLLOWING LINE! IT WILL BE UPDATED BY PYSCAFFOLD!
setup_requires = pyscaffold>=3.2a0,<3.3a0
# Add here dependencies of your project (semicolon/line-separated), e.g.
install_requires = beautifulsoup4; lxml; pandas; pathos
install_requires = beautifulsoup4; lxml; pandas; pathos; webcolors
# The usage of test_requires is discouraged, see `Dependency Management` docs
# tests_require = pytest; pytest-cov
# Require a specific Python version, e.g. Python 2.7 or >= 3.4
Expand Down
3 changes: 3 additions & 0 deletions src/ms3/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
"""
# -*- coding: utf-8 -*-
from pkg_resources import get_distribution, DistributionNotFound
import os
os.environ["NUMEXPR_MAX_THREADS"] = "64"

try:
# Change here if project is renamed and does not equal the package name
Expand All @@ -18,3 +20,4 @@
from .score import Score
from .annotations import Annotations
from .parse import Parse
from .utils import COLORS, load_tsv
105 changes: 79 additions & 26 deletions src/ms3/annotations.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import pandas as pd

from .utils import decode_harmonies, is_any_row_equal, load_tsv, resolve_dir, update_cfg
from .utils import decode_harmonies, is_any_row_equal, html2format, load_tsv, map_dict, name2format, resolve_dir, rgb2format, update_cfg
from .logger import LoggedClass
from .expand_dcml import expand_labels

Expand Down Expand Up @@ -67,14 +67,20 @@ class Annotations(LoggedClass):
""",
re.VERBOSE)

main_cols = ['label', 'mc', 'mc_onset', 'staff', 'voice']
additional_cols = ['label_type', 'root', 'rootCase', 'base', 'leftParen', 'rightParen', 'offset_x', 'offset_y',
'nashville', 'decoded', 'color_name', 'color_html', 'color_r', 'color_g', 'color_b', 'color_a']

def __init__(self, tsv_path=None, df=None, cols={}, index_col=None, sep='\t', mscx_obj=None, infer_types={}, read_only=False, logger_cfg={}, **kwargs):
"""
Parameters
----------
tsv_path
df
cols
cols : :obj:`dict`, optional
If one or several column names differ, pass a {NAME -> ACTUAL_NAME} dictionary, where NAME can be
{'mc', 'mn', 'mc_onset', 'label', 'staff', 'voice', 'volta'}
index_col
sep
mscx_obj
Expand All @@ -93,27 +99,21 @@ def __init__(self, tsv_path=None, df=None, cols={}, index_col=None, sep='\t', ms
self.changed = False
self.read_only = read_only
self.mscx_obj = mscx_obj
self.cols = {
'mc': 'mc',
'mn': 'mn',
'mc_onset': 'mc_onset',
'label': 'label',
'staff': 'staff',
'voice': 'voice',
'volta': 'volta',
}
columns = self.main_cols + self.additional_cols
self.cols = {c: c for c in columns}
self.cols.update(update_cfg(cols, self.cols.keys(), logger=self.logger))

if df is not None:
self.df = df.copy()
else:
assert tsv_path is not None, "Name a TSV file to be loaded."
self.df = load_tsv(tsv_path, index_col=index_col, sep=sep, **kwargs)
for col in ['label', 'mc_onset']:
for col in ['label']:
assert self.cols[col] in self.df.columns, f"""The DataFrame has no column named '{self.cols[col]}'. Pass the column name as col={{'{col}'=col_name}}.
Present column names are:\n{self.df.columns.to_list()}."""
if 'offset' in self.df.columns:
self.df.drop(columns='offset', inplace=True)
# if 'offset' in self.df.columns:
# self.df.drop(columns='offset', inplace=True)
#self.cols = {k: v for k, v in self.cols.items() if k in self.main_cols + ['label_type'] or v in df.columns}
self.infer_types()


Expand Down Expand Up @@ -145,8 +145,10 @@ def prepare_for_attaching(self, staff=None, voice=None, check_for_clashes=True):
if voice is not None:
df[voice_col] = voice
if voice_col in cols and df[voice_col].isna().any():
self.logger.warning(f"The following labels don't have staff information: {df[df.voice.isna()]}")
self.logger.warning(f"The following labels don't ahve voice information: {df[df.voice.isna()]}")
error = True
if error:
return pd.DataFrame()

if self.cols['mc'] not in cols:
if self.cols['mn'] not in cols:
Expand All @@ -167,7 +169,7 @@ def prepare_for_attaching(self, staff=None, voice=None, check_for_clashes=True):

position_cols = ['mc', 'mc_onset', 'staff', 'voice']
new_pos_cols = [self.cols[c] for c in position_cols]
if all(c in df.columns for c in position_cols):
if all(c in df.columns for c in new_pos_cols):
if check_for_clashes and self.mscx_obj.has_annotations:
existing = self.mscx_obj.get_raw_labels()[position_cols]
to_be_attached = df[new_pos_cols]
Expand All @@ -184,7 +186,7 @@ def prepare_for_attaching(self, staff=None, voice=None, check_for_clashes=True):
return df


def n_labels(self):
def count(self):
return len(self.df)


Expand All @@ -200,14 +202,37 @@ def label_types(self):

@property
def annotation_layers(self):
layers = [col for col in ['staff', 'voice', 'label_type'] if col in self.df.columns]
return self.n_labels(), self.df.groupby(layers).size()
df = self.df.copy()
layers = ['staff', 'voice', 'label_type']
for c in layers:
if self.cols[c] not in df.columns:
df[c] = None
color_cols = ['color_name', 'color_html', 'color_r']
if any(True for c in color_cols if self.cols[c] in df):
color_name = self.cols['color_name']
if color_name in df.columns:
pass
elif self.cols['color_html'] in df.columns:
df[color_name] = html2format(df, 'name')
elif self.cols['color_r'] in df.columns:
df[color_name] = rgb2format(df, 'name')
df[color_name] = df[color_name].fillna('default')
layers += [color_name]
type2name = map_dict({
0: '0 (Plain Text)',
1: '1 (Nashville)',
2: '2 (Roman Numeral)',
3: '3 (Absolute Chord)',
'dcml': 'dcml',
})
df.label_type = df.label_type.map(type2name)
return self.count(), df.groupby(layers, dropna=False).size()

def __repr__(self):
n, layers = self.annotation_layers
return f"{n} labels:\n{layers.to_string()}"

def get_labels(self, staff=None, voice=None, label_type=None, positioning=True, decode=False, drop=False, warnings=True, column_name=None):
def get_labels(self, staff=None, voice=None, label_type=None, positioning=True, decode=False, drop=False, warnings=True, column_name=None, color_format='html'):
""" Returns a DataFrame of annotation labels.
Parameters
Expand All @@ -233,12 +258,15 @@ def get_labels(self, staff=None, voice=None, label_type=None, positioning=True,
Set to False to suppress warnings about non-existent label_types.
column_name : :obj:`str`, optional
Can be used to rename the columns holding the labels.
color_format : {'html', 'rgb', 'rgba', 'name', None}
If label colors are encoded, determine how they are displayed.
Returns
-------
"""
sel = pd.Series(True, index=self.df.index)

if staff is not None:
sel = sel & (self.df[self.cols['staff']] == staff)
if voice is not None:
Expand All @@ -250,7 +278,7 @@ def get_labels(self, staff=None, voice=None, label_type=None, positioning=True,
# (pd.to_numeric(self.df['label_type']).astype('Int64') == label_type).fillna(False)
res = self.df[sel].copy()
if not positioning:
pos_cols = [c for c in ['minDistance', 'offset', 'offset:x', 'offset:y'] if c in res.columns]
pos_cols = [c for c in ['minDistance', 'offset', 'offset_x', 'offset_y'] if c in res.columns]
res.drop(columns=pos_cols, inplace=True)
if drop:
self.df = self.df[~sel]
Expand All @@ -259,6 +287,28 @@ def get_labels(self, staff=None, voice=None, label_type=None, positioning=True,
res = decode_harmonies(res, label_col=label_col)
if column_name is not None and column_name != label_col:
res = res.rename(columns={label_col: column_name})
color_cols = ['color_html', 'color_r', 'color_g', 'color_b', 'color_a', 'color_name']
rgb_cols = ['color_r', 'color_g', 'color_b']
if color_format is not None and any(True for c in color_cols if c in res):
if color_format == 'html' and 'color_html' not in res.columns:
if 'color_name' in res.columns:
html = name2format(res, 'html')
elif 'color_r' in res.columns:
if any(True for c in rgb_cols if c not in res.columns):
logger.warning(f"The following columns are missing: {list(c for c in rgb_cols if c not in res.columns)}")
else:
html = rgb2format(res, 'html')
res['color_html'] = html
elif color_format == 'name' and 'color_name' not in res.columns:
if 'color_html' in res.columns:
name = html2format(res, 'name')
elif 'color_r' in res.columns:
if any(True for c in rgb_cols if c not in res.columns):
logger.warning(f"The following columns are required")
else:
name = rgb2format(res, 'name')
res['color_name'] = name

return res


Expand All @@ -284,7 +334,7 @@ def expand_dcml(self, drop_others=True, warn_about_others=True, **kwargs):
self.regex_dict = dict(dcml=self.dcml_double_re, **self.regex_dict)
self.infer_types()
df = self.get_labels(**kwargs)
sel = df.label_type == 'dcml'
sel = df.label_type.str.contains('dcml')
if not sel.any():
self.logger.info(f"Score does not contain any DCML harmonic annotations.")
return
Expand Down Expand Up @@ -335,10 +385,13 @@ def infer_types(self, regex_dict=None):
self.df.loc[self.df.nashville.notna(), 'label_type'] = 2
if 'root' in self.df.columns:
self.df.loc[self.df.root.notna(), 'label_type'] = 3
for name, regex in self.regex_dict.items():
sel = self.df.label_type == 0
mtch = self.df.loc[sel, self.cols['label']].str.match(regex)
self.df.loc[sel & mtch, 'label_type'] = name
if len(self.regex_dict) > 0:
decoded = decode_harmonies(self.df, label_col=self.cols['label'], return_series=True)
for name, regex in self.regex_dict.items():
sel = self.df.label_type.isin((0, 1, 2, 3))
#mtch = self.df.loc[sel, self.cols['label']].str.match(regex)
mtch = decoded[sel].str.match(regex)
self.df.loc[sel & mtch, 'label_type'] = self.df.loc[sel & mtch, 'label_type'].astype(str) + f" ({name})"


def store_tsv(self, tsv_path, staff=None, voice=None, label_type=None, positioning=True, decode=False, sep='\t', index=False, **kwargs):
Expand Down
Loading

0 comments on commit 64d64b0

Please sign in to comment.