diff --git a/beancount_importer_rules/extractor.py b/beancount_importer_rules/extractor.py
index ba793dd..1a22d0b 100644
--- a/beancount_importer_rules/extractor.py
+++ b/beancount_importer_rules/extractor.py
@@ -129,9 +129,13 @@ def process(self):
     """
 
     name: str = "extractor"
+    """The name of the extractor. Will end up being available to matchers as `extractor`"""
     date_field: str = "Date"
-    date_format: str = "%Y-%m-%d"
-    datetime_format: str = "%Y-%m-%d %H:%M:%S"
+    """The field in the CSV file that contains the date"""
+    date_format: str = "YYYY-MM-DD"
+    """Arrow date format"""
+    datetime_format: str = "YYYY-MM-DD HH:mm:ss"
+    """Arrow datetime format (`mm` = minutes, `ss` = seconds; `MM` is the month)"""
 
     def __init__(
         self,
diff --git a/beancount_importer_rules/includes.py b/beancount_importer_rules/includes.py
index e95f9fe..614dc2a 100644
--- a/beancount_importer_rules/includes.py
+++ b/beancount_importer_rules/includes.py
@@ -4,6 +4,7 @@
 """
 
 import pathlib
+import sys
 
 import yaml
 from pydantic import TypeAdapter
@@ -17,10 +18,48 @@
 RuleListAdapter = TypeAdapter(list[ImportRule | IncludeRule])
 
 
+class NoDatesSafeLoader(yaml.SafeLoader):
+    """SafeLoader variant whose implicit resolvers can be removed per tag."""
+
+    @classmethod
+    def remove_implicit_resolver(cls, tag_to_remove):
+        """
+        Remove implicit resolvers for a particular tag
+
+        Takes care not to modify resolvers in super classes.
+
+        We want to load datetimes as strings, not dates, because we
+        go on to serialise as json which doesn't have the advanced types
+        of yaml, and leads to incompatibilities down the track.
+        """
+        if "yaml_implicit_resolvers" not in cls.__dict__:
+            cls.yaml_implicit_resolvers = cls.yaml_implicit_resolvers.copy()
+
+        for first_letter, mappings in cls.yaml_implicit_resolvers.items():
+            cls.yaml_implicit_resolvers[first_letter] = [
+                (tag, regexp) for tag, regexp in mappings if tag != tag_to_remove
+            ]
+
+
+NoDatesSafeLoader.remove_implicit_resolver("tag:yaml.org,2002:timestamp")
+
+
 def load_includes(workdir_path: pathlib.Path, include_path: pathlib.Path) -> ImportList:
     with include_path.open("rt") as fo:
-        rules = yaml.safe_load(fo)
-    imported = RuleListAdapter.validate_python(rules)
+        rules = yaml.load(fo, Loader=NoDatesSafeLoader)
+    try:
+        imported = RuleListAdapter.validate_python(rules)
+    except Exception as e:
+        # report the failure on stderr (not stdout) before exiting non-zero
+        print(
+            f"Error loading include file: {include_path}.\n\n"
+            f"{e}\n\n"
+            f"Include file content:\n"
+            f"{yaml.dump(rules, indent=2)}",
+            file=sys.stderr,
+        )
+        sys.exit(1)
+
     return resolve_includes(workdir_path=workdir_path, rules=imported)
 
 
diff --git a/beancount_importer_rules/processor/matchers.py b/beancount_importer_rules/processor/matchers.py
index 3435d4d..e51320a 100644
--- a/beancount_importer_rules/processor/matchers.py
+++ b/beancount_importer_rules/processor/matchers.py
@@ -1,13 +1,14 @@
+import datetime
 import pathlib
 import re
 
 from beancount_importer_rules.data_types import (
     SimpleTxnMatchRule,
-    StrExactMatch,
     StrMatch,
     StrRegexMatch,
     Transaction,
     TxnMatchVars,
+    stringify_value,
 )
 
 
@@ -26,23 +27,23 @@ def match_file(pattern: StrMatch, filepath: pathlib.Path | pathlib.PurePath) ->
     return pattern.test(str(filepath))
 
 
-def match_str(pattern: StrMatch, value: str | None) -> bool:
+def match_str(
+    pattern: StrMatch, value: str | datetime.date | datetime.datetime | None
+) -> bool:
+    """Return whether ``value`` satisfies ``pattern``; ``None`` never matches."""
     if value is None:
         return False
 
-    if pattern is None:
-        return True
-
-    if pattern == value:
-        return True
-
+    # Most patterns that are just strings are valid regexes.
     if isinstance(pattern, str) and is_valid_regex(pattern):
         pattern = StrRegexMatch(regex=pattern)
 
+    # if the pattern turns out to not be a regex, we can just compare the strings.
     if isinstance(pattern, str):
-        pattern = StrExactMatch(equals=pattern)
+        return stringify_value(value) == pattern
 
-    return pattern.test(value)
+    # otherwise we assume it's a complex matcher
+    return pattern.test(stringify_value(value))
 
 
 def match_transaction(
diff --git a/beancount_importer_rules/templates.py b/beancount_importer_rules/templates.py
index c4b7f8d..4258af3 100644
--- a/beancount_importer_rules/templates.py
+++ b/beancount_importer_rules/templates.py
@@ -1,4 +1,5 @@
 import pathlib
+from datetime import date, datetime
 
 from jinja2.sandbox import SandboxedEnvironment
 
@@ -7,7 +8,25 @@ def as_posix_path(path: pathlib.Path) -> str:
     return pathlib.Path(path).as_posix()
 
 
+def as_datetime(value) -> datetime:
+    """Parse a ``%Y-%m-%d`` string into a ``datetime``."""
+    return datetime.strptime(value, "%Y-%m-%d")
+
+
+def as_date(value) -> date:
+    """Parse a ``%Y-%m-%d`` string into a ``date``."""
+    return datetime.strptime(value, "%Y-%m-%d").date()
+
+
+def datetime_format(value, format="%H:%M %d-%m-%y") -> str:
+    """Render a ``date`` or ``datetime`` using a ``strftime`` format string."""
+    return value.strftime(format)
+
+
 def make_environment():
     env = SandboxedEnvironment()
+    env.filters["as_date"] = as_date
+    env.filters["as_datetime"] = as_datetime
+    env.filters["datetime_format"] = datetime_format
     env.filters["as_posix_path"] = as_posix_path
     return env
diff --git a/tests/fixtures/engine/imported/.gitignore b/tests/fixtures/engine/imported/.gitignore
deleted file mode 100644
index 1639587..0000000
--- a/tests/fixtures/engine/imported/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-*.bean
diff --git a/tests/test_engine.py b/tests/test_engine.py
index 84d3b4e..21e6e84 100644
--- a/tests/test_engine.py
+++ b/tests/test_engine.py
@@ -23,6 +23,10 @@ def test_engine_run():
     config_path = FIXTURE_FOLDER / "engine" / "import.yaml"
     beanfile_path = FIXTURE_FOLDER / "engine" / "books" / "main.bean"
 
+    # remove any existing output files
+    for f in (workdir / "books" / "imported").glob("*.bean"):
+        f.unlink()
+
     engine = ImportRuleEngine(
         workdir=str(workdir),
         config_path=str(config_path),
diff --git a/tests/test_match_dates.py b/tests/test_match_dates.py
index f49c84f..93b283e 100644
--- a/tests/test_match_dates.py
+++ b/tests/test_match_dates.py
@@ -107,6 +107,16 @@
             now.format("YYYY-MM-DD"),
             True,
         ),
+        (
+            now.format("YYYY-MM-DD"),
+            now.format("YYYY-MM-DD"),
+            True,
+        ),
+        (
+            now.shift(days=1).format("YYYY-MM-DD"),
+            now.format("YYYY-MM-DD"),
+            False,
+        ),
     ],
 )
 def test_match_dates(
@@ -119,4 +129,5 @@ def test_match_dates(
     value: str | None,
     expected: bool,
 ):
-    assert match_str(pattern, value) == expected
+    outcome = match_str(pattern, value) == expected
+    assert outcome
diff --git a/tests/test_match_regex.py b/tests/test_match_regex.py
new file mode 100644
index 0000000..1613b03
--- /dev/null
+++ b/tests/test_match_regex.py
@@ -0,0 +1,63 @@
+import arrow
+import pytest
+
+from beancount_importer_rules.data_types import StrRegexMatch
+from beancount_importer_rules.processor.matchers import (
+    match_str,
+)
+
+now = arrow.utcnow()
+
+
+@pytest.mark.parametrize(
+    "pattern, value, expected",
+    [
+        (
+            r"2021-01-01",
+            r"2021-01-01",
+            True,
+        ),
+        (
+            r"2021-01-01",
+            r"2021-01-02",
+            False,
+        ),
+        (
+            r"2021-01-01",
+            None,
+            False,
+        ),
+        (
+            r"2021-01-01",
+            now.format("YYYY-MM-DD"),
+            False,
+        ),
+        (
+            r"2021-01-01",
+            "2021-01-01",
+            True,
+        ),
+        (
+            r"2021-01-01",
+            "2021-01-02",
+            False,
+        ),
+        (
+            r"2021-01-.*",
+            "2021-01-02",
+            True,
+        ),
+        (
+            "2021",
+            "2021-01-02",
+            True,
+        ),
+    ],
+)
+def test_match_regex(
+    pattern: str | StrRegexMatch,
+    value: str | None,
+    expected: bool,
+):
+    outcome = match_str(pattern, value) == expected
+    assert outcome
diff --git a/tests/test_templates.py b/tests/test_templates.py
new file mode 100644
index 0000000..823e003
--- /dev/null
+++ b/tests/test_templates.py
@@ -0,0 +1,18 @@
+from beancount_importer_rules.templates import make_environment
+
+
+def test_make_environment():
+    env = make_environment()
+    assert env
+    assert env.filters["as_date"]
+    assert env.filters["as_datetime"]
+    assert env.filters["datetime_format"]
+    assert env.filters["as_posix_path"]
+
+
+def test_format_datetime():
+    env = make_environment()
+    template = "{{ date | as_date | datetime_format('%Y') }}"
+    result = env.from_string(template).render({"date": "2022-01-01"})
+
+    assert result == "2022"