Skip to content

Commit

Permalink
fix: date fields are treated as strings (#44)
Browse files Browse the repository at this point in the history
There were many non-obvious issues in config file parsing, in matchers,
and in the correlation between matcher config and the format you
should be comparing against.

So now when extracting, use the `ExtractorBase.parse_date` as it will
emit a date string in the format acceptable to beancount.

From there on, your matchers can expect to work with dates as strings in
that format, just as you see them in your journals.

Specifying a date format in your extractor classes or in your extractor
config is merely there to convert the date string found in your
source files (pdf, csv etc).
  • Loading branch information
airtonix authored Sep 18, 2024
1 parent 4b397eb commit c814a21
Show file tree
Hide file tree
Showing 9 changed files with 167 additions and 16 deletions.
8 changes: 6 additions & 2 deletions beancount_importer_rules/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,9 +129,13 @@ def process(self):
"""

name: str = "extractor"
"""The name of the extractor. Will end up being available to matchers as `extractor`"""
date_field: str = "Date"
date_format: str = "%Y-%m-%d"
datetime_format: str = "%Y-%m-%d %H:%M:%S"
"""The field in the CSV file that contains the date"""
date_format: str = "YYYY-MM-DD"
"""Arrow date format"""
datetime_format: str = "YYYY-MM-DD HH:mm:ss"
"""Arrow datetime format (in Arrow tokens `MM` is the month; minutes are `mm` and seconds are `ss`)"""

def __init__(
self,
Expand Down
40 changes: 38 additions & 2 deletions beancount_importer_rules/includes.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
"""

import pathlib
import sys

import yaml
from pydantic import TypeAdapter
Expand All @@ -17,10 +18,45 @@
RuleListAdapter = TypeAdapter(list[ImportRule | IncludeRule])


class NoDatesSafeLoader(yaml.SafeLoader):
    """A ``yaml.SafeLoader`` subclass whose implicit resolvers can be pruned by tag."""

    @classmethod
    def remove_implicit_resolver(cls, tag_to_remove):
        """
        Drop every implicit resolver registered for ``tag_to_remove``.

        The resolver table is first copied onto this class when it is still
        inherited, so resolvers on super classes are left unmodified.

        Datetimes should load as strings rather than dates: the data is later
        serialised as json, which lacks yaml's advanced types, and that leads
        to incompatibilities down the track.
        """
        # Make sure this class owns its table before editing it.
        if "yaml_implicit_resolvers" not in vars(cls):
            cls.yaml_implicit_resolvers = cls.yaml_implicit_resolvers.copy()

        table = cls.yaml_implicit_resolvers
        for first_letter in list(table):
            kept = []
            for tag, regexp in table[first_letter]:
                if tag != tag_to_remove:
                    kept.append((tag, regexp))
            table[first_letter] = kept


# Remove the implicit timestamp resolver from this loader class at import time,
# so that datetimes are loaded as strings rather than dates.
NoDatesSafeLoader.remove_implicit_resolver("tag:yaml.org,2002:timestamp")


def load_includes(workdir_path: pathlib.Path, include_path: pathlib.Path) -> ImportList:
    """Load one include file and return its resolved rule list.

    The file is parsed with ``NoDatesSafeLoader``, validated through
    ``RuleListAdapter``, and the validated rules are passed on to
    ``resolve_includes``.

    Args:
        workdir_path: Forwarded unchanged to ``resolve_includes``.
        include_path: Path of the YAML include file to read.

    Returns:
        Whatever ``resolve_includes`` returns for the validated rules.

    Raises:
        SystemExit: With status 1 when validation fails; a report with the
            error and the parsed content is written to stderr first.
    """
    # Parse the file exactly once: loading a second time from the same handle
    # would read an already exhausted stream.
    # NoDatesSafeLoader subclasses yaml.SafeLoader, so this is still a safe load.
    with include_path.open("rt") as fo:
        rules = yaml.load(fo, Loader=NoDatesSafeLoader)

    try:
        imported = RuleListAdapter.validate_python(rules)
    except Exception as e:
        # Top-level boundary: report the problem with the offending content
        # and stop, instead of surfacing a raw traceback to the user.
        print(
            f"Error loading include file: {include_path}.\n\n"
            f"{e}\n\n"
            f"Include file content:\n"
            f"{yaml.dump(rules, indent=2)}",
            file=sys.stderr,
        )
        sys.exit(1)

    return resolve_includes(workdir_path=workdir_path, rules=imported)


Expand Down
20 changes: 10 additions & 10 deletions beancount_importer_rules/processor/matchers.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
import datetime
import pathlib
import re

from beancount_importer_rules.data_types import (
SimpleTxnMatchRule,
StrExactMatch,
StrMatch,
StrRegexMatch,
Transaction,
TxnMatchVars,
stringify_value,
)


Expand All @@ -26,23 +27,22 @@ def match_file(pattern: StrMatch, filepath: pathlib.Path | pathlib.PurePath) ->
return pattern.test(str(filepath))


def match_str(
    pattern: StrMatch, value: str | datetime.date | datetime.datetime | None
) -> bool:
    """Return whether ``value`` satisfies ``pattern``.

    Args:
        pattern: A plain string or a matcher object exposing ``test``.
        value: The value to check. It is passed through ``stringify_value``
            before any comparison other than the direct equality shortcut
            (presumably so dates are compared in their string form - confirm
            against ``stringify_value``).

    Returns:
        ``False`` when ``value`` is ``None``; ``True`` when ``pattern`` is
        ``None`` or equals ``value``; otherwise the result of the regex,
        string-equality, or matcher test described below.
    """
    if value is None:
        return False

    if pattern is None:
        return True

    if pattern == value:
        return True

    text = stringify_value(value)

    if isinstance(pattern, str):
        # Most patterns that are just strings are valid regexes.
        if is_valid_regex(pattern):
            return StrRegexMatch(regex=pattern).test(text)
        # If the pattern turns out not to be a regex, compare the strings.
        return text == pattern

    # Otherwise we assume it's a complex matcher.
    return pattern.test(text)


def match_transaction(
Expand Down
16 changes: 16 additions & 0 deletions beancount_importer_rules/templates.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import pathlib
from datetime import date, datetime

from jinja2.sandbox import SandboxedEnvironment

Expand All @@ -7,7 +8,22 @@ def as_posix_path(path: pathlib.Path) -> str:
return pathlib.Path(path).as_posix()


def as_datetime(value) -> datetime:
    """Parse a ``YYYY-MM-DD`` string into a ``datetime`` (time part is midnight)."""
    parsed = datetime.strptime(value, "%Y-%m-%d")
    return parsed


def as_date(value) -> date:
    """Parse a ``YYYY-MM-DD`` string and return only its calendar date."""
    moment = datetime.strptime(value, "%Y-%m-%d")
    return moment.date()


def datetime_format(value, format="%H:%M %d-%m-%y") -> str:
    """Format a date-like ``value`` with a ``strftime`` pattern.

    Args:
        value: Any object providing ``strftime`` (``date``, ``datetime``, ...).
        format: ``strftime`` pattern. The name shadows the builtin but is kept
            because callers may pass it by keyword.

    Returns:
        The formatted string.
    """
    # Call the bound method so the filter works for every strftime-capable
    # object instead of going through datetime's unbound method.
    return value.strftime(format)


def make_environment():
    """Build a ``SandboxedEnvironment`` with the project's custom filters registered."""
    sandbox = SandboxedEnvironment()
    # Registered in the same order as before: the date helpers, then the path helper.
    sandbox.filters.update(
        {
            "as_date": as_date,
            "as_datetime": as_datetime,
            "datetime_format": datetime_format,
            "as_posix_path": as_posix_path,
        }
    )
    return sandbox
1 change: 0 additions & 1 deletion tests/fixtures/engine/imported/.gitignore

This file was deleted.

4 changes: 4 additions & 0 deletions tests/test_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@ def test_engine_run():
config_path = FIXTURE_FOLDER / "engine" / "import.yaml"
beanfile_path = FIXTURE_FOLDER / "engine" / "books" / "main.bean"

# remove any existing output files
for f in (workdir / "books" / "imported").glob("*.bean"):
    # unlink has to be called; a bare `f.unlink` only looks the method up
    # and leaves the stale file in place.
    f.unlink()

engine = ImportRuleEngine(
workdir=str(workdir),
config_path=str(config_path),
Expand Down
13 changes: 12 additions & 1 deletion tests/test_match_dates.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,16 @@
now.format("YYYY-MM-DD"),
True,
),
(
now.format("YYYY-MM-DD"),
now.format("YYYY-MM-DD"),
True,
),
(
now.shift(days=1).format("YYYY-MM-DD"),
now.format("YYYY-MM-DD"),
False,
),
],
)
def test_match_dates(
Expand All @@ -119,4 +129,5 @@ def test_match_dates(
value: str | None,
expected: bool,
):
assert match_str(pattern, value) == expected
outcome = match_str(pattern, value) == expected
assert outcome
63 changes: 63 additions & 0 deletions tests/test_match_regex.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
import arrow
import pytest

from beancount_importer_rules.data_types import StrRegexMatch
from beancount_importer_rules.processor.matchers import (
match_str,
)

now = arrow.utcnow()


@pytest.mark.parametrize(
    "pattern, value, expected",
    [
        # identical date strings match, different ones do not
        (r"2021-01-01", r"2021-01-01", True),
        (r"2021-01-01", r"2021-01-02", False),
        # a missing value is expected not to match
        (r"2021-01-01", None, False),
        # today's date is expected not to match the fixed 2021 pattern
        (r"2021-01-01", now.format("YYYY-MM-DD"), False),
        (r"2021-01-01", "2021-01-01", True),
        (r"2021-01-01", "2021-01-02", False),
        # wildcard and bare-year patterns against a full date
        (r"2021-01-.*", "2021-01-02", True),
        ("2021", "2021-01-02", True),
    ],
)
def test_match_regex(
    pattern: str | StrRegexMatch,
    value: str | None,
    expected: bool,
):
    """Check ``match_str`` against each parametrized pattern/value pair."""
    assert match_str(pattern, value) == expected
18 changes: 18 additions & 0 deletions tests/test_templates.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from beancount_importer_rules.templates import make_environment


def test_make_environment():
    """The environment is created and every custom filter is registered."""
    environment = make_environment()
    assert environment
    for filter_name in ("as_date", "as_datetime", "datetime_format", "as_posix_path"):
        assert environment.filters[filter_name]


def test_format_datetime():
    """Chaining ``as_date`` into ``datetime_format('%Y')`` renders just the year."""
    source = "{{ date | as_date | datetime_format('%Y') }}"
    compiled = make_environment().from_string(source)
    rendered = compiled.render({"date": "2022-01-01"})

    assert rendered == "2022"

0 comments on commit c814a21

Please sign in to comment.