From 240e360085bceb0cc039bcaa059ba2ffaf1e8609 Mon Sep 17 00:00:00 2001 From: mrzaizai2k Date: Wed, 13 Nov 2024 15:55:31 +0700 Subject: [PATCH] add category_matching_thresh config --- config/config.yaml | 1 + config/data1.json | 2 +- src/egw_export.py | 5 +++-- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/config/config.yaml b/config/config.yaml index 4c4d9a1..a7f73f9 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -83,6 +83,7 @@ excel: egw: output_path: output + category_matching_thresh: 0.6 diff --git a/config/data1.json b/config/data1.json index 4d0cf9e..7c3af1e 100644 --- a/config/data1.json +++ b/config/data1.json @@ -42,7 +42,7 @@ "start_time": "06:00:00", "end_time": "16:00:00", "break_time": 0.5, - "description": "reisezeit ", + "description": "Reisezeit Auftrag", "has_customer_signature": false }, { diff --git a/src/egw_export.py b/src/egw_export.py index 55e8783..8983b19 100644 --- a/src/egw_export.py +++ b/src/egw_export.py @@ -86,7 +86,7 @@ def calculate_similarity(a: str, b: str) -> float: """ return SequenceMatcher(None, a, b).ratio() -def handle_kategorie(invoice_info: dict, line: dict, threshold: float = 0.7) -> str: +def handle_kategorie(invoice_info: dict, line: dict, threshold: float = 0.6) -> str: """ Determine 'Kategorie' based on 'is_without_measuring_technology' and 'description'. If 'description' closely matches "reisezeit auftrag" based on the similarity threshold, @@ -132,7 +132,8 @@ def export_egw_file(config: dict, invoice_lists: list) -> str: for line in invoice_info["lines"]: title = handle_title(invoice_info, line) dauer = handle_dauer(line) - kategorie = handle_kategorie(invoice_info, line) # Now passing `line` for description check + kategorie = handle_kategorie(invoice_info, line, + threshold=config['category_matching_thresh']) # Now passing `line` for description check row = { "Stundenzettel ID": None,