Skip to content

Commit

Permalink
add category_matching_thresh config
Browse files: browse the repository at this point in the history
  • Loading branch information
mrzaizai2k committed Nov 13, 2024
1 parent 99a485e commit 240e360
Show file tree
Hide file tree
Showing 3 changed files with 5 additions and 3 deletions.
1 change: 1 addition & 0 deletions config/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ excel:

egw:
output_path: output
category_matching_thresh: 0.6



Expand Down
2 changes: 1 addition & 1 deletion config/data1.json
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
"start_time": "06:00:00",
"end_time": "16:00:00",
"break_time": 0.5,
"description": "reisezeit ",
"description": "Reisezeit Auftrag",
"has_customer_signature": false
},
{
Expand Down
5 changes: 3 additions & 2 deletions src/egw_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ def calculate_similarity(a: str, b: str) -> float:
"""
return SequenceMatcher(None, a, b).ratio()

def handle_kategorie(invoice_info: dict, line: dict, threshold: float = 0.7) -> str:
def handle_kategorie(invoice_info: dict, line: dict, threshold: float = 0.6) -> str:
"""
Determine 'Kategorie' based on 'is_without_measuring_technology' and 'description'.
If 'description' closely matches "reisezeit auftrag" based on the similarity threshold,
Expand Down Expand Up @@ -132,7 +132,8 @@ def export_egw_file(config: dict, invoice_lists: list) -> str:
for line in invoice_info["lines"]:
title = handle_title(invoice_info, line)
dauer = handle_dauer(line)
kategorie = handle_kategorie(invoice_info, line) # Now passing `line` for description check
kategorie = handle_kategorie(invoice_info, line,
threshold=config['category_matching_thresh']) # Now passing `line` for description check

row = {
"Stundenzettel ID": None,
Expand Down

0 comments on commit 240e360

Please sign in to comment.