From 086d1bce61d63bc3f7a92a1664f94da8f53fa61b Mon Sep 17 00:00:00 2001 From: "denis.plotnikov" Date: Tue, 19 Mar 2024 08:57:02 +0300 Subject: [PATCH] dev3 initial commit --- .gitignore | 1 + package_info.json | 2 +- recon_lw/__init__.py | 1 - recon_lw/{ => core}/EventsSaver.py | 1 - recon_lw/{ => core}/SequenceCache.py | 4 +- recon_lw/{ => core}/StateStream.py | 22 +- {template => recon_lw/core}/__init__.py | 0 recon_lw/{ => core}/_types.py | 0 .../core/cache}/__init__.py | 0 .../core/cache/processor}/__init__.py | 0 recon_lw/core/cache/processor/base.py | 35 + recon_lw/core/cache/processor/chain.py | 12 + recon_lw/{ => core}/message_utils.py | 0 recon_lw/core/rule/__init__.py | 1 + recon_lw/core/rule/base.py | 55 ++ recon_lw/core/rule/one_many.py | 113 ++++ recon_lw/core/rule/pair_one.py | 54 ++ recon_lw/{ => core}/stream.py | 10 +- recon_lw/{ => core}/ts_converters.py | 2 +- recon_lw/core/type/__init__.py | 1 + recon_lw/core/type/types.py | 6 + recon_lw/core/utility/__init__.py | 3 + recon_lw/core/utility/counter.py | 8 + recon_lw/core/utility/event_sequence.py | 10 + recon_lw/core/utility/recon_utils.py | 258 ++++++++ .../interpretation/__init__.py | 0 recon_lw/interpretation/adapter/__init__.py | 4 + .../interpretation/adapter/adapter_context.py | 21 + recon_lw/interpretation/adapter/base.py | 102 +++ recon_lw/interpretation/adapter/compound.py | 107 +++ recon_lw/interpretation/adapter/simple.py | 87 +++ .../interpretation/check_rule/__init__.py | 2 + recon_lw/interpretation/check_rule/adapter.py | 14 + recon_lw/interpretation/check_rule/base.py | 44 ++ .../interpretation/check_rule/check_result.py | 25 + recon_lw/interpretation/check_rule/equal.py | 25 + recon_lw/interpretation/condition/__init__.py | 2 + recon_lw/interpretation/condition/base.py | 11 + recon_lw/interpretation/condition/function.py | 18 + recon_lw/interpretation/converter/__init__.py | 10 + recon_lw/interpretation/converter/base.py | 18 + recon_lw/interpretation/converter/boolean.py | 16 + 
recon_lw/interpretation/converter/chain.py | 30 + .../interpretation/converter/condition.py | 23 + recon_lw/interpretation/converter/constant.py | 9 + recon_lw/interpretation/converter/datetime.py | 20 + .../interpretation/converter/dictionary.py | 37 ++ recon_lw/interpretation/converter/dummy.py | 8 + .../interpretation/converter/empty_string.py | 13 + recon_lw/interpretation/converter/function.py | 12 + recon_lw/interpretation/converter/length.py | 10 + recon_lw/interpretation/converter/list.py | 75 +++ recon_lw/interpretation/converter/mapping.py | 14 + recon_lw/interpretation/converter/regex.py | 15 + recon_lw/interpretation/converter/type.py | 31 + .../interpretation/field_checker/__init__.py | 2 + recon_lw/interpretation/field_checker/base.py | 22 + .../interpretation/field_checker/simple.py | 17 + .../field_extractor/__init__.py | 7 + .../interpretation/field_extractor/any_val.py | 25 + .../interpretation/field_extractor/base.py | 23 + .../interpretation/field_extractor/cache.py | 124 ++++ .../interpretation/field_extractor/concat.py | 18 + .../field_extractor/condition.py | 51 ++ .../field_extractor/constant.py | 40 ++ .../field_extractor/converter.py | 82 +++ .../field_extractor/dictionary.py | 39 ++ .../interpretation/field_extractor/list.py | 62 ++ .../interpretation/field_extractor/one_of.py | 18 + .../interpretation/field_extractor/refdata.py | 69 ++ recon_lw/interpretation/filter/__init__.py | 6 + .../interpretation/filter/amend_reject.py | 47 ++ recon_lw/interpretation/filter/base.py | 18 + recon_lw/interpretation/filter/dummy.py | 8 + recon_lw/interpretation/filter/field.py | 22 + .../interpretation/filter/filter_chain.py | 24 + recon_lw/interpretation/filter/function.py | 11 + .../interpretation/filter/message_type.py | 12 + .../interpretation/filter/non_empty_field.py | 20 + .../interpretation/filter/session_alias.py | 13 + .../interpretation_functions/__init__.py | 4 + .../interpretation_functions/base.py | 9 + .../event_enhancement/__init__.py | 
2 + .../event_enhancement/base.py | 18 + .../event_enhancement/enhancement_chain.py | 18 + .../event_handling_strategy/__init__.py | 0 .../event_handling_strategy/base.py | 182 +++++ .../event_name_provider/__init__.py | 2 + .../event_name_provider/base.py | 26 + .../event_name_provider/simple.py | 18 + .../interpretation_functions/event_type.py | 3 +- .../interpretation_functions/simple.py | 139 ++++ .../{ => interpretation}/recon_ob_stats.py | 16 +- recon_lw/{ => matching}/LastStateMatcher.py | 15 +- recon_lw/{ => matching}/LiveObjectsCache.py | 5 +- .../{ => matching}/StateSequenceGenerator.py | 8 +- recon_lw/{ => matching}/TimeCacheMatcher.py | 2 +- recon_lw/matching/__init__.py | 0 recon_lw/matching/collect_matcher/__init__.py | 2 + recon_lw/matching/collect_matcher/base.py | 27 + recon_lw/matching/collect_matcher/default.py | 28 + recon_lw/matching/flush_function/__init__.py | 2 + recon_lw/matching/flush_function/base.py | 26 + recon_lw/matching/flush_function/default.py | 79 +++ recon_lw/matching/init_function/__init__.py | 2 + recon_lw/matching/init_function/base.py | 9 + .../init_function/context/__init__.py | 0 .../matching/init_function/context/base.py | 6 + .../matching/init_function/context/simple.py | 17 + recon_lw/matching/init_function/default.py | 18 + recon_lw/matching/key_functions/__init__.py | 3 + recon_lw/matching/key_functions/base.py | 14 + recon_lw/matching/key_functions/default.py | 23 + .../matching/key_functions/simple_copy.py | 29 + .../matching/key_functions/simple_original.py | 22 + .../matching_key_extractor/__init__.py | 2 + .../matching/matching_key_extractor/base.py | 16 + .../matching_key_extractor/separator.py | 47 ++ recon_lw/matching/old/__init__.py | 0 recon_lw/matching/old/matching.py | 133 ++++ recon_lw/matching/old/utils.py | 49 ++ recon_lw/{ => matching}/recon_ob.py | 30 +- .../{ => matching}/recon_ob_cross_stream.py | 10 +- recon_lw/{ => matching}/recon_oe_ob.py | 44 +- recon_lw/matching/stream_matcher/__init__.py | 1 + 
recon_lw/matching/stream_matcher/base.py | 10 + recon_lw/matching/stream_matcher/one_many.py | 65 ++ recon_lw/matching/stream_matcher/pair_one.py | 47 ++ recon_lw/recon_lw.py | 621 ------------------ recon_lw/recon_lw_entrypoint.py | 163 +++++ recon_lw/reporting/__init__.py | 0 recon_lw/reporting/coverage/__init__.py | 0 .../reporting/coverage/viewer/__init__.py | 0 .../coverage/viewer/fields_viewer.py | 75 +++ recon_lw/reporting/known_issues/__init__.py | 3 + recon_lw/reporting/known_issues/exec_type.py | 10 + recon_lw/reporting/known_issues/issue.py | 61 ++ .../reporting/known_issues/issue_status.py | 7 + recon_lw/reporting/match_diff/__init__.py | 0 .../match_diff/categorizer/__init__.py | 1 + .../reporting/match_diff/categorizer/base.py | 30 + .../reporting/match_diff/categorizer/basic.py | 64 ++ .../categorizer/event_category/__init__.py | 2 + .../categorizer/event_category/base.py | 44 ++ .../categorizer/event_category/basic.py | 89 +++ .../match_diff/categorizer/types/__init__.py | 5 + .../match_diff/categorizer/types/context.py | 14 + .../types/error_categories_stats.py | 32 + .../categorizer/types/error_examples.py | 28 + .../categorizer/types/field_problems.py | 22 + .../categorizer/types/match_stats.py | 15 + .../reporting/match_diff/viewer/__init__.py | 0 .../match_diff/viewer/category_displayer.py | 202 ++++++ .../viewer/color_provider/__init__.py | 0 .../match_diff/viewer/color_provider/base.py | 15 + .../viewer/color_provider/default.py | 6 + .../viewer/content_provider/__init__.py | 0 .../viewer/content_provider/base.py | 10 + .../viewer/content_provider/default.py | 12 + .../viewer/style_provider/__init__.py | 0 .../match_diff/viewer/style_provider/base.py | 16 + .../viewer/style_provider/default.py | 77 +++ .../match_diff/viewer/types/__init__.py | 0 .../match_diff/viewer/types/types.py | 8 + recon_lw/reporting/match_diff/viewer/utils.py | 32 + .../reporting/missing_messages/__init__.py | 0 .../missing_messages/categorizer/__init__.py | 0 
.../categorizer/categorizer_impl.py | 17 + .../categorizer/matcher_interface.py | 11 + .../categorizer/matchers_impl.py | 31 + .../missing_messages/categorizer/rule.py | 12 + .../missing_messages/categorizer/utils.py | 0 recon_lw/reporting/missing_messages/utils.py | 49 ++ .../missing_messages/viewer/__init__.py | 0 .../viewer/missing_message.py | 37 ++ recon_lw/reporting/recon_context/__init__.py | 0 recon_lw/reporting/recon_context/context.py | 39 ++ recon_lw/reporting/recon_metadata/__init__.py | 1 + recon_lw/reporting/recon_metadata/base.py | 12 + recon_lw/reporting/stats/__init__.py | 0 recon_lw/reporting/stats/stats.py | 29 + recon_lw/reporting/utils.py | 16 + requirements.txt | 10 - template/adapters/base_adapter.py | 202 ------ template/adapters/readme.md | 4 - template/adapters/stream1_adapter.py | 41 -- template/adapters/stream2_adapter.py | 40 -- template/download_data.py | 3 - template/fields_checker.py | 67 -- template/interpret_functions.py | 206 ------ template/matching_functions.py | 139 ---- template/recon.py | 52 -- template/rules/rule1.py | 104 --- template/utils.py | 113 ---- 194 files changed, 4612 insertions(+), 1697 deletions(-) rename recon_lw/{ => core}/EventsSaver.py (99%) rename recon_lw/{ => core}/SequenceCache.py (97%) rename recon_lw/{ => core}/StateStream.py (95%) rename {template => recon_lw/core}/__init__.py (100%) rename recon_lw/{ => core}/_types.py (100%) rename {template/adapters => recon_lw/core/cache}/__init__.py (100%) rename {template/rules => recon_lw/core/cache/processor}/__init__.py (100%) create mode 100644 recon_lw/core/cache/processor/base.py create mode 100644 recon_lw/core/cache/processor/chain.py rename recon_lw/{ => core}/message_utils.py (100%) create mode 100644 recon_lw/core/rule/__init__.py create mode 100644 recon_lw/core/rule/base.py create mode 100644 recon_lw/core/rule/one_many.py create mode 100644 recon_lw/core/rule/pair_one.py rename recon_lw/{ => core}/stream.py (93%) rename recon_lw/{ => 
core}/ts_converters.py (91%) create mode 100644 recon_lw/core/type/__init__.py create mode 100644 recon_lw/core/type/types.py create mode 100644 recon_lw/core/utility/__init__.py create mode 100644 recon_lw/core/utility/counter.py create mode 100644 recon_lw/core/utility/event_sequence.py create mode 100644 recon_lw/core/utility/recon_utils.py rename template/adapters/utils.py => recon_lw/interpretation/__init__.py (100%) create mode 100644 recon_lw/interpretation/adapter/__init__.py create mode 100644 recon_lw/interpretation/adapter/adapter_context.py create mode 100644 recon_lw/interpretation/adapter/base.py create mode 100644 recon_lw/interpretation/adapter/compound.py create mode 100644 recon_lw/interpretation/adapter/simple.py create mode 100644 recon_lw/interpretation/check_rule/__init__.py create mode 100644 recon_lw/interpretation/check_rule/adapter.py create mode 100644 recon_lw/interpretation/check_rule/base.py create mode 100644 recon_lw/interpretation/check_rule/check_result.py create mode 100644 recon_lw/interpretation/check_rule/equal.py create mode 100644 recon_lw/interpretation/condition/__init__.py create mode 100644 recon_lw/interpretation/condition/base.py create mode 100644 recon_lw/interpretation/condition/function.py create mode 100644 recon_lw/interpretation/converter/__init__.py create mode 100644 recon_lw/interpretation/converter/base.py create mode 100644 recon_lw/interpretation/converter/boolean.py create mode 100644 recon_lw/interpretation/converter/chain.py create mode 100644 recon_lw/interpretation/converter/condition.py create mode 100644 recon_lw/interpretation/converter/constant.py create mode 100644 recon_lw/interpretation/converter/datetime.py create mode 100644 recon_lw/interpretation/converter/dictionary.py create mode 100644 recon_lw/interpretation/converter/dummy.py create mode 100644 recon_lw/interpretation/converter/empty_string.py create mode 100644 recon_lw/interpretation/converter/function.py create mode 100644 
recon_lw/interpretation/converter/length.py create mode 100644 recon_lw/interpretation/converter/list.py create mode 100644 recon_lw/interpretation/converter/mapping.py create mode 100644 recon_lw/interpretation/converter/regex.py create mode 100644 recon_lw/interpretation/converter/type.py create mode 100644 recon_lw/interpretation/field_checker/__init__.py create mode 100644 recon_lw/interpretation/field_checker/base.py create mode 100644 recon_lw/interpretation/field_checker/simple.py create mode 100644 recon_lw/interpretation/field_extractor/__init__.py create mode 100644 recon_lw/interpretation/field_extractor/any_val.py create mode 100644 recon_lw/interpretation/field_extractor/base.py create mode 100644 recon_lw/interpretation/field_extractor/cache.py create mode 100644 recon_lw/interpretation/field_extractor/concat.py create mode 100644 recon_lw/interpretation/field_extractor/condition.py create mode 100644 recon_lw/interpretation/field_extractor/constant.py create mode 100644 recon_lw/interpretation/field_extractor/converter.py create mode 100644 recon_lw/interpretation/field_extractor/dictionary.py create mode 100644 recon_lw/interpretation/field_extractor/list.py create mode 100644 recon_lw/interpretation/field_extractor/one_of.py create mode 100644 recon_lw/interpretation/field_extractor/refdata.py create mode 100644 recon_lw/interpretation/filter/__init__.py create mode 100644 recon_lw/interpretation/filter/amend_reject.py create mode 100644 recon_lw/interpretation/filter/base.py create mode 100644 recon_lw/interpretation/filter/dummy.py create mode 100644 recon_lw/interpretation/filter/field.py create mode 100644 recon_lw/interpretation/filter/filter_chain.py create mode 100644 recon_lw/interpretation/filter/function.py create mode 100644 recon_lw/interpretation/filter/message_type.py create mode 100644 recon_lw/interpretation/filter/non_empty_field.py create mode 100644 recon_lw/interpretation/filter/session_alias.py create mode 100644 
recon_lw/interpretation/interpretation_functions/__init__.py create mode 100644 recon_lw/interpretation/interpretation_functions/base.py create mode 100644 recon_lw/interpretation/interpretation_functions/event_enhancement/__init__.py create mode 100644 recon_lw/interpretation/interpretation_functions/event_enhancement/base.py create mode 100644 recon_lw/interpretation/interpretation_functions/event_enhancement/enhancement_chain.py create mode 100644 recon_lw/interpretation/interpretation_functions/event_handling_strategy/__init__.py create mode 100644 recon_lw/interpretation/interpretation_functions/event_handling_strategy/base.py create mode 100644 recon_lw/interpretation/interpretation_functions/event_name_provider/__init__.py create mode 100644 recon_lw/interpretation/interpretation_functions/event_name_provider/base.py create mode 100644 recon_lw/interpretation/interpretation_functions/event_name_provider/simple.py rename template/recon_event_types.py => recon_lw/interpretation/interpretation_functions/event_type.py (73%) create mode 100644 recon_lw/interpretation/interpretation_functions/simple.py rename recon_lw/{ => interpretation}/recon_ob_stats.py (92%) rename recon_lw/{ => matching}/LastStateMatcher.py (97%) rename recon_lw/{ => matching}/LiveObjectsCache.py (94%) rename recon_lw/{ => matching}/StateSequenceGenerator.py (94%) rename recon_lw/{ => matching}/TimeCacheMatcher.py (98%) create mode 100644 recon_lw/matching/__init__.py create mode 100644 recon_lw/matching/collect_matcher/__init__.py create mode 100644 recon_lw/matching/collect_matcher/base.py create mode 100644 recon_lw/matching/collect_matcher/default.py create mode 100644 recon_lw/matching/flush_function/__init__.py create mode 100644 recon_lw/matching/flush_function/base.py create mode 100644 recon_lw/matching/flush_function/default.py create mode 100644 recon_lw/matching/init_function/__init__.py create mode 100644 recon_lw/matching/init_function/base.py create mode 100644 
recon_lw/matching/init_function/context/__init__.py create mode 100644 recon_lw/matching/init_function/context/base.py create mode 100644 recon_lw/matching/init_function/context/simple.py create mode 100644 recon_lw/matching/init_function/default.py create mode 100644 recon_lw/matching/key_functions/__init__.py create mode 100644 recon_lw/matching/key_functions/base.py create mode 100644 recon_lw/matching/key_functions/default.py create mode 100644 recon_lw/matching/key_functions/simple_copy.py create mode 100644 recon_lw/matching/key_functions/simple_original.py create mode 100644 recon_lw/matching/matching_key_extractor/__init__.py create mode 100644 recon_lw/matching/matching_key_extractor/base.py create mode 100644 recon_lw/matching/matching_key_extractor/separator.py create mode 100644 recon_lw/matching/old/__init__.py create mode 100644 recon_lw/matching/old/matching.py create mode 100644 recon_lw/matching/old/utils.py rename recon_lw/{ => matching}/recon_ob.py (97%) rename recon_lw/{ => matching}/recon_ob_cross_stream.py (98%) rename recon_lw/{ => matching}/recon_oe_ob.py (94%) create mode 100644 recon_lw/matching/stream_matcher/__init__.py create mode 100644 recon_lw/matching/stream_matcher/base.py create mode 100644 recon_lw/matching/stream_matcher/one_many.py create mode 100644 recon_lw/matching/stream_matcher/pair_one.py delete mode 100644 recon_lw/recon_lw.py create mode 100644 recon_lw/recon_lw_entrypoint.py create mode 100644 recon_lw/reporting/__init__.py create mode 100644 recon_lw/reporting/coverage/__init__.py create mode 100644 recon_lw/reporting/coverage/viewer/__init__.py create mode 100644 recon_lw/reporting/coverage/viewer/fields_viewer.py create mode 100644 recon_lw/reporting/known_issues/__init__.py create mode 100644 recon_lw/reporting/known_issues/exec_type.py create mode 100644 recon_lw/reporting/known_issues/issue.py create mode 100644 recon_lw/reporting/known_issues/issue_status.py create mode 100644 
recon_lw/reporting/match_diff/__init__.py create mode 100644 recon_lw/reporting/match_diff/categorizer/__init__.py create mode 100644 recon_lw/reporting/match_diff/categorizer/base.py create mode 100644 recon_lw/reporting/match_diff/categorizer/basic.py create mode 100644 recon_lw/reporting/match_diff/categorizer/event_category/__init__.py create mode 100644 recon_lw/reporting/match_diff/categorizer/event_category/base.py create mode 100644 recon_lw/reporting/match_diff/categorizer/event_category/basic.py create mode 100644 recon_lw/reporting/match_diff/categorizer/types/__init__.py create mode 100644 recon_lw/reporting/match_diff/categorizer/types/context.py create mode 100644 recon_lw/reporting/match_diff/categorizer/types/error_categories_stats.py create mode 100644 recon_lw/reporting/match_diff/categorizer/types/error_examples.py create mode 100644 recon_lw/reporting/match_diff/categorizer/types/field_problems.py create mode 100644 recon_lw/reporting/match_diff/categorizer/types/match_stats.py create mode 100644 recon_lw/reporting/match_diff/viewer/__init__.py create mode 100644 recon_lw/reporting/match_diff/viewer/category_displayer.py create mode 100644 recon_lw/reporting/match_diff/viewer/color_provider/__init__.py create mode 100644 recon_lw/reporting/match_diff/viewer/color_provider/base.py create mode 100644 recon_lw/reporting/match_diff/viewer/color_provider/default.py create mode 100644 recon_lw/reporting/match_diff/viewer/content_provider/__init__.py create mode 100644 recon_lw/reporting/match_diff/viewer/content_provider/base.py create mode 100644 recon_lw/reporting/match_diff/viewer/content_provider/default.py create mode 100644 recon_lw/reporting/match_diff/viewer/style_provider/__init__.py create mode 100644 recon_lw/reporting/match_diff/viewer/style_provider/base.py create mode 100644 recon_lw/reporting/match_diff/viewer/style_provider/default.py create mode 100644 recon_lw/reporting/match_diff/viewer/types/__init__.py create mode 100644 
recon_lw/reporting/match_diff/viewer/types/types.py create mode 100644 recon_lw/reporting/match_diff/viewer/utils.py create mode 100644 recon_lw/reporting/missing_messages/__init__.py create mode 100644 recon_lw/reporting/missing_messages/categorizer/__init__.py create mode 100644 recon_lw/reporting/missing_messages/categorizer/categorizer_impl.py create mode 100644 recon_lw/reporting/missing_messages/categorizer/matcher_interface.py create mode 100644 recon_lw/reporting/missing_messages/categorizer/matchers_impl.py create mode 100644 recon_lw/reporting/missing_messages/categorizer/rule.py create mode 100644 recon_lw/reporting/missing_messages/categorizer/utils.py create mode 100644 recon_lw/reporting/missing_messages/utils.py create mode 100644 recon_lw/reporting/missing_messages/viewer/__init__.py create mode 100644 recon_lw/reporting/missing_messages/viewer/missing_message.py create mode 100644 recon_lw/reporting/recon_context/__init__.py create mode 100644 recon_lw/reporting/recon_context/context.py create mode 100644 recon_lw/reporting/recon_metadata/__init__.py create mode 100644 recon_lw/reporting/recon_metadata/base.py create mode 100644 recon_lw/reporting/stats/__init__.py create mode 100644 recon_lw/reporting/stats/stats.py create mode 100644 recon_lw/reporting/utils.py delete mode 100644 template/adapters/base_adapter.py delete mode 100644 template/adapters/readme.md delete mode 100644 template/adapters/stream1_adapter.py delete mode 100644 template/adapters/stream2_adapter.py delete mode 100644 template/download_data.py delete mode 100644 template/fields_checker.py delete mode 100644 template/interpret_functions.py delete mode 100644 template/matching_functions.py delete mode 100644 template/recon.py delete mode 100644 template/rules/rule1.py delete mode 100644 template/utils.py diff --git a/.gitignore b/.gitignore index 89b67e1..b3e450d 100644 --- a/.gitignore +++ b/.gitignore @@ -2502,3 +2502,4 @@ 
env/lib/python3.7/site-packages/zipp-3.15.0.dist-info/RECORD env/lib/python3.7/site-packages/zipp-3.15.0.dist-info/top_level.txt env/lib/python3.7/site-packages/zipp-3.15.0.dist-info/WHEEL .DS_Store +**/__pycache__/ \ No newline at end of file diff --git a/package_info.json b/package_info.json index b7afe44..7ee6dd0 100644 --- a/package_info.json +++ b/package_info.json @@ -1,5 +1,5 @@ { "package_name": "recon-lw", - "package_version": "2.0.0" + "package_version": "3.0.0" } diff --git a/recon_lw/__init__.py b/recon_lw/__init__.py index 3b5caef..e69de29 100644 --- a/recon_lw/__init__.py +++ b/recon_lw/__init__.py @@ -1 +0,0 @@ -#INIT \ No newline at end of file diff --git a/recon_lw/EventsSaver.py b/recon_lw/core/EventsSaver.py similarity index 99% rename from recon_lw/EventsSaver.py rename to recon_lw/core/EventsSaver.py index e6e9fbc..c38dd15 100644 --- a/recon_lw/EventsSaver.py +++ b/recon_lw/core/EventsSaver.py @@ -2,7 +2,6 @@ from abc import ABC, abstractmethod from typing import Any -from recon_lw import recon_lw from datetime import datetime, timedelta from th2_data_services.data import Data from pathlib import Path diff --git a/recon_lw/SequenceCache.py b/recon_lw/core/SequenceCache.py similarity index 97% rename from recon_lw/SequenceCache.py rename to recon_lw/core/SequenceCache.py index 1680f06..4b58d79 100644 --- a/recon_lw/SequenceCache.py +++ b/recon_lw/core/SequenceCache.py @@ -1,8 +1,6 @@ from sortedcontainers import SortedKeyList -from recon_lw.ts_converters import epoch_nano_str_to_ts, ts_to_epoch_nano_str, time_stamp_key - -from recon_lw import recon_lw +from recon_lw.core.ts_converters import time_stamp_key class SequenceCache: diff --git a/recon_lw/StateStream.py b/recon_lw/core/StateStream.py similarity index 95% rename from recon_lw/StateStream.py rename to recon_lw/core/StateStream.py index 350ce35..e396750 100644 --- a/recon_lw/StateStream.py +++ b/recon_lw/core/StateStream.py @@ -1,17 +1,16 @@ from __future__ import annotations +from typing 
import Callable, Any, Tuple, Iterator, Iterable +from recon_lw.core.ts_converters import epoch_nano_str_to_ts, time_stamp_key +from recon_lw.core.EventsSaver import EventsSaver, IEventsSaver +from datetime import datetime + +from recon_lw.core.utility import open_streams +from recon_lw.matching.LastStateMatcher import LastStateMatcher +from recon_lw.core._types import Th2Timestamp from collections import defaultdict from typing import Callable, Any, Tuple, Iterator, Iterable, Dict -from recon_lw.ts_converters import epoch_nano_str_to_ts, ts_to_epoch_nano_str, time_stamp_key -from recon_lw import recon_lw -from th2_data_services.utils import time as time_utils -from recon_lw.SequenceCache import SequenceCache -from recon_lw.EventsSaver import EventsSaver, IEventsSaver -from datetime import datetime -from recon_lw.LastStateMatcher import LastStateMatcher -from recon_lw._types import Th2Timestamp - class StateStream: def __init__(self, @@ -52,7 +51,6 @@ def state_updates(self, stream: Iterable, snapshots_collection): for key, ts, action, state in updates: if key is not None: yield (key, ts, action, state) - def snapshots(self, stream: Iterable) -> Iterator[dict[str, Any]]: """It is expected Sorted stream! 
@@ -285,7 +283,7 @@ def get_mnc_oe_state_ts(o): def create_oe_snapshots_streams(oe_streams, result_events_path, buffer_len=100): events_saver = EventsSaver(result_events_path) filtered_streams = [stream.filter(order_updates_filter) for stream in oe_streams] - strm_list = recon_lw.open_streams(None, None, False, filtered_streams) + strm_list = open_streams(None, None, False, filtered_streams) m_stream = strm_list.sync_streams(order_updates_ts) state_stream = StateStream(get_next_update_oe, get_snapshot_id_oe, @@ -303,7 +301,7 @@ def create_oe_snapshots_streams(oe_streams, result_events_path, buffer_len=100): stream1 = None # Please create your MNC stream stream2 = state_stream.snapshots(m_stream) - streams = recon_lw.open_streams(None, data_objects=[stream1, stream2]) + streams = open_streams(None, data_objects=[stream1, stream2]) message_buffer = [None] * buffer_len diff --git a/template/__init__.py b/recon_lw/core/__init__.py similarity index 100% rename from template/__init__.py rename to recon_lw/core/__init__.py diff --git a/recon_lw/_types.py b/recon_lw/core/_types.py similarity index 100% rename from recon_lw/_types.py rename to recon_lw/core/_types.py diff --git a/template/adapters/__init__.py b/recon_lw/core/cache/__init__.py similarity index 100% rename from template/adapters/__init__.py rename to recon_lw/core/cache/__init__.py diff --git a/template/rules/__init__.py b/recon_lw/core/cache/processor/__init__.py similarity index 100% rename from template/rules/__init__.py rename to recon_lw/core/cache/processor/__init__.py diff --git a/recon_lw/core/cache/processor/base.py b/recon_lw/core/cache/processor/base.py new file mode 100644 index 0000000..e778acb --- /dev/null +++ b/recon_lw/core/cache/processor/base.py @@ -0,0 +1,35 @@ +from abc import ABC, abstractmethod +from dataclasses import dataclass +from typing import Optional, Protocol + +from recon_lw.core.type.types import Message + + +@dataclass +class CacheStore: + cache: dict + +class 
ICacheProcessor(Protocol): + @abstractmethod + def __call__(self, msg: Message, cache: CacheStore): + pass + +class CacheManager: + def __init__(self, + unfiltered_message_process: Optional[ICacheProcessor]=None, + filtered_message_processor: Optional[ICacheProcessor]=None, + cache_store: Optional[CacheStore] = CacheStore({}) + ): + self.cache = cache_store + self.unfiltered_message_process= unfiltered_message_process + self.filtered_message_processor = filtered_message_processor + + def process_unfiltered_message(self, msg: Message): + if self.unfiltered_message_process: + self.unfiltered_message_process(msg, self.cache) + + def process_filtered_message(self, msg: Message): + if self.filtered_message_processor: + self.filtered_message_processor(msg, self.cache) + + diff --git a/recon_lw/core/cache/processor/chain.py b/recon_lw/core/cache/processor/chain.py new file mode 100644 index 0000000..53b1b98 --- /dev/null +++ b/recon_lw/core/cache/processor/chain.py @@ -0,0 +1,12 @@ +from typing import List + +from recon_lw.core.cache.processor.base import ICacheProcessor, CacheStore +from recon_lw.core.type.types import Message + + +class ChainCacheProcessor(ICacheProcessor): + def __init__(self, processors: List[ICacheProcessor]): + self.processors = processors + def __call__(self, msg: Message, cache: CacheStore): + for processor in self.processors: + processor(msg, cache) \ No newline at end of file diff --git a/recon_lw/message_utils.py b/recon_lw/core/message_utils.py similarity index 100% rename from recon_lw/message_utils.py rename to recon_lw/core/message_utils.py diff --git a/recon_lw/core/rule/__init__.py b/recon_lw/core/rule/__init__.py new file mode 100644 index 0000000..ac97200 --- /dev/null +++ b/recon_lw/core/rule/__init__.py @@ -0,0 +1 @@ +from recon_lw.core.rule.base import AbstractRule \ No newline at end of file diff --git a/recon_lw/core/rule/base.py b/recon_lw/core/rule/base.py new file mode 100644 index 0000000..e3f05ab --- /dev/null +++ 
b/recon_lw/core/rule/base.py @@ -0,0 +1,55 @@ + +from abc import ABC, abstractmethod +from typing import Any, Dict, Optional + +from recon_lw.core.EventsSaver import EventsSaver +from recon_lw.matching.init_function import AbstractMatcherContext + + +class RuleContext: + def __init__(self, + rule_root_event: dict, + event_saver: EventsSaver, + event_sequence: Dict[str, Any] + ): + self.rule_root_event = rule_root_event + self.event_saver = event_saver + self.event_sequence = event_sequence + + @staticmethod + def from_dict(rule_context: dict): + return RuleContext( + rule_context['events_saver'], + rule_context['event_sequence'], + rule_context['event'] + ) + +class AbstractRule(ABC): + + def __init__(self): + self.horizon_delay = None + self.collect_func = None + self.flush_func = None + + self.rule_context: Optional[RuleContext] = None + self.matcher_context: Optional[AbstractMatcherContext] = None + + self.first_key_func = None + self.second_key_func = None + + def set_rule_context(self, context: RuleContext): + self.rule_context = context + + def get_root_event(self) -> Dict[str, Any]: + return self.rule_context.rule_root_event + + def get_event_saver(self) -> EventsSaver: + return self.rule_context.event_saver + + def get_event_sequence(self) -> Dict[str, Any]: + return self.rule_context.event_sequence + + @abstractmethod + def to_dict(self) -> Dict[str, Any]: + pass + diff --git a/recon_lw/core/rule/one_many.py b/recon_lw/core/rule/one_many.py new file mode 100644 index 0000000..78b39a0 --- /dev/null +++ b/recon_lw/core/rule/one_many.py @@ -0,0 +1,113 @@ +from typing import Optional, Callable, Dict, Any + +from recon_lw.core.cache.processor.base import CacheManager +from recon_lw.core.rule.base import AbstractRule +from recon_lw.matching.LastStateMatcher import LastStateMatcher +from recon_lw.matching.collect_matcher.base import CollectMatcher +from recon_lw.matching.flush_function import DefaultFlushFunction, FlushFunction +from 
class OneManyRuleConfig(AbstractRule):
    """Rule configuration built from a config dict, explicit parts or defaults."""

    def __init__(self):
        super().__init__()
        self.first_key_func: Optional[KeyFunction] = None
        self.second_key_func: Optional[KeyFunction] = None
        self.cache_manager: Optional[CacheManager] = None
        # Assigned by every factory below; declared so it always exists.
        self.last_state_matcher: Optional[LastStateMatcher] = None
        self._as_dict: Optional[dict] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return the config dict the rule was built from.

        ``None`` unless the rule was created by ``from_dict``.
        """
        return self._as_dict

    @staticmethod
    def from_dict(name: str, config: dict) -> 'OneManyRuleConfig':
        """Build a rule from a config dict.

        Args:
            name: Rule name.
            config: Rule configuration. ``'horizon_delay'`` is required.
                ``'rule_match_func'`` is required when ``'collect_func'`` is
                absent or ``None``; ``'interpret_func'`` is required when
                ``'flush_func'`` is absent. With a plain callable
                ``'init_func'``, the keys ``'match_index'``, ``'time_index'``
                and ``'message_cache'`` must be present after it has run.

        Raises:
            KeyError: If a required key is missing.
        """
        rule = OneManyRuleConfig()
        rule._as_dict = config

        rule.name = name
        rule.horizon_delay = config['horizon_delay']
        last_state_matcher = config.get('live_orders_cache')
        rule.last_state_matcher = last_state_matcher

        rule.collect_func = config.get('collect_func')

        if rule.collect_func is None:
            from recon_lw.matching.collect_matcher import DefaultCollectMatcher
            rule.collect_func = DefaultCollectMatcher(config['rule_match_func'], last_state_matcher)

        init_func = config.get('init_func', DefaultMatcherContextProvider())
        if isinstance(init_func, MatcherContextProvider):
            rule.matcher_context = init_func.get_context()

        elif callable(init_func):
            init_func(config)
            # Store the context where the other branch and the other
            # factories store it; previously only ``rule.context`` was set
            # and ``matcher_context`` stayed None.
            rule.matcher_context = SimpleMatcherContext(
                match_index=config['match_index'],
                time_index=config['time_index'],
                message_cache=config['message_cache']
            )
            # Kept as an alias for code that reads the old attribute name.
            rule.context = rule.matcher_context

        # Build the default lazily: ``config.get(key, default)`` evaluates
        # the default even when the key is present, which demanded
        # 'interpret_func' from configs that supply their own flush function.
        if 'flush_func' in config:
            rule.flush_func = config['flush_func']
        else:
            rule.flush_func = DefaultFlushFunction(config['interpret_func'],
                                                   last_state_matcher)

        return rule

    @staticmethod
    def from_params(
            name: str,
            horizon_delay: int,
            context_provider: MatcherContextProvider=None,
            collect_func: CollectMatcher=None,
            flush_func: FlushFunction=None,
            first_key_func: Callable = None,
            second_key_func: Callable = None,
            last_state_matcher: Optional[LastStateMatcher]=None,
            cache_manager: CacheManager=None
    ):
        """Build a rule from explicitly supplied parts.

        Args:
            name: Rule name.
            horizon_delay: Stored on the rule as ``horizon_delay``.
            context_provider: Provider of the matcher context; a
                ``DefaultMatcherContextProvider`` is used when omitted.
            collect_func: Stored on the rule as ``collect_func``.
            flush_func: Stored on the rule as ``flush_func``.
            first_key_func: Stored on the rule as ``first_key_func``.
            second_key_func: Stored on the rule as ``second_key_func``.
            last_state_matcher: Stored on the rule as ``last_state_matcher``.
            cache_manager: Stored on the rule as ``cache_manager``.
        """
        if context_provider is None:
            # The parameter defaults to None, which used to fail with
            # AttributeError on ``None.get_context()``.
            context_provider = DefaultMatcherContextProvider()

        rule = OneManyRuleConfig()
        rule.name = name
        rule.horizon_delay = horizon_delay
        rule.matcher_context = context_provider.get_context()
        rule.last_state_matcher = last_state_matcher
        rule.collect_func = collect_func
        rule.first_key_func = first_key_func
        rule.second_key_func = second_key_func
        rule.flush_func = flush_func
        rule.cache_manager = cache_manager

        return rule

    @staticmethod
    def from_defaults(
            name: str,
            horizon_delay: int,
            match_function: ReconMatcher,
            intepretation_function: Callable,
            first_key_func: Callable = None,
            second_key_func: Callable = None,
    ):
        """Build a rule that uses the default context, collector and flusher.

        Args:
            name: Rule name.
            horizon_delay: Stored on the rule as ``horizon_delay``.
            match_function: Passed to ``DefaultCollectMatcher``.
            intepretation_function: Passed to ``DefaultFlushFunction``.
            first_key_func: Stored on the rule as ``first_key_func``.
            second_key_func: Stored on the rule as ``second_key_func``.
        """
        from recon_lw.matching.collect_matcher import DefaultCollectMatcher

        # Same assignments as from_params, with the default components,
        # no last-state matcher and no cache manager.
        return OneManyRuleConfig.from_params(
            name=name,
            horizon_delay=horizon_delay,
            context_provider=DefaultMatcherContextProvider(),
            collect_func=DefaultCollectMatcher(match_function),
            flush_func=DefaultFlushFunction(intepretation_function),
            first_key_func=first_key_func,
            second_key_func=second_key_func,
            last_state_matcher=None,
            cache_manager=None,
        )
class PairOneRule(AbstractRule):
    """Rule configuration built from a config dict, keyed by pair/one key functions."""

    def __init__(self):
        super().__init__()
        self.live_orders_cache: Optional[LastStateMatcher] = None
        self.context: Optional[SimpleMatcherContext] = None
        self.pair_key_func = None
        self.one_key_func = None
        self._dict_config = None

    def to_dict(self) -> Dict[str, Any]:
        """Return the config dict the rule was built from."""
        return self._dict_config

    @staticmethod
    def from_dict(name: str, config: dict) -> 'PairOneRule':
        """Build a rule from a config dict.

        Args:
            name: Rule name.
            config: Rule configuration. ``'horizon_delay'`` is required.
                ``'rule_match_func'`` is required when ``'collect_func'`` is
                absent or ``None``; ``'interpret_func'`` is required when
                ``'flush_func'`` is absent. With a plain callable
                ``'init_func'``, the keys ``'match_index'``, ``'time_index'``
                and ``'message_cache'`` must be present after it has run.

        Raises:
            KeyError: If a required key is missing.
        """
        rule = PairOneRule()

        rule.name = name
        rule.horizon_delay = config['horizon_delay']
        last_state_matcher = config.get('live_orders_cache')
        # The attribute is declared in __init__ but was never filled in.
        rule.live_orders_cache = last_state_matcher

        rule.collect_func = config.get('collect_func')

        if rule.collect_func is None:
            from recon_lw.matching.collect_matcher import DefaultCollectMatcher
            rule.collect_func = DefaultCollectMatcher(config['rule_match_func'], last_state_matcher)

        init_func = config.get('init_func', DefaultMatcherContextProvider())
        if isinstance(init_func, MatcherContextProvider):
            # NOTE(review): OneManyRuleConfig calls get_context() without
            # arguments; confirm which signature the provider really has.
            rule.context = init_func.get_context(rule)

        elif callable(init_func):
            init_func(config)
            rule.context = SimpleMatcherContext(
                match_index=config['match_index'],
                time_index=config['time_index'],
                message_cache=config['message_cache']
            )

        # Mirror the context under the attribute name declared on
        # AbstractRule so code written against the base class finds it.
        rule.matcher_context = rule.context

        # Build the default lazily: ``config.get(key, default)`` evaluates
        # the default even when the key is present, which demanded
        # 'interpret_func' from configs that supply their own flush function.
        if 'flush_func' in config:
            rule.flush_func = config['flush_func']
        else:
            # NOTE(review): OneManyRuleConfig builds DefaultFlushFunction
            # without the leading context argument; confirm the signature.
            rule.flush_func = DefaultFlushFunction(rule.context,
                                                   config['interpret_func'],
                                                   last_state_matcher)
        rule._dict_config = config

        rule.pair_key_func = config.get('pair_key_func')
        rule.one_key_func = config.get('one_key_func')

        return rule
recon_lw.core.ts_converters import time_stamp_key StreamsVal = Tuple[Th2Timestamp, Iterator, Optional[dict]] @@ -13,7 +13,6 @@ class Streams(SortedKeyList): """ Streams -- wrapper for SortedKeyList that provides type hints and methods to work with streams. - Note: Default sort function sorts by Seconds precision. @@ -44,10 +43,8 @@ def pop(self, index=-1) -> StreamsVal: def sync_streams(self, get_timestamp_func: Callable): """Yields synced by `get_timestamp_func` values from the streams. - Almost the same as `get_next_batch` but yields all values from all streams. `get_next_batch` will return only the messages in the list. - Args: get_timestamp_func: the function should take an element of any stream from streams inside this Streams object. @@ -92,7 +89,6 @@ def get_next_batch(self, o = next(iterator) self.add((get_timestamp_func(o), iterator, o)) except StopIteration as e: - # When iterator is empty. continue return batch_pos diff --git a/recon_lw/ts_converters.py b/recon_lw/core/ts_converters.py similarity index 91% rename from recon_lw/ts_converters.py rename to recon_lw/core/ts_converters.py index dbfb9bc..1aa1368 100644 --- a/recon_lw/ts_converters.py +++ b/recon_lw/core/ts_converters.py @@ -1,4 +1,4 @@ -from recon_lw._types import Th2Timestamp +from recon_lw.core._types import Th2Timestamp def epoch_nano_str_to_ts(s_nanos: str) -> Th2Timestamp: diff --git a/recon_lw/core/type/__init__.py b/recon_lw/core/type/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/recon_lw/core/type/__init__.py @@ -0,0 +1 @@ + diff --git a/recon_lw/core/type/types.py b/recon_lw/core/type/types.py new file mode 100644 index 0000000..be8647f --- /dev/null +++ b/recon_lw/core/type/types.py @@ -0,0 +1,6 @@ +from typing import Dict, Any, Callable, List, Union, Optional + +Message = Dict[str, Any] + +InterpretationFunctionType = Callable[[Message, Any, dict], List[dict]] +KeyFunctionType = Callable[[Message], Union[Optional[List[str]], Optional[str]]] \ No newline 
@dataclass
class EventSequence:
    """Name, timestamp string and running counter of an event sequence."""

    name: str
    timestamp: str
    n: int

    def to_dict(self):
        """Return the dict form; the timestamp is stored under ``"stamp"``."""
        keys = ("name", "stamp", "n")
        values = (self.name, self.timestamp, self.n)
        return dict(zip(keys, values))
def simplify_message(m):
    """Return a shallow copy of ``m`` in simplified form.

    Added:
        - simpleBody: ``message_to_dict(m)``, or ``{}`` for an empty body
        - protocol: ``protocol(m)``, only when the body is not empty

    Removed:
        - body
        - bodyBase64

    :param m: message dict; must contain the ``"body"`` key.
    :return: the simplified copy; ``m`` itself is not modified.
    """
    mm = m.copy()
    if len(m["body"]) > 0:
        mm["simpleBody"] = message_to_dict(m)
        mm["protocol"] = protocol(m)
    else:
        mm["simpleBody"] = {}

    # TODO
    #   - it's better to get these names from DataSource message description.
    #   - it's possible that sometime the path of the body will be changed.
    # Tolerant pops: a message without "bodyBase64" must not fail with
    # KeyError after the simplified body has already been computed.
    mm.pop("body", None)
    mm.pop("bodyBase64", None)
    return mm
def protocol(m):
    """Return the protocol name of a message.

    Expects the message after expand_message function.

    :param m: message dict, raw or already simplified.
    :return: ``m["protocol"]`` for a simplified message (no ``"body"`` key),
        ``"error"`` for an empty body, ``"not_defined"`` when the resolver
        yields ``None``, otherwise the resolved protocol.
    """
    # Simplified message
    if "body" not in m:
        return m["protocol"]

    if len(m["body"]) == 0:
        return "error"

    # The body has to be resolved from this message; the resolver call was
    # previously made without any argument.
    pr = options.smsr.get_protocol(options.mfr.get_body(m))
    return "not_defined" if pr is None else pr
def open_streams(
        streams_path: Optional[str],
        name_filter=None,
        expanded_messages: bool = False,
        data_objects: List[Data] = None
) -> Streams:
    """
    Get Streams object for Th2 messages.

    Args:
        streams_path: Directory scanned for cache files whose name contains
            ".pickle". Only used when `data_objects` is empty or None.
        name_filter: Optional predicate on the file name; files for which it
            returns a falsy value are skipped.
        expanded_messages: When True every message is passed through the
            message fields resolver's `expand_message` and the resulting
            messages are streamed instead.
        data_objects: Data objects to stream; when given, `streams_path` and
            `name_filter` are ignored.

    Returns:
        Streams: [(Th2ProtobufTimestamp,
                   iterator for Data object,
                   First object from Data object or None), ...]
    """
    streams = Streams()

    if data_objects:
        for do in data_objects:
            ts0 = {"epochSecond": 0, "nano": 0}
            if expanded_messages:
                stream = (mm for m in do for mm in options.mfr.expand_message(m))
            else:
                stream = do
            streams.add((ts0, iter(stream), None))
    else:
        files = listdir(streams_path)
        for f in files:
            if ".pickle" not in f:
                continue
            if name_filter is not None and not name_filter(f):
                continue
            data_object = Data.from_cache_file(path.join(streams_path, f))
            if expanded_messages:
                # NOTE(review): the branch above reaches the resolver as
                # `options.mfr`; presumably the same object - confirm.
                stream = (mm for m in data_object for mm in
                          options.MESSAGE_FIELDS_RESOLVER.expand_message(m))
            else:
                # Reuse the object opened above instead of loading the same
                # cache file a second time.
                stream = data_object
            streams.add_stream(stream)

    return streams
+ yield from streams.sync_streams(get_timestamp_func) + diff --git a/template/adapters/utils.py b/recon_lw/interpretation/__init__.py similarity index 100% rename from template/adapters/utils.py rename to recon_lw/interpretation/__init__.py diff --git a/recon_lw/interpretation/adapter/__init__.py b/recon_lw/interpretation/adapter/__init__.py new file mode 100644 index 0000000..11d2c42 --- /dev/null +++ b/recon_lw/interpretation/adapter/__init__.py @@ -0,0 +1,4 @@ +from recon_lw.interpretation.adapter.base import Adapter +from recon_lw.interpretation.adapter.compound import CompoundAdapter, CompoundAdapterBuilder +from recon_lw.interpretation.adapter.simple import SimpleAdapter, SimpleAdapterBuilder +from recon_lw.interpretation.adapter.adapter_context import AdapterContext diff --git a/recon_lw/interpretation/adapter/adapter_context.py b/recon_lw/interpretation/adapter/adapter_context.py new file mode 100644 index 0000000..6718072 --- /dev/null +++ b/recon_lw/interpretation/adapter/adapter_context.py @@ -0,0 +1,21 @@ +from typing import Any, Dict, Optional + +from recon_lw.core.cache.processor.base import CacheStore + +class AdapterContext: + + """ + A class representing an adapter context. This can be used to store something to cache / access something from cache + + Attributes: + _cache (dict): A dictionary to store cache data. 
class Adapter(ABC):
    """
    Abstract base class representing an adapter.

    This class defines the interface for adapter implementations. Adapters are
    used to transform messages from one format to another.
    It is required to make it possible to compare two streams in different formats.

    Attributes:
        covered_fields (set): A set containing the fields covered by the adapter.
        body_field (list of str): The path to the body field in the message.
        metadata_path (list of str): The path to the metadata field in the message.
        adapter_context (AdapterContext): Context object created per adapter.
        mapping (dict): Extractors mapping passed to the constructor, or ``{}``.

    Abstract methods (must be implemented by subclasses):
        get_fields_group, on_message, on_message_exit, get,
        get_root_message_field, get_metadata_field.

    Concrete helpers:
        get_fields_coverage, get_body, get_metadata, get_direct_body,
        get_message_type, set.
    """

    # Class-level defaults; __init__ gives every instance its own set so that
    # fields recorded by one adapter do not leak into all the others.
    covered_fields = set()
    body_field = ["body", "fields"]
    metadata_path = ["body", "metadata"]

    def __init__(
        self,
        body_path: Optional[List[str]] = None,
        metadata_path: Optional[List['str']] = None,
        extractors_mapping: Optional[Dict[str, Any]]=None
    ):
        """
        Args:
            body_path: Overrides the default ``body_field`` path when given.
            metadata_path: Overrides the default ``metadata_path`` when given.
            extractors_mapping: Stored as ``mapping``; ``{}`` when omitted or empty.
        """
        self.adapter_context = AdapterContext()
        self.mapping = extractors_mapping if extractors_mapping else {}
        self.covered_fields = set()
        if body_path is not None:
            self.body_field = body_path

        if metadata_path is not None:
            self.metadata_path = metadata_path

    @abstractmethod
    def get_fields_group(self, message, group_name) -> Dict[str, Any]:
        pass

    @abstractmethod
    def on_message(self, m):
        pass

    @abstractmethod
    def on_message_exit(self, m):
        pass

    @abstractmethod
    def get(self, message, field, strict=False) -> Any:
        pass

    @abstractmethod
    def get_root_message_field(self, message, parameter_name, strict=False) -> Any:
        pass

    @abstractmethod
    def get_metadata_field(self, message, field_name, strict=False) -> Any:
        pass

    def get_fields_coverage(self) -> Set[str]:
        """Return the set of fields covered by the adapter."""
        # Previously an empty body that returned None despite the annotation.
        return self.covered_fields

    def get_body(self, m) -> Dict[str, Any]:
        """Walk ``body_field`` through ``m`` and return what it points to.

        Whenever a step yields a list, its first element is taken.
        """
        for key in self.body_field:
            m = m[key]
            if isinstance(m, list):
                m = m[0]
        return m

    def get_metadata(self, m) -> Dict[str, Any]:
        """Walk ``metadata_path`` through ``m`` and return what it points to.

        Whenever a step yields a list, its first element is taken.
        """
        for key in self.metadata_path:
            m = m[key]
            if isinstance(m, list):
                m = m[0]
        return m

    def get_direct_body(self, message, field) -> Any:
        """Return ``field`` from the message body, or None when it is absent."""
        return self.get_body(message).get(field)

    def get_message_type(self, message) -> str:
        """Return the ``'messageType'`` entry of the message metadata."""
        return self.get_metadata(message)['messageType']

    def set(self, message, field, val):
        """Store ``val`` under ``field`` at the top level of ``message``."""
        message[field] = val
class CompoundAdapter(Adapter):
    """
    A compound adapter composed of multiple adapters with associated conditions.

    This adapter selects the appropriate adapter based on conditions and delegates
    message handling to the selected adapter.

    Attributes:
        adapters (List[Tuple[Condition, Adapter]]): A list of tuples containing
            conditions and associated adapters, tried in order.
    """

    def __init__(self,
                 adapters: List[Tuple[Condition, Adapter]],
                 body_path: Optional[List[str]] = None,
                 mapping_path: Optional[List[str]] = None,
                 mapping: Optional[Dict[str, Extractor]]=None
                 ):
        """
        Args:
            adapters: (condition, adapter) pairs, tried in order.
            body_path: Path to the body handed to the conditions.
            mapping_path: Forwarded to the base class as its metadata path.
            mapping: Forwarded to the base class as the extractors mapping.
        """
        super().__init__(body_path, mapping_path, mapping)
        self.adapters = adapters

    def _condition_input(self, message):
        """Return the part of ``message`` that the conditions are evaluated on."""
        # ``body_field`` is a list of keys, and indexing the message with the
        # list itself raises TypeError on a dict. A plain string path is still
        # honoured as a single key.
        if isinstance(self.body_field, str):
            return message[self.body_field]
        return self.get_body(message)

    def get_adapter(self, message):
        """Return the first adapter whose condition accepts the message body.

        Raises:
            SystemError: If no condition matches.
        """
        body = self._condition_input(message)
        for condition, adapter in self.adapters:
            if condition(body, adapter):
                return adapter

        raise SystemError(f"No adapter for {body}")

    def get(self, message, field, strict=False):
        handler = self.get_adapter(message)

        return handler.get(message, field, strict)

    # The two methods below are abstract on Adapter; without them the class
    # cannot be instantiated at all.
    def get_root_message_field(self, message, parameter_name, strict=False):
        handler = self.get_adapter(message)
        return handler.get_root_message_field(message, parameter_name, strict)

    def get_metadata_field(self, message, field_name, strict=False):
        handler = self.get_adapter(message)
        return handler.get_metadata_field(message, field_name, strict)

    def on_message(self, m):
        handler = self.get_adapter(m)
        return handler.on_message(m)

    def on_message_exit(self, m):
        handler = self.get_adapter(m)
        return handler.on_message_exit(m)

    def get_fields_group(self, m, group_name):
        handler = self.get_adapter(m)
        return handler.get_fields_group(m, group_name)
+ _mapping (Dict[str, Extractor]): A mapping of field names to extractors. + _body_path (Optional[List[str]]): The path to the body field in the message. + _metadata_path (Optional[List[str]]): The path to the metadata field in the message. + + Methods: + __init__: Constructor method for the CompoundAdapterBuilder class. + with_mapping: Method to set the mapping for the CompoundAdapter. + with_body_path: Method to set the body path for the CompoundAdapter. + with_metadata_path: Method to set the metadata path for the CompoundAdapter. + add_adapter: Method to add an adapter with its associated condition. + build: Method to build and return the constructed CompoundAdapter instance. + """ + + def __init__(self): + super().__init__() + self._conditions_and_adapters: List[Tuple[Condition, Adapter]] = [] + self._mapping: Dict[str, Extractor] = {} + self._body_path = None + self._metadata_path = None + + def with_mapping(self, mapping: Dict[str, Extractor]) -> 'CompoundAdapterBuilder': + self._mapping = mapping + return self + + def with_body_path(self, body_path: List[str]) -> 'CompoundAdapterBuilder': + self._body_path = body_path + return self + + def with_metadata_path(self, metadata_path: List[str]) -> 'CompoundAdapterBuilder': + self._metadata_path = metadata_path + return self + + def add_adapter(self, condition: Condition, adapter: Adapter) -> 'CompoundAdapterBuilder': + self._conditions_and_adapters.append((condition, adapter)) + return self + + def build(self) -> CompoundAdapter: + return CompoundAdapter( + self._conditions_and_adapters, + self._body_path, + self._metadata_path, + self._mapping + ) + diff --git a/recon_lw/interpretation/adapter/simple.py b/recon_lw/interpretation/adapter/simple.py new file mode 100644 index 0000000..6d72f8f --- /dev/null +++ b/recon_lw/interpretation/adapter/simple.py @@ -0,0 +1,87 @@ +from typing import List, Optional, Dict, Union + +from recon_lw.interpretation.adapter.base import Adapter +from 
class SimpleAdapter(Adapter):
    """Adapter that resolves every field through the entry of ``mapping``."""

    def __init__(self,
                 body_path: Optional[List[str]] = None,
                 metadata_path: Optional[List[str]] = None,
                 mapping: Optional[Dict[str, Union[Extractor, ExtractorProtocol, str]]] = None
                 ):
        super().__init__(body_path, metadata_path, mapping)

    @staticmethod
    def _finalize(raw, key, strict):
        """Apply the strict check, then stringify every extracted value."""
        if strict and raw == Extractor.NOT_EXTRACTED:
            raise KeyError(key)
        return str(raw) if raw != Extractor.NOT_EXTRACTED else raw

    def get(self, message, field, strict=False):
        """Extract ``field`` using its mapping entry.

        An ``Extractor`` is called with the message body and this adapter, a
        string entry is looked up as a key of the body, and anything else is
        called with the whole message.
        """
        entry = self.mapping[field]
        if isinstance(entry, Extractor):
            raw = entry(self.get_body(message), self)
        elif isinstance(entry, str):
            raw = self.get_body(message).get(entry, Extractor.NOT_EXTRACTED)
        else:
            raw = entry(message)
        return self._finalize(raw, field, strict)

    def get_root_message_field(self, message, parameter_name, strict=False):
        """Extract ``parameter_name`` by calling its mapping entry on the whole message."""
        raw = self.mapping[parameter_name](message, self)
        return self._finalize(raw, parameter_name, strict)

    def get_metadata_field(self, message, field_name, strict=False):
        """Extract ``field_name`` by calling its mapping entry on ``message['metadata']``."""
        # NOTE(review): reads the top-level 'metadata' key rather than
        # self.metadata_path - confirm this is intended.
        raw = self.mapping[field_name](message['metadata'], self)
        return self._finalize(raw, field_name, strict)

    def on_message(self, m):
        return None

    def on_message_exit(self, m):
        return None

    def get_fields_group(self, m, group_name):
        return None
self.metadata_path = metadata_path + return self + + def with_mapping(self, mapping: Dict[str, Extractor]) -> 'SimpleAdapterBuilder': + self.mapping = mapping + return self + + def build(self) -> SimpleAdapter: + return SimpleAdapter( + self.body_path, + self.metadata_path, + self.mapping + ) diff --git a/recon_lw/interpretation/check_rule/__init__.py b/recon_lw/interpretation/check_rule/__init__.py new file mode 100644 index 0000000..de24fe4 --- /dev/null +++ b/recon_lw/interpretation/check_rule/__init__.py @@ -0,0 +1,2 @@ +from recon_lw.interpretation.check_rule.base import IFieldCheckRule, FieldToCheck +from recon_lw.interpretation.check_rule.equal import EqualFieldCheckRule, FieldCheckResult diff --git a/recon_lw/interpretation/check_rule/adapter.py b/recon_lw/interpretation/check_rule/adapter.py new file mode 100644 index 0000000..3e58558 --- /dev/null +++ b/recon_lw/interpretation/check_rule/adapter.py @@ -0,0 +1,14 @@ +from recon_lw.interpretation.check_rule.base import IFieldCheckRule +from abc import ABC +from recon_lw.interpretation.adapter.base import Adapter + +class IAdapterFieldCheckRule(IFieldCheckRule, ABC): + def __init__(self, stream1_adapter: Adapter, + stream2_adapter: Adapter): + self.stream1_adapter = stream1_adapter + self.stream2_adapter = stream2_adapter + + def get_field_values(self, field, msg1, msg2): + v1 = self.stream1_adapter.get(msg1, field) + v2 = self.stream2_adapter.get(msg2, field) + return v1, v2 \ No newline at end of file diff --git a/recon_lw/interpretation/check_rule/base.py b/recon_lw/interpretation/check_rule/base.py new file mode 100644 index 0000000..7ee0830 --- /dev/null +++ b/recon_lw/interpretation/check_rule/base.py @@ -0,0 +1,44 @@ +from abc import ABC, abstractmethod +from typing import Protocol + +from recon_lw.interpretation.check_rule.check_result import FieldCheckResult +from dataclasses import dataclass + + +class IFieldCheckRule(ABC): + """ + Abstract base class representing a field check rule. 
+ + This class defines the interface for field check rules, which are used to + compare fields between two messages. + """ + + def __call__(self, field, msg1, msg2): + return self.handler(field, msg1, msg2) + + @abstractmethod + def handler(self, field, msg1, msg2) -> FieldCheckResult: + pass + +class IFieldCheckRuleProtocol(Protocol): + def __call__(self, field, msg1, msg2): + pass + + +@dataclass +class FieldToCheck: + """ + Data class representing a field to check. + + This class holds information about a field that needs to be checked between + two messages. + + Attributes: + field (str): The name of the field to check. + field_checker (IFieldCheckRule): The field check rule associated with the field. + field_description (str): A description of the field (optional). + + """ + field: str + field_checker: IFieldCheckRule + field_description: str = '' \ No newline at end of file diff --git a/recon_lw/interpretation/check_rule/check_result.py b/recon_lw/interpretation/check_rule/check_result.py new file mode 100644 index 0000000..19d053b --- /dev/null +++ b/recon_lw/interpretation/check_rule/check_result.py @@ -0,0 +1,25 @@ +from dataclasses import dataclass +from typing import Any, Optional + + +@dataclass +class FieldCheckResult: + """ + Data class representing the result of a field check. + + This class holds information about the comparison result of a field between + two messages. + + Attributes: + field (str): The name of the field that was checked. + left_val (Any): The value of the field in the left message. + right_val (Any): The value of the field in the right message. + result (Any): The result of the field check. + check_comment (Optional[str]): An optional comment about the check result. 
+ + """ + field: str + left_val: Any + right_val: Any + result: Any + check_comment: Optional[str] = None diff --git a/recon_lw/interpretation/check_rule/equal.py b/recon_lw/interpretation/check_rule/equal.py new file mode 100644 index 0000000..ec99ef1 --- /dev/null +++ b/recon_lw/interpretation/check_rule/equal.py @@ -0,0 +1,25 @@ +from recon_lw.interpretation.check_rule.adapter import IAdapterFieldCheckRule +from recon_lw.interpretation.check_rule.check_result import FieldCheckResult + + +class EqualFieldCheckRule(IAdapterFieldCheckRule): + """ + A field check rule that checks if two fields are equal. + + This rule compares the values of a field between two messages and returns + whether they are equal. + + Methods: + handler: Method to implement the field check logic. + """ + + def handler(self, field, msg1, msg2) -> FieldCheckResult: + v1, v2 = self.get_field_values(field, msg1, msg2) + + return FieldCheckResult( + field=field, + left_val=v1, + right_val=v2, + result=v1 == v2, + check_comment='Equal comparison' + ) diff --git a/recon_lw/interpretation/condition/__init__.py b/recon_lw/interpretation/condition/__init__.py new file mode 100644 index 0000000..3f1d090 --- /dev/null +++ b/recon_lw/interpretation/condition/__init__.py @@ -0,0 +1,2 @@ +from recon_lw.interpretation.condition.base import Condition +from recon_lw.interpretation.condition.function import FunctionCondition \ No newline at end of file diff --git a/recon_lw/interpretation/condition/base.py b/recon_lw/interpretation/condition/base.py new file mode 100644 index 0000000..0643e82 --- /dev/null +++ b/recon_lw/interpretation/condition/base.py @@ -0,0 +1,11 @@ +from typing import Protocol + +from recon_lw.interpretation.adapter.base import Adapter +from recon_lw.core.type.types import Message +from abc import ABC, abstractmethod + + +class Condition(Protocol): + + def __call__(self, message: Message, adapter: Adapter) -> bool: + return True diff --git a/recon_lw/interpretation/condition/function.py 
class FunctionCondition(Condition):
    """Condition that delegates the decision to a plain function.

    Attributes:
        function: Callable taking ``(message, adapter)`` and returning a bool.
        cache: Result of the most recent call, ``None`` before the first
            call. It is informational only and never short-circuits a call.
    """

    def __init__(self, function: Callable[[Message, Adapter], bool]):
        self.function = function
        self.cache = None

    def __call__(self, message: Message, adapter: Adapter) -> bool:
        # Evaluate for every message. The former shortcut returned the first
        # truthy result for all later messages, whatever they contained.
        result = self.function(message, adapter)
        self.cache = result
        return result
import ABC, abstractmethod
+from typing import Any, Protocol
+from recon_lw.core.type.types import Message
+from recon_lw.interpretation.adapter.base import Adapter
+
+
+class Converter(ABC):
+
+    def __call__(self, message: Message, field: str, val: Any, adapter: Adapter) -> Any:
+        return self.convert(message, field, val, adapter)
+
+    @abstractmethod
+    def convert(self, message: Message, field: str, val: Any, adapter: Adapter):
+        pass
+
+class ConverterProtocol(Protocol):
+    def __call__(self, message: Message, field: str, val: Any, adapter: Adapter):
+        pass
\ No newline at end of file
diff --git a/recon_lw/interpretation/converter/boolean.py b/recon_lw/interpretation/converter/boolean.py
new file mode 100644
index 0000000..0b98171
--- /dev/null
+++ b/recon_lw/interpretation/converter/boolean.py
@@ -0,0 +1,20 @@
+from recon_lw.interpretation.converter.base import Converter
+from typing import Dict
+
+from recon_lw.interpretation.adapter import Adapter
+from recon_lw.core.type.types import Message
+
+
+class BooleanConverter(Converter):
+    """Returns the mapped value for a field name, or ``default_val``."""
+
+    def __init__(self, mapping: Dict[str, str], default_val: str=''):
+        self.mapping = mapping
+        self.default_val = default_val
+
+    def convert(self, message: Message, field: str, val: str, adapter: Adapter):
+        # NOTE(review): the lookup key is the field name, not ``val`` --
+        # confirm this is intended.
+        if field in self.mapping:
+            return self.mapping[field]
+        return self.default_val
\ No newline at end of file
diff --git a/recon_lw/interpretation/converter/chain.py b/recon_lw/interpretation/converter/chain.py
new file mode 100644
index 0000000..25a5822
--- /dev/null
+++ b/recon_lw/interpretation/converter/chain.py
@@ -0,0 +1,37 @@
+from typing import List, Any
+
+from recon_lw.interpretation.converter.base import Converter, ConverterProtocol
+from recon_lw.interpretation.adapter import Adapter
+from recon_lw.core.type.types import Message
+from recon_lw.interpretation.field_extractor import Extractor
+
+
+class ChainConverter(Converter):
+    """Applies converters in order; stops early on None or NOT_EXTRACTED."""
+
+    def __init__(self, converters: List[ConverterProtocol]):
+        self._converters = converters
+
+    def convert(self, message: Message, field: str, val: str, adapter: Adapter):
+        for converter in self._converters:
+            val = converter(message, field, val, adapter)
+            if val is None or val == Extractor.NOT_EXTRACTED:
+                return val
+        return val
+
+
+class FirstNonNullChainConverter(Converter):
+    """Returns the first converter result that is not NOT_EXTRACTED.
+
+    NOTE(review): despite the class name, ``None`` results are not skipped.
+    """
+
+    def __init__(self, converters: List[ConverterProtocol]):
+        self._converters = converters
+
+    def convert(self, message: Message, field: str, val: Any, adapter: Adapter):
+        for converter in self._converters:
+            converted = converter(message, field, val, adapter)
+            if converted != Extractor.NOT_EXTRACTED:
+                return converted
+        return Extractor.NOT_EXTRACTED
diff --git a/recon_lw/interpretation/converter/condition.py b/recon_lw/interpretation/converter/condition.py
new file mode 100644
index 0000000..d28e2d4
--- /dev/null
+++ b/recon_lw/interpretation/converter/condition.py
@@ -0,0 +1,29 @@
+from recon_lw.interpretation.converter.base import Converter, ConverterProtocol
+from recon_lw.interpretation.adapter import Adapter
+from recon_lw.interpretation.condition import Condition
+from recon_lw.interpretation.converter.dummy import DummyConverter
+from recon_lw.core.type.types import Message
+
+
+class ConditionConverter(Converter):
+    """Routes a value to one of two converters depending on a condition.
+
+    A branch left as ``None`` defaults to ``DummyConverter``.
+    """
+
+    def __init__(self, condition: Condition, true_converter: ConverterProtocol=None, false_converter: ConverterProtocol = None):
+        self.condition = condition
+        if true_converter is None:
+            true_converter = DummyConverter()
+        if false_converter is None:
+            false_converter = DummyConverter()
+
+        self.true_converter = true_converter
+        self.false_converter = false_converter
+
+    def convert(self, message: Message, field: str, val: str, adapter: Adapter):
+        # The branch result must be returned; it used to be discarded, so
+        # every conversion yielded ``None``.
+        if self.condition(message, adapter):
+            return self.true_converter(message, field, val, adapter)
+        return self.false_converter(message, field, val, adapter)
\ No newline at end of file
diff --git a/recon_lw/interpretation/converter/constant.py b/recon_lw/interpretation/converter/constant.py
new file mode 100644
index 0000000..b2b0342
--- /dev/null
+++ b/recon_lw/interpretation/converter/constant.py
@@ -0,0 +1,9
@@
+from recon_lw.interpretation.converter.base import Converter
+from recon_lw.core.type.types import Message
+from recon_lw.interpretation.adapter.base import Adapter
+
+
+class ConstantConverter(Converter):
+
+    def convert(self, message: Message, field: str, val: str, adapter: Adapter):
+        return val
diff --git a/recon_lw/interpretation/converter/datetime.py b/recon_lw/interpretation/converter/datetime.py
new file mode 100644
index 0000000..b2945eb
--- /dev/null
+++ b/recon_lw/interpretation/converter/datetime.py
@@ -0,0 +1,26 @@
+from recon_lw.interpretation.converter.base import Converter
+import datetime
+from recon_lw.core.type.types import Message
+from recon_lw.interpretation.adapter.base import Adapter
+
+
+class DateTimeConverter(Converter):
+    """Parses a string into a ``datetime.datetime`` using ``fmt``."""
+
+    def __init__(self, fmt='%Y-%m-%d %H:%M:%S'):
+        self.fmt = fmt
+
+    def convert(self, message: Message, field: str, val: str, adapter: Adapter):
+        return datetime.datetime.strptime(val, self.fmt)
+
+
+class DateConverter(Converter):
+    """Parses a string into a ``datetime.date`` using ``fmt``."""
+
+    def __init__(self, fmt='%Y-%m-%d'):
+        self.fmt = fmt
+
+    def convert(self, message: Message, field: str, val: str, adapter: Adapter):
+        # ``date.strftime`` formats a date and raises TypeError for a string;
+        # parse with ``strptime`` and keep only the date part.
+        return datetime.datetime.strptime(val, self.fmt).date()
diff --git a/recon_lw/interpretation/converter/dictionary.py b/recon_lw/interpretation/converter/dictionary.py
new file mode 100644
index 0000000..00e378d
--- /dev/null
+++ b/recon_lw/interpretation/converter/dictionary.py
@@ -0,0 +1,52 @@
+from abc import ABC
+
+from recon_lw.interpretation.converter.base import Converter
+from typing import Set, Any
+
+from recon_lw.interpretation.adapter import Adapter
+from recon_lw.core.type.types import Message
+
+
+class DictPathConverter(Converter):
+    """Walks a dictionary along ``path`` and returns the nested dictionary.
+
+    NOTE(review): ``path`` is annotated as a set, which has no defined
+    order -- callers presumably pass an ordered sequence; confirm.
+    """
+
+    def __init__(self,
+                 path: Set[str] = None,
+                 ):
+        self.path = path
+
+    def convert(self, message: Message, field: str, val: Any, adapter: Adapter):
+        if not isinstance(val, dict):
+            raise ValueError('DictPathConverter expects a dictionary value passed from the converter that is before '
+                             'in the chain.')
+        # ``path`` defaults to None; treat that as "no keys to follow"
+        # instead of failing with a TypeError on iteration.
+        for key in self.path or ():
+            val = val[key]
+            if not isinstance(val, dict):
+                raise ValueError('DictPathConverter expects a dictionary value passed from the converter that is '
+                                 'before in the chain.')
+
+        return val
+
+
+class DictKeysConverter(Converter, ABC):
+    """Looks up the key built by joining ``keys`` with ``separator``."""
+
+    def __init__(self,
+                 keys: Set[str] = None,
+                 separator: str = "/"):
+        self.key = separator.join(keys)
+
+    def convert(self, message: Message, field: str, val: Any, adapter: Adapter):
+        # This method was named ``extract``, which left the abstract
+        # ``Converter.convert`` unimplemented and the class uninstantiable.
+        assert isinstance(val, dict)
+        return val[self.key]
+
+    # Backward-compatible alias for the previous method name.
+    extract = convert
diff --git a/recon_lw/interpretation/converter/dummy.py b/recon_lw/interpretation/converter/dummy.py
new file mode 100644
index 0000000..f9f0f51
--- /dev/null
+++ b/recon_lw/interpretation/converter/dummy.py
@@ -0,0 +1,10 @@
+from recon_lw.interpretation.converter.base import Converter
+from recon_lw.interpretation.adapter import Adapter
+from recon_lw.core.type.types import Message
+
+
+class DummyConverter(Converter):
+    """Pass-through converter: returns the value unchanged."""
+
+    def convert(self, message: Message, field: str, val: str, adapter: Adapter):
+        return val
\ No newline at end of file
diff --git a/recon_lw/interpretation/converter/empty_string.py b/recon_lw/interpretation/converter/empty_string.py
new file mode 100644
index 0000000..52c88c0
--- /dev/null
+++ b/recon_lw/interpretation/converter/empty_string.py
@@ -0,0 +1,13 @@
+from recon_lw.interpretation.converter.base import Converter
+from recon_lw.interpretation.field_extractor.base import Extractor
+from recon_lw.core.type.types import Message
+from recon_lw.interpretation.adapter.base import Adapter
+
+
+class EmptyStringConverter(Converter):
+
+    def convert(self, message: Message, field: str, val: str, adapter: Adapter):
+        if val is None or val == '':
+            return Extractor.NOT_EXTRACTED
+
+        return val
diff --git a/recon_lw/interpretation/converter/function.py b/recon_lw/interpretation/converter/function.py
new file mode 100644
index 0000000..319d1ee
--- /dev/null
+++ b/recon_lw/interpretation/converter/function.py
@@ -0,0 +1,12 @@
+from recon_lw.interpretation.converter.base import Converter
+from typing import
Callable, Any
+from recon_lw.core.type.types import Message
+from recon_lw.interpretation.adapter.base import Adapter
+
+
+class FunctionConverter(Converter):
+    def __init__(self, function: Callable[[str, Adapter], Any]):
+        self.function = function
+
+    def convert(self, message: Message, field: str, val: str, adapter: Adapter):
+        return self.function(val, adapter)
diff --git a/recon_lw/interpretation/converter/length.py b/recon_lw/interpretation/converter/length.py
new file mode 100644
index 0000000..d895c9d
--- /dev/null
+++ b/recon_lw/interpretation/converter/length.py
@@ -0,0 +1,12 @@
+from typing import Any
+
+from recon_lw.interpretation.converter.base import Converter
+from recon_lw.interpretation.adapter import Adapter
+from recon_lw.core.type.types import Message
+
+
+class LengthConverter(Converter):
+    """Returns ``len(val)``."""
+
+    def convert(self, message: Message, field: str, val: Any, adapter: Adapter):
+        return len(val)
diff --git a/recon_lw/interpretation/converter/list.py b/recon_lw/interpretation/converter/list.py
new file mode 100644
index 0000000..c1422a9
--- /dev/null
+++ b/recon_lw/interpretation/converter/list.py
@@ -0,0 +1,89 @@
+from recon_lw.interpretation.converter.base import Converter
+from typing import List, Dict, Optional, Callable
+from collections import defaultdict
+
+from recon_lw.interpretation.adapter import Adapter
+from recon_lw.core.type.types import Message
+
+
+class AggregationListConverter(Converter):
+    """Aggregates flattened ``<field_name>.<n>.<sub_field>`` body entries.
+
+    With ``result_type=dict`` each group becomes one entry whose key and
+    value are the ``separator``-joined ``keys`` / ``values`` sub-fields;
+    with ``result_type=list`` each group becomes a dict of stringified values.
+    """
+
+    def __init__(self,
+                 field_name: str,
+                 keys: Optional[List[str]] = None,
+                 values: Optional[List[str]] = None,
+                 separator: str = "/",
+                 result_type=dict,
+                 keys_remap: Optional[Dict[str, str]] = None
+                 ):
+        self.keys = keys
+        self.values = values
+        self.separator = separator
+        self.result_type = result_type
+        self.keys_remap = keys_remap
+        self.field_name = field_name
+        self.cache = None
+        # Message the cached result was computed from.
+        self._cached_message = None
+
+    def convert(self, message: Message, field: str, val: str, adapter: Adapter):
+
+        # Reuse the cache only for the message it was built from; it used to
+        # be returned for every later message as well.
+        if self.cache is not None and self._cached_message is message:
+            return self.cache
+
+        body = adapter.get_body(message)
+
+        buffer = defaultdict(dict)
+        field = self.field_name
+        field += '.'
+        for k, v in body.items():
+            if k.startswith(field):
+                _, n, sub_field = k.split('.', maxsplit=2)
+                if self.keys_remap:
+                    sub_field = self.keys_remap.get(sub_field, sub_field)
+                    if not sub_field:
+                        continue
+                buffer[n][sub_field] = v
+
+        result = None
+        if self.result_type == dict:
+            result = {}
+
+            for _, v in buffer.items():
+                key = self.separator.join([str(v[k]) for k in self.keys])
+                value = self.separator.join([str(v[k]) for k in self.values])
+
+                if key in result:
+                    raise ValueError(f"Duplicate key = {key}, message - {message}")
+
+                result[key] = value
+        elif self.result_type == list:
+            result = list({k: str(v) for k, v in val.items()} for val in buffer.values())
+
+        self.cache = result
+        self._cached_message = message
+
+        return result
+
+
+class IndexListConverter(Converter):
+    """Picks one element of a list; non-list values pass through unchanged."""
+
+    def __init__(self,
+                 index_calculation_function: Callable[[Message, str, list, Adapter], int]
+                 ):
+        self.index_calculation_function = index_calculation_function
+
+    def convert(self, message: Message, field: str, val: str, adapter: Adapter):
+        if not isinstance(val, list):
+            return val
+
+        return val[self.index_calculation_function(message, field, val, adapter)]
diff --git a/recon_lw/interpretation/converter/mapping.py b/recon_lw/interpretation/converter/mapping.py
new file mode 100644
index 0000000..2aa057d
--- /dev/null
+++ b/recon_lw/interpretation/converter/mapping.py
@@ -0,0 +1,16 @@
+from recon_lw.interpretation.converter.base import Converter
+from recon_lw.interpretation.field_extractor.base import Extractor
+from recon_lw.core.type.types import Message
+from recon_lw.interpretation.adapter.base import Adapter
+
+
+class MappingConverter(Converter):
+    def __init__(self, mapping: dict):
+        self.mapping = mapping
+
+    def convert(self, message: Message, field: str, val: str, adapter: Adapter):
+        # NOTE(review): the lookup key is the field name, not ``val`` --
+        # confirm this is intended.
+        if field in self.mapping:
+            return self.mapping[field]
+        return Extractor.NOT_EXTRACTED
diff --git a/recon_lw/interpretation/converter/regex.py
b/recon_lw/interpretation/converter/regex.py
new file mode 100644
index 0000000..1d274f2
--- /dev/null
+++ b/recon_lw/interpretation/converter/regex.py
@@ -0,0 +1,17 @@
+from recon_lw.interpretation.converter.base import Converter
+import re
+from typing import Pattern, Any
+
+from recon_lw.interpretation.adapter import Adapter
+from recon_lw.core.type.types import Message
+
+
+class RegexConverter(Converter):
+    """Matches the value from its start; returns the match object or None."""
+
+    def __init__(self, regex: Pattern[str]):
+        self.regex = re.compile(regex)
+
+    def convert(self, message: Message, field: str, val: Any, adapter: Adapter):
+        match = self.regex.match(val)
+        return match
\ No newline at end of file
diff --git a/recon_lw/interpretation/converter/type.py b/recon_lw/interpretation/converter/type.py
new file mode 100644
index 0000000..8f65b0f
--- /dev/null
+++ b/recon_lw/interpretation/converter/type.py
@@ -0,0 +1,41 @@
+from recon_lw.interpretation.converter.base import Converter
+from enum import Enum
+from recon_lw.core.type.types import Message
+from recon_lw.interpretation.adapter.base import Adapter
+
+class TypeAlias(Enum):
+    Int = 'int'
+    Float = 'float'
+    string = 'str'
+    list = 'list'
+    dict = 'dict'
+
+class TypeConverter(Converter):
+    """Casts a value to the builtin type named by a ``TypeAlias``."""
+
+    def __init__(self, type_alias: TypeAlias):
+        self.type_alias = type_alias
+        self.converter_func = TypeConverter.converter_func(type_alias)
+
+    @staticmethod
+    def converter_func(type_alias: TypeAlias):
+        """Return the builtin constructor for ``type_alias``.
+
+        Accepts a ``TypeAlias`` member or its plain string value.
+        """
+        type_converters = {
+            'int': int,
+            'float': float,
+            'str': str,
+            'list': list,
+            'dict': dict
+        }
+
+        # The table is keyed by the alias *value*; indexing it with the enum
+        # member itself always raised KeyError.
+        key = type_alias.value if isinstance(type_alias, TypeAlias) else type_alias
+        return type_converters[key]
+
+    def convert(self, message: Message, field: str, val: str, adapter: Adapter):
+        # Return the converted value (the result used to be dropped).
+        return self.converter_func(val)
diff --git a/recon_lw/interpretation/field_checker/__init__.py b/recon_lw/interpretation/field_checker/__init__.py
new file mode 100644
index 0000000..39afabd
--- /dev/null
+++ b/recon_lw/interpretation/field_checker/__init__.py
@@ -0,0 +1,2 @@
+from recon_lw.interpretation.field_checker.base import *
+from recon_lw.interpretation.field_checker.simple import *
\ No newline at
end of file
diff --git a/recon_lw/interpretation/field_checker/base.py b/recon_lw/interpretation/field_checker/base.py
new file mode 100644
index 0000000..4a12194
--- /dev/null
+++ b/recon_lw/interpretation/field_checker/base.py
@@ -0,0 +1,24 @@
+from abc import ABC, abstractmethod
+from dataclasses import Field
+from typing import Dict, Iterator, Protocol
+from recon_lw.interpretation.check_rule import IFieldCheckRule
+from recon_lw.interpretation.check_rule.base import FieldCheckResult, IFieldCheckRuleProtocol
+
+
+class FieldChecker(ABC):
+    """Base class for checkers that compare two messages field by field."""
+
+    def __init__(self, rules: Dict[str, IFieldCheckRuleProtocol]):
+        self.rules = rules
+
+    def __call__(self, msg1, msg2):
+        return self.compare(msg1, msg2)
+
+    @abstractmethod
+    def compare(self, msg1, msg2) -> Iterator[FieldCheckResult]:
+        """Yield check results for the pair of messages."""
+        pass
+
+class FieldCheckerProtocol(Protocol):
+    def __call__(self, msg1, msg2):
+        pass
diff --git a/recon_lw/interpretation/field_checker/simple.py b/recon_lw/interpretation/field_checker/simple.py
new file mode 100644
index 0000000..5e5d759
--- /dev/null
+++ b/recon_lw/interpretation/field_checker/simple.py
@@ -0,0 +1,20 @@
+from typing import Iterator, Dict
+from recon_lw.interpretation.check_rule import IFieldCheckRule
+from recon_lw.interpretation.check_rule import FieldCheckResult
+from recon_lw.interpretation.check_rule.base import IFieldCheckRuleProtocol
+from recon_lw.interpretation.field_checker.base import FieldChecker
+
+
+class SimpleFieldChecker(FieldChecker):
+    """Runs every configured rule and yields the results that failed."""
+
+    def __init__(self, rules: Dict[str, IFieldCheckRuleProtocol]):
+        super().__init__(rules)
+
+    def compare(self, msg1, msg2) -> Iterator[FieldCheckResult]:
+        for field, rule in self.rules.items():
+            check_rule_result = rule(field, msg1, msg2)
+
+            # Only results whose ``result`` is exactly False are reported.
+            if check_rule_result.result is False:
+                yield check_rule_result
diff --git a/recon_lw/interpretation/field_extractor/__init__.py
b/recon_lw/interpretation/field_extractor/__init__.py
new file mode 100644
index 0000000..e56d9e5
--- /dev/null
+++ b/recon_lw/interpretation/field_extractor/__init__.py
@@ -0,0 +1,7 @@
+from recon_lw.interpretation.field_extractor.base import Extractor
+from recon_lw.interpretation.field_extractor.cache import CacheFillWithConditionExtractor, CacheFillWithConditionExtractorBuilder
+from recon_lw.interpretation.field_extractor.constant import ConstantExtractor, ConstantExtractorBuilder
+from recon_lw.interpretation.field_extractor.converter import ChainConverterExtractor, BasicConverterExtractor, \
+    BasicConverterExtractorBuilder, ChainConverterExtractorBuilder
+from recon_lw.interpretation.field_extractor.dictionary import BasicDictExtractor, BasicDictExtractorBuilder
+from recon_lw.interpretation.field_extractor.condition import ConditionExtractor, ConditionMaskExtractor, MaskValueProvider
diff --git a/recon_lw/interpretation/field_extractor/any_val.py b/recon_lw/interpretation/field_extractor/any_val.py
new file mode 100644
index 0000000..d308ee9
--- /dev/null
+++ b/recon_lw/interpretation/field_extractor/any_val.py
@@ -0,0 +1,30 @@
+from typing import Any
+
+from recon_lw.interpretation.adapter import Adapter
+from recon_lw.interpretation.field_extractor.base import Extractor
+from recon_lw.core.type.types import Message
+
+
+class AnyVal:
+    """Wildcard value: equal to anything except ``Extractor.NOT_EXTRACTED``."""
+
+    def __eq__(self, other):
+        return Extractor.NOT_EXTRACTED != other
+
+    def __ne__(self, other):
+        # NOTE(review): always False, so for NOT_EXTRACTED both ``==`` and
+        # ``!=`` are False -- confirm this asymmetry is intended.
+        return False
+
+    def __str__(self):
+        return "*"
+
+
+class AnyValExtractor(Extractor):
+    """Extractor that always yields a fresh ``AnyVal`` wildcard."""
+
+    def __init__(self):
+        super().__init__('AnyVal')
+
+    def extract(self, message: Message, adapter: Adapter) -> Any:
+        return AnyVal()
diff --git a/recon_lw/interpretation/field_extractor/base.py b/recon_lw/interpretation/field_extractor/base.py
new file mode 100644
index 0000000..dbc0e26
--- /dev/null
+++ b/recon_lw/interpretation/field_extractor/base.py
@@ -0,0 +1,23 @@
+from recon_lw.interpretation.adapter.base import Adapter
+from recon_lw.core.type.types import Message
+from typing import Optional, Any, Protocol, runtime_checkable
+from abc import
ABC, abstractmethod
+
+class Extractor(ABC):
+    """
+    Base class for extractors; calling an instance delegates to extract().
+    """
+    NOT_EXTRACTED = "_NE_"
+
+    def __init__(self, field_name: str):
+        self.field_name = field_name
+
+    def __call__(self, message: Message, adapter: Adapter) -> Optional[Any]:
+        return self.extract(message, adapter)
+
+    def extract(self, message: Message, adapter: Adapter) -> Optional[Any]:
+        pass
+
+class ExtractorProtocol(Protocol):
+    def __call__(self, message: Message, adapter: Adapter):
+        pass
\ No newline at end of file
diff --git a/recon_lw/interpretation/field_extractor/cache.py b/recon_lw/interpretation/field_extractor/cache.py
new file mode 100644
index 0000000..12c0d4c
--- /dev/null
+++ b/recon_lw/interpretation/field_extractor/cache.py
@@ -0,0 +1,127 @@
+from recon_lw.interpretation.field_extractor.base import Extractor
+from typing import Optional
+
+from recon_lw.interpretation.adapter.base import Adapter
+from recon_lw.core.type.types import Message
+from recon_lw.interpretation.condition import Condition
+
+
+class SimpleCacheExtractor(Extractor):
+    """
+    Caches and extracts field values, using an entity ID to differentiate cache entries.
+    """
+
+    def __init__(self, field_name: str, entity_id_extractor: Extractor, field_extractor: Extractor, empty_values=None):
+        super().__init__(field_name)
+        self.empty_values = empty_values if empty_values is not None else {Extractor.NOT_EXTRACTED, ""}
+        self.field_name = field_name
+        self.entity_id_extractor = entity_id_extractor
+        self.field_extractor = field_extractor
+
+    def extract(self, message: Message, adapter) -> Optional[str]:
+        entity_id = self.entity_id_extractor(message, adapter)
+        val = self.field_extractor(message, adapter)
+        if val not in self.empty_values:
+            adapter.adapter_context.cache.setdefault(entity_id, {})[self.field_name] = val
+            return val
+        return adapter.adapter_context.cache.get(entity_id, {}).get(self.field_name, Extractor.NOT_EXTRACTED)
+
+
+class SimpleCacheExtractorBuilder:
+    def __init__(self):
+        self.field_name = ""
+        self.entity_id_extractor = None
+        self.field_extractor = None
+        self.empty_values = None
+
+    def set_field_name(self, field_name: str) -> 'SimpleCacheExtractorBuilder':
+        self.field_name = field_name
+        return self
+
+    def set_entity_id_extractor(self, extractor: Extractor) -> 'SimpleCacheExtractorBuilder':
+        self.entity_id_extractor = extractor
+        return self
+
+    def set_field_extractor(self, extractor: Extractor) -> 'SimpleCacheExtractorBuilder':
+        self.field_extractor = extractor
+        return self
+
+    def set_empty_values(self, empty_values: set) -> 'SimpleCacheExtractorBuilder':
+        self.empty_values = empty_values
+        return self
+
+    def build(self) -> SimpleCacheExtractor:
+        if not self.field_name or not self.entity_id_extractor or not self.field_extractor:
+            raise ValueError("Field name, entity ID extractor, and field extractor must be set.")
+        return SimpleCacheExtractor(self.field_name, self.entity_id_extractor, self.field_extractor, self.empty_values)
+
+
+class CacheFillWithConditionExtractor(Extractor):
+    """
+    Extracts and caches field values based on a condition.
+    """
+
+    def __init__(self,
+                 field_name: str,
+                 entity_id_extractor: Extractor,
+                 field_extractor: Extractor,
+                 no_val_in_cache=b"404",
+                 condition=None):
+        super().__init__(field_name)
+        self.field_name = field_name
+        self.entity_id_extractor = entity_id_extractor
+        self.field_extractor = field_extractor
+        self.no_val_in_cache = no_val_in_cache
+        self.condition = condition if condition is not None else lambda msg: False
+
+    def extract(self, message: Message, adapter: Adapter) -> Optional[str]:
+        entity_id = self.entity_id_extractor(message, adapter)
+        val = self.field_extractor(message, adapter)
+        # NOTE(review): the condition is called with the message only, while
+        # the builder's set_condition is annotated with ``Condition``, whose
+        # call signature is (message, adapter) -- confirm which is intended.
+        if val in [self.no_val_in_cache, Extractor.NOT_EXTRACTED] or self.condition(message):
+            adapter.adapter_context.cache.setdefault(entity_id, {})[self.field_name] = val
+        else:
+            val = adapter.adapter_context.cache.get(entity_id, {}).get(self.field_name, self.no_val_in_cache)
+        return val
+
+
+class CacheFillWithConditionExtractorBuilder:
+    def __init__(self):
+        self.field_name = ""
+        self.entity_id_extractor = None
+        self.field_extractor = None
+        self.no_val_in_cache = b"404"
+        self.condition = lambda msg: False
+
+    def set_field_name(self, field_name: str) -> 'CacheFillWithConditionExtractorBuilder':
+        self.field_name = field_name
+        return self
+
+    def set_entity_id_extractor(self, extractor: Extractor) -> 'CacheFillWithConditionExtractorBuilder':
+        self.entity_id_extractor = extractor
+        return self
+
+    def set_field_extractor(self, extractor: Extractor) -> 'CacheFillWithConditionExtractorBuilder':
+        self.field_extractor = extractor
+        return self
+
+    def set_no_val_in_cache(self, no_val: bytes) -> 'CacheFillWithConditionExtractorBuilder':
+        self.no_val_in_cache = no_val
+        return self
+
+    def set_condition(self, condition: Condition) -> 'CacheFillWithConditionExtractorBuilder':
+        self.condition = condition
+        return self
+
+    def build(self) -> CacheFillWithConditionExtractor:
+        if not self.field_name or not self.entity_id_extractor or not self.field_extractor:
+            raise ValueError("Field name, entity ID extractor, and field extractor must be set.")
+        return CacheFillWithConditionExtractor(
+            self.field_name,
+            self.entity_id_extractor,
+            self.field_extractor,
+            self.no_val_in_cache,
+            self.condition
+        )
diff --git a/recon_lw/interpretation/field_extractor/concat.py b/recon_lw/interpretation/field_extractor/concat.py
new file mode 100644
index 0000000..4746bd5
--- /dev/null
+++ b/recon_lw/interpretation/field_extractor/concat.py
@@ -0,0 +1,18 @@
+from typing import List, Optional, Any
+
+from recon_lw.core.type.types import Message
+from recon_lw.interpretation.adapter import Adapter
+
+from recon_lw.interpretation.field_extractor.base import Extractor
+
+class ConcatExtractor(Extractor):
+    def __init__(self, extractors: List[Extractor], separator: str=''):
+        super().__init__('any')
+        self.extractors = extractors
+        self.separator = separator
+
+    def extract(self, message: Message, adapter: Adapter) -> Optional[Any]:
+        vals = []
+        for extractor in self.extractors:
+            vals.append(extractor(message, adapter))
+        return self.separator.join(vals)
\ No newline at end of file
diff --git a/recon_lw/interpretation/field_extractor/condition.py b/recon_lw/interpretation/field_extractor/condition.py
new file mode 100644
index 0000000..3e786f4
--- /dev/null
+++ b/recon_lw/interpretation/field_extractor/condition.py
@@ -0,0 +1,51 @@
+from typing import Callable
+from typing import Optional
+
+from recon_lw.interpretation.field_extractor.base import Extractor
+from recon_lw.interpretation.adapter.base import Adapter
+from recon_lw.interpretation.condition import Condition
+from recon_lw.core.type.types import Message
+
+
+class MaskValueProvider:
+    def __init__(self, mask_value_function: Callable[[Message, Adapter], str]) -> None:
+        self.mask_value_function = mask_value_function
+
+    def get_mask_value(self, message: Message, adapter: Adapter):
+        return self.mask_value_function(message, adapter)
+
+
+class ConditionMaskExtractor(Extractor):
+    def __init__(self,
+                 base_extractor: Extractor,
+                 condition: Condition,
+                 mask_value_provider: MaskValueProvider
+                 ):
+        super().__init__('any')
+        self.condition = condition
+        self.mask_value_provider = mask_value_provider
+        self.base_extractor = base_extractor
+
+    def extract(self, message: Message, adapter: Adapter) -> Optional[str]:
+        if self.condition(message, adapter):
+            return self.mask_value_provider.get_mask_value(message, adapter)
+
+        return self.base_extractor(message, adapter)
+
+
+class ConditionExtractor(Extractor):
+    def __init__(self,
+                 true_extractor: Extractor,
+                 false_extractor: Extractor,
+                 condition: Condition
+                 ):
+        super().__init__('any')
+        self.true_extractor = true_extractor
+        self.false_extractor = false_extractor
+        self.condition = condition
+
+    def extract(self, message: Message, adapter: Adapter) -> Optional[str]:
+        if self.condition(message, adapter):
+            return self.true_extractor(message, adapter)
+        else:
+            return self.false_extractor(message, adapter)
diff --git a/recon_lw/interpretation/field_extractor/constant.py b/recon_lw/interpretation/field_extractor/constant.py
new file mode 100644
index 0000000..a96a26f
--- /dev/null
+++ b/recon_lw/interpretation/field_extractor/constant.py
@@ -0,0 +1,40 @@
+from recon_lw.interpretation.field_extractor.base import Extractor
+from typing import Optional, Any
+
+from recon_lw.interpretation.adapter.base import Adapter
+from recon_lw.core.type.types import Message
+
+
+class ConstantExtractor(Extractor):
+    """
+    Always returns a constant value.
+    """
+
+    def __init__(self, return_value: Any):
+        super().__init__('any')
+        self.return_value = return_value
+
+    def extract(self, message: Message, adapter: Adapter) -> Optional[str]:
+        return self.return_value
+
+
+class ConstantExtractorBuilder:
+    def __init__(self):
+        self.return_value = None
+
+    def set_return_value(self, return_value: Any) -> 'ConstantExtractorBuilder':
+        self.return_value = return_value
+        return self
+
+    def build(self) -> ConstantExtractor:
+        if self.return_value is None:
+            raise ValueError("Return value must be set.")
+        return ConstantExtractor(self.return_value)
+
+
+class NEConstantExtractor(ConstantExtractor):
+    def __init__(self):
+        super().__init__(Extractor.NOT_EXTRACTED)
+
+    def extract(self, message: Message, adapter: Adapter) -> Optional[str]:
+        return super().extract(message, adapter)
diff --git a/recon_lw/interpretation/field_extractor/converter.py b/recon_lw/interpretation/field_extractor/converter.py
new file mode 100644
index 0000000..9ca898a
--- /dev/null
+++ b/recon_lw/interpretation/field_extractor/converter.py
@@ -0,0 +1,91 @@
+from recon_lw.interpretation.field_extractor.base import Extractor
+from recon_lw.interpretation.adapter.base import Adapter
+from typing import Optional, List
+from recon_lw.core.type.types import Message
+from recon_lw.interpretation.converter.base import Converter, ConverterProtocol
+
+
+class BasicConverterExtractor(Extractor):
+    def __init__(self, field_name: str, converter: ConverterProtocol, base_extractor: Extractor):
+        super().__init__(field_name)
+        self.converter = converter
+        self.base_extractor = base_extractor
+
+    def extract(self, message: Message, adapter: Adapter) -> Optional[str]:
+        val = self.base_extractor(message, adapter)
+
+        return self.converter(message, self.field_name, val, adapter)
+
+
+class BasicConverterExtractorBuilder:
+    def __init__(self):
+        self.field_name = ""
+        self.converter = None
+        self.base_extractor = None
+
+    def set_field_name(self, field_name: str) -> 'BasicConverterExtractorBuilder':
+        self.field_name = field_name
+        return self
+
+    def set_converter(self, converter: Converter) -> 'BasicConverterExtractorBuilder':
+        self.converter = converter
+        return self
+
+    def set_base_extractor(self, base_extractor: Extractor):
+        self.base_extractor = base_extractor
+        return self
+
+    def build(self) -> BasicConverterExtractor:
+        if not self.field_name or not self.converter:
+            raise ValueError("Field name and converter must be set.")
+        return BasicConverterExtractor(self.field_name, self.converter, self.base_extractor)
+
+
+class ChainConverterExtractor(Extractor):
+    def __init__(self, field_name: str, base_extractor: Extractor, converter_chain: List[ConverterProtocol]):
+        super().__init__(field_name)
+        self.base_extractor = base_extractor
+        self.converter_chain = converter_chain
+
+    def extract(self, message: Message, adapter: Adapter) -> Optional[str]:
+        val = self.base_extractor(message, adapter)
+        if val != Extractor.NOT_EXTRACTED:
+            val = str(val)
+
+        for converter in self.converter_chain:
+            val = converter(message, self.field_name, val, adapter)
+        return val
+
+
+class ChainConverterExtractorBuilder(ChainConverterExtractor):
+    """Fluent builder for ``ChainConverterExtractor``.
+
+    The constructor arguments are optional and, as before, are not kept:
+    the builder always starts empty and is filled through the setters.
+    """
+
+    def __init__(self, field_name: str = "", base_extractor: Extractor = None,
+                 converter_chain: List[Converter] = None):
+        # Defaults let the builder be created without arguments, like the
+        # other builders; a missing chain is passed on as an empty list.
+        super().__init__(field_name, base_extractor, converter_chain or [])
+        self.field_name = ""
+        self.converter_chain: List[Converter] = []
+        self.base_extractor = None
+
+    def set_field_name(self, field_name: str) -> 'ChainConverterExtractorBuilder':
+        self.field_name = field_name
+        return self
+
+    def add_converter(self, converter: Converter) -> 'ChainConverterExtractorBuilder':
+        self.converter_chain.append(converter)
+        return self
+
+    def set_base_extractor(self, base_extractor: Extractor) -> 'ChainConverterExtractorBuilder':
+        self.base_extractor = base_extractor
+        return self
+
+    def build(self) -> ChainConverterExtractor:
+        if not self.field_name or len(self.converter_chain) == 0:
+            raise ValueError("Field name and converter must be set.")
+        return ChainConverterExtractor(self.field_name, self.base_extractor, self.converter_chain)
diff --git a/recon_lw/interpretation/field_extractor/dictionary.py b/recon_lw/interpretation/field_extractor/dictionary.py
new file mode 100644
index 0000000..d8c129c
--- /dev/null
+++ b/recon_lw/interpretation/field_extractor/dictionary.py
@@ -0,0 +1,47 @@
+from recon_lw.interpretation.field_extractor.base import Extractor
+from recon_lw.interpretation.adapter.base import Adapter
+from typing import Optional
+from recon_lw.core.type.types import Message
+
+
+class BasicDictExtractor(Extractor):
+    """Reads ``field_name`` straight from the message mapping."""
+
+    def __init__(self, field_name: str, default_value: str = None, strip: bool = False, cast_to_str: bool = True):
+        super().__init__(field_name)
+        self.strip = strip
+        self.default_value = default_value
+        self.cast_to_str = cast_to_str
+
+    def extract(self, message: Message, adapter: Adapter) -> Optional[str]:
+        # Compare against None so that falsy defaults ('' or 0) are honoured.
+        default = self.default_value if self.default_value is not None else Extractor.NOT_EXTRACTED
+        val = message.get(self.field_name, default)
+        if self.cast_to_str:
+            val = str(val)
+        if self.strip:
+            val = val.strip()
+
+        return val
+
+
+class BasicDictExtractorBuilder:
+    """Fluent builder for ``BasicDictExtractor``."""
+
+    def __init__(self):
+        self.field_name = ""
+        self.strip = False
+
+    def set_field_name(self, field_name: str) -> 'BasicDictExtractorBuilder':
+        self.field_name = field_name
+        return self
+
+    def set_strip(self, strip: bool) -> 'BasicDictExtractorBuilder':
+        self.strip = strip
+        return self
+
+    def build(self) -> BasicDictExtractor:
+        if not self.field_name:
+            raise ValueError("Field name must be set.")
+        # Forward the strip flag; it used to be silently ignored.
+        return BasicDictExtractor(self.field_name, strip=self.strip)
diff --git a/recon_lw/interpretation/field_extractor/list.py b/recon_lw/interpretation/field_extractor/list.py
new file mode 100644
index 0000000..c8a9586
--- /dev/null
+++ b/recon_lw/interpretation/field_extractor/list.py
@@ -0,0 +1,62 @@
+from collections import defaultdict
+from typing import Optional, List, Dict
+
+from recon_lw.interpretation.field_extractor.base import Extractor
+from
class ListAggregationExtractor(Extractor):
    """Aggregate flattened repeating-group fields of a message body.

    Body keys of the form ``<field_name>.<n>.<sub_field>`` are grouped by
    ``<n>``. Depending on ``result_type`` the groups are returned as

    * ``dict``: one entry per group, where key and value are the group's
      ``keys`` / ``values`` sub-fields joined with ``separator``;
    * ``list``: one ``{sub_field: str(value)}`` dict per group.

    Any other ``result_type`` yields ``None``.

    Args:
        field_name: prefix of the flattened group in the body.
        keys: sub-fields that form the key of a ``dict`` result.
        values: sub-fields that form the value of a ``dict`` result.
        separator: string used to join several sub-field values.
        result_type: ``dict`` or ``list``.
        keys_remap: optional renaming of sub-fields; a sub-field remapped to
            an empty value is dropped.
    """

    def __init__(self,
                 field_name: str,
                 keys: Optional[List[str]] = None,
                 values: Optional[List[str]] = None,
                 separator: str = "/",
                 result_type=dict,
                 keys_remap: Optional[Dict[str, str]] = None
                 ):
        super().__init__('any')
        self.keys = keys
        self.values = values
        self.separator = separator
        self.result_type = result_type
        self.keys_remap = keys_remap
        self.field_name = field_name
        self.cache = None
        # The message object the cached result was computed from. Keeping a
        # reference (rather than id()) guarantees the identity check below
        # cannot be fooled by a recycled object id.
        self._cached_message = None

    def extract(self, message: Message, adapter: Adapter) -> Optional[str]:
        """Return the aggregated group for ``message``.

        The result is memoised for the most recent message only; a different
        message object is always recomputed, so one message's aggregation is
        never returned for another.

        Raises:
            ValueError: for a ``dict`` result when two groups produce the
                same key.
        """
        if self.cache is not None and self._cached_message is message:
            return self.cache

        body = adapter.get_body(message)

        prefix = self.field_name + '.'
        buffer = defaultdict(dict)
        for k, v in body.items():
            if not k.startswith(prefix):
                continue
            # Split what follows the prefix, not the whole key, so a dotted
            # field_name does not shift the index/sub-field positions.
            n, sep, sub_field = k[len(prefix):].partition('.')
            if not sep:
                # "<field_name>.<n>" without a sub-field carries nothing to
                # aggregate.
                continue
            if self.keys_remap:
                sub_field = self.keys_remap.get(sub_field, sub_field)
                if not sub_field:
                    continue
            buffer[n][sub_field] = v

        result = None
        if self.result_type == dict:
            result = {}

            for group in buffer.values():
                key = self.separator.join([str(group[k]) for k in self.keys])
                value = self.separator.join([str(group[k]) for k in self.values])

                if key in result:
                    raise ValueError(f"Duplicate key = {key}, message - {message}")

                result[key] = value
        elif self.result_type == list:
            result = [{k: str(v) for k, v in group.items()} for group in buffer.values()]

        self.cache = result
        self._cached_message = message

        return result
class SimpleRefDataFieldExtractor(Extractor):
    """
    Compares extracted data against a reference dataset, highlighting discrepancies.

    Args:
        field_name: name handed to the ``Extractor`` base class.
        base_extractor: callable ``(message, adapter)`` giving the message value.
        ref_data_key_extractor: callable ``(message, adapter)`` giving the key
            used to look up ``ref_data_dict``.
        ref_data_dict: reference values by key.
        not_found_field_prefix: text used in the discrepancy string when the
            reference dataset has no value for the key.
    """

    def __init__(self, field_name: str, base_extractor: Extractor, ref_data_key_extractor: Extractor,
                 ref_data_dict: Dict[str, Any], not_found_field_prefix="NOT_FOUND"):
        super().__init__(field_name)
        self.base_extractor = base_extractor
        self.ref_data_key_extractor = ref_data_key_extractor
        self.ref_data_dict = ref_data_dict
        self.not_found_field_prefix = not_found_field_prefix

    def extract(self, message: Message, adapter: Adapter) -> Optional[str]:
        """Return the message value, or a discrepancy description.

        The message value is returned unchanged when it was not extracted or
        when it equals the reference value. Otherwise the result is
        ``"[<ref> != <msg>]"`` when a reference value exists and
        ``"<not_found_field_prefix> != <msg>"`` when it does not.
        """
        message_value = self.base_extractor(message, adapter)
        refdata_key = self.ref_data_key_extractor(message, adapter)
        refdata_value = self.ref_data_dict.get(refdata_key)

        if message_value != Extractor.NOT_EXTRACTED and refdata_value != message_value:
            # dict.get() yields None for a missing key. Testing for None (not
            # truthiness) keeps a present-but-falsy reference value such as 0
            # or "" from being reported as "not found".
            if refdata_value is not None:
                return f"[{refdata_value} != {message_value}]"
            return f"{self.not_found_field_prefix} != {message_value}"
        return message_value
= None + self.ref_data_dict = {} + self.not_found_field_prefix = "NOT_FOUND" + + def set_field_name(self, field_name: str) -> 'SimpleRefDataFieldExtractorBuilder': + self.field_name = field_name + return self + + def set_base_extractor(self, extractor: Extractor) -> 'SimpleRefDataFieldExtractorBuilder': + self.base_extractor = extractor + return self + + def set_ref_data_key_extractor(self, extractor: Extractor) -> 'SimpleRefDataFieldExtractorBuilder': + self.ref_data_key_extractor = extractor + return self + + def set_ref_data_dict(self, ref_data_dict: Dict[str, Any]) -> 'SimpleRefDataFieldExtractorBuilder': + self.ref_data_dict = ref_data_dict + return self + + def set_not_found_field_prefix(self, prefix: str) -> 'SimpleRefDataFieldExtractorBuilder': + self.not_found_field_prefix = prefix + return self + + def build(self) -> SimpleRefDataFieldExtractor: + if not self.field_name or not self.base_extractor or not self.ref_data_key_extractor: + raise ValueError("Field name, base extractor, and ref data key extractor must be set.") + return SimpleRefDataFieldExtractor( + self.field_name, + self.base_extractor, + self.ref_data_key_extractor, + self.ref_data_dict, + self.not_found_field_prefix + ) diff --git a/recon_lw/interpretation/filter/__init__.py b/recon_lw/interpretation/filter/__init__.py new file mode 100644 index 0000000..2cbdfca --- /dev/null +++ b/recon_lw/interpretation/filter/__init__.py @@ -0,0 +1,6 @@ +from recon_lw.interpretation.filter.base import * +from recon_lw.interpretation.filter.field import * +from recon_lw.interpretation.filter.filter_chain import * +from recon_lw.interpretation.filter.message_type import * +from recon_lw.interpretation.filter.session_alias import * +from recon_lw.interpretation.filter.dummy import * \ No newline at end of file diff --git a/recon_lw/interpretation/filter/amend_reject.py b/recon_lw/interpretation/filter/amend_reject.py new file mode 100644 index 0000000..5978836 --- /dev/null +++ 
class AmendRejectFilter(Filter):
    """Filter keyed on reject code / reject text of selected message types.

    ``filter`` returns ``False`` only for a message whose type is in
    ``message_types`` and whose reject code is in ``invalid_rej_codes`` or
    whose reject text contains one of ``invalid_reason_codes``. Every other
    message yields ``True``.

    Args:
        message_types: message types to inspect; defaults to
            ``{'OrderCancelReject'}``.
        rej_code_field_name: field holding the reject code.
        rej_text_field_name: field holding the reject text.
        invalid_rej_codes: reject codes to match; defaults to ``{'0', '1'}``.
        invalid_reason_codes: substrings to look for in the reject text;
            defaults to ``{'1000'}``.
    """

    def __init__(self,
                 message_types=None,
                 rej_code_field_name='reject_code',
                 rej_text_field_name='reject_text',
                 invalid_rej_codes: Set[str]=None,
                 invalid_reason_codes: Set[str]=None
                 ):

        if invalid_rej_codes is None:
            invalid_rej_codes = {'0', '1'}
        self.invalid_rej_codes = invalid_rej_codes

        if message_types is None:
            message_types = {'OrderCancelReject'}
        self.message_types = message_types

        if invalid_reason_codes is None:
            invalid_reason_codes = {'1000'}

        self.invalid_reason_codes = invalid_reason_codes

        self.rej_code_field_name = rej_code_field_name
        self.rej_text_field_name = rej_text_field_name

    def filter(self, message: Message, adapter: Adapter) -> bool:
        """Return ``False`` for a matching reject message, ``True`` otherwise."""
        mt = adapter.get_message_type(message)
        if mt not in self.message_types:
            return True

        rej_code = adapter.get(message, self.rej_code_field_name)
        if rej_code in self.invalid_rej_codes:
            return False

        rej_text = adapter.get(message, self.rej_text_field_name)
        # A message without reject text cannot contain a reason code; guard
        # against None so the substring test does not raise TypeError.
        if rej_text is not None and any(code in rej_text for code in self.invalid_reason_codes):
            return False

        return True
class FieldFilter(Filter):
    """Filter on whether a field's value belongs to a given collection.

    Args:
        field_name: field to read from the message.
        field_values: values to test membership against.
        whitelist: when true, ``filter`` returns ``True`` for values that are
            NOT in ``field_values``; when false, for values that are.
    """

    def __init__(self, field_name: str, field_values: List[Any], whitelist: bool = True):
        super().__init__()
        self.field_name = field_name
        self.field_values = field_values
        self.whitelist = whitelist

    def filter(self, message: Message, adapter: Adapter) -> bool:
        """Evaluate the membership test for ``message``.

        The value is read through ``adapter.get``; if that yields
        ``Extractor.NOT_EXTRACTED`` the message body is consulted directly.
        """
        value = adapter.get(message, self.field_name)
        if value == Extractor.NOT_EXTRACTED:
            body = adapter.get_body(message)
            value = body.get(self.field_name, Extractor.NOT_EXTRACTED)

        listed = value in self.field_values
        return (not listed) if self.whitelist else listed
class FilterChain:
    """An ordered collection of filters combined with logical OR."""

    def __init__(self):
        self.filters: List[Union[Filter, FilterProtocol]] = []

    def filter(self, message: Message, adapter: Adapter) -> bool:
        """Return ``True`` as soon as one filter gives a truthy result.

        Filters are called in insertion order with ``(message, adapter)``;
        evaluation stops at the first truthy result. An empty chain returns
        ``False``.
        """
        return any(check(message, adapter) for check in self.filters)

    def add_filter(self, filter: Union[Filter, FilterProtocol]):
        """Append ``filter`` to the chain and return the chain for chaining."""
        self.filters.append(filter)
        return self
class NonEmptyFilter(Filter):
    """Filter on the presence of fields in the message body.

    ``filter`` returns ``True`` when ``field_path`` is empty or when any of
    its entries is missing (``None``) in the body, and ``False`` when every
    entry is present.
    """

    def __init__(self, field_path: List[str]):
        self.field_path = field_path

    def filter(self, message: Message, adapter: Adapter) -> bool:
        """Return ``True`` if a listed field is absent from the body."""
        body = adapter.get_body(message)

        # NOTE(review): every entry is looked up on the top-level body; the
        # name "field_path" suggests nested traversal may have been intended
        # - confirm with callers before changing.
        if not self.field_path:
            return True

        for name in self.field_path:
            if body.get(name) is None:
                return True

        return False
class ReconEventEnhancement(ABC):
    """Abstract callable taking ``(event, msg, adapter)``.

    Subclasses implement ``enhance_event``; calling the instance forwards to
    it and returns its result.
    """

    @abstractmethod
    def enhance_event(self, event, msg: Optional[Message], adapter: Adapter):
        """Process ``event`` using ``msg`` and ``adapter`` (subclass hook)."""
        return None

    def __call__(self, event, msg: Optional[Message], adapter: Adapter):
        """Delegate to ``enhance_event`` with the same arguments."""
        outcome = self.enhance_event(event, msg, adapter)
        return outcome
class ReconEventChainEnhancement:
    """Apply a sequence of event enhancements in order.

    Args:
        enhancements: initial list of callables taking
            ``(event, msg, adapter)``. When given, the list object itself is
            stored (not copied). When omitted, each instance gets its own
            fresh empty list.
    """

    def __init__(self, enhancements: Optional[List[ReconEventEnhancementProtocol]] = None):
        # A literal [] default would be created once and shared by every
        # instance built without arguments, so add_enhancement() on one chain
        # would leak into all the others.
        self.enhancements = [] if enhancements is None else enhancements

    def add_enhancement(self, enhancement: ReconEventEnhancementProtocol):
        """Append ``enhancement`` and return the chain for fluent use."""
        self.enhancements.append(enhancement)
        return self

    def apply(self, event, msg: Optional[Message], adapter: Adapter):
        """Call every enhancement with ``(event, msg, adapter)``.

        Return values of the enhancements are ignored.
        """
        for enhancement in self.enhancements:
            enhancement(event, msg, adapter)
class IEventHandlingStrategy(Protocol):
    """Structural type of an event-handling strategy.

    A strategy is a callable receiving the matched messages, the event
    sequence, an ``is_copy`` flag and the adapters of both streams, and is
    annotated to return a list of event dicts.
    """

    def __call__(
            self,
            match_msgs: List[Message],
            event_sequence: dict,
            is_copy: bool,
            orig_adapter: Adapter,
            copy_adapter: Adapter,
    ) -> List[Dict]:
        """Handle ``match_msgs``; the protocol stub itself returns ``None``."""
        return None
class SimpleMissEventHandlingStrategy(IEventHandlingStrategy):
    """Build the event for a message that has no counterpart in the other stream.

    Args:
        recon_name: passed to ``create_event`` as ``recon_name``.
        event_name_provider: either a ``ReconEventNameProvider`` instance or
            a callable taking a ``ReconType``; supplies the event name.
        fields_checker: stored for interface symmetry; not used when
            building miss events.
        counters: object whose ``no_left`` / ``no_right`` attributes are
            incremented.
        recon_event_chain_enhancement: optional enhancement chain applied to
            the created event.
        enrich_event_with_messages: when true, the missed message is added
            to the event body under ``'messages'``.
    """

    def __init__(self,
                 recon_name: str,
                 event_name_provider: Union[ReconEventNameProvider, ReconEventNameProviderProtocol],
                 fields_checker: FieldChecker,
                 counters,
                 recon_event_chain_enhancement: ReconEventChainEnhancement,
                 enrich_event_with_messages: bool = True
                 ):
        self._event_name_provider = event_name_provider
        self._fields_checker = fields_checker
        self._counters = counters
        self._recon_event_chain_enhancement = recon_event_chain_enhancement
        self.recon_name = recon_name
        self._enrich_event_with_messages = enrich_event_with_messages

    def __call__(self, match_msgs: List[Message], event_sequence: dict, is_copy: bool, orig_adapter: Adapter,
                 copy_adapter: Adapter):
        """Return a one-element list holding the miss event.

        ``is_copy`` selects which side is present: ``match_msgs[1]`` with
        ``copy_adapter`` when true, ``match_msgs[0]`` with ``orig_adapter``
        otherwise.
        """
        if is_copy:
            msg = match_msgs[1]
            adapter = copy_adapter
        else:
            msg = match_msgs[0]
            adapter = orig_adapter

        order_ids = adapter.get_fields_group(msg, "order_ids")
        # The value returned by on_message_exit is used as the event's key.
        match_key = adapter.on_message_exit(msg)

        event = self._get_miss_event(
            msg,
            match_key=match_key,
            recon_type=ReconType.BasicReconMissRight if is_copy else ReconType.BasicReconMissLeft,
            event_sequence=event_sequence,
            order_ids=order_ids,
            event_name_provider=self._event_name_provider,
            counters=self._counters
        )

        if self._recon_event_chain_enhancement:
            # Use the adapter that belongs to the stream the message came
            # from; passing orig_adapter unconditionally paired copy-stream
            # messages with the wrong adapter.
            self._recon_event_chain_enhancement.apply(event, msg, adapter)

        if self._enrich_event_with_messages:
            if is_copy:
                event['body']['messages'] = match_msgs[1:]
            else:
                event['body']['messages'] = [match_msgs[0]]
        # NOTE(review): on_message_exit is invoked a second time here (the
        # first call above supplied match_key). Kept as-is; confirm whether
        # the adapter expects to be notified twice.
        adapter.on_message_exit(msg)

        return [event]

    def _get_miss_event(self, msg, event_name_provider: Union[ReconEventNameProvider, ReconEventNameProviderProtocol],
                        match_key,
                        recon_type: ReconType,
                        counters: Counters,
                        order_ids,
                        event_sequence):
        """Create the miss event and bump the matching counter.

        Raises:
            Exception: if ``recon_type`` is neither ``BasicReconMissLeft``
                nor ``BasicReconMissRight``.
        """
        if recon_type == ReconType.BasicReconMissLeft:
            counters.no_left += 1
            if isinstance(event_name_provider, ReconEventNameProvider):
                name = event_name_provider.get_miss_copy_event_name()
            else:
                name = event_name_provider(ReconType.BasicReconMissLeft)
        elif recon_type == ReconType.BasicReconMissRight:
            counters.no_right += 1
            if isinstance(event_name_provider, ReconEventNameProvider):
                name = event_name_provider.get_miss_original_event_name()
            else:
                name = event_name_provider(ReconType.BasicReconMissRight)
        else:
            raise Exception('unexpected behaviour')

        body = {"key": match_key}

        # Only attach order ids when there are some.
        if order_ids:
            body["order_ids"] = order_ids

        event = create_event(
            recon_name=self.recon_name,
            name=name,
            type=recon_type.value,
            ok=False,
            event_sequence=event_sequence,
            body=body,
        )
        event["attachedMessageIds"] = [msg["messageId"]]

        return event
class ReconEventNameProvider(ABC):
    """Abstract source of event names, one method per kind of event."""

    @abstractmethod
    def get_miss_original_event_name(self):
        """Return the name for a "miss original" event."""

    @abstractmethod
    def get_miss_copy_event_name(self):
        """Return the name for a "miss copy" event."""

    @abstractmethod
    def get_match_event_name(self):
        """Return the name for a match event."""

    @abstractmethod
    def get_match_diff_event_name(self):
        """Return the name for a match event in which differences were found."""
f"{self.event_name_prefix}[miss_copy]" + + def get_match_event_name(self): + return f"{self.event_name_prefix}[match]" + + def get_match_diff_event_name(self): + return f"{self.event_name_prefix}[match[diff_found]]" diff --git a/template/recon_event_types.py b/recon_lw/interpretation/interpretation_functions/event_type.py similarity index 73% rename from template/recon_event_types.py rename to recon_lw/interpretation/interpretation_functions/event_type.py index 7585cfa..2df800c 100644 --- a/template/recon_event_types.py +++ b/recon_lw/interpretation/interpretation_functions/event_type.py @@ -1,7 +1,6 @@ from enum import Enum - class ReconType(Enum): BasicReconMatch = 'BasicReconMatch' BasicReconMissRight = 'BasicReconMissLeft' - BasicReconMissLeft = 'BasicReconMissRight' + BasicReconMissLeft = 'BasicReconMissRight' \ No newline at end of file diff --git a/recon_lw/interpretation/interpretation_functions/simple.py b/recon_lw/interpretation/interpretation_functions/simple.py new file mode 100644 index 0000000..7c9fa81 --- /dev/null +++ b/recon_lw/interpretation/interpretation_functions/simple.py @@ -0,0 +1,139 @@ +from recon_lw.interpretation.interpretation_functions.base import InterpretationFunctionProvider, InterpretationFunctionType +from recon_lw.interpretation.adapter.base import Adapter +from recon_lw.interpretation.interpretation_functions.event_name_provider import ReconEventNameProvider, \ + BasicReconEventNameProvider +from recon_lw.core.utility.counter import Counters +from recon_lw.interpretation.field_checker.base import FieldChecker +from recon_lw.interpretation.interpretation_functions.event_name_provider.base import ReconEventNameProviderProtocol +from recon_lw.matching.key_functions.base import KeyFunctionProvider +from recon_lw.interpretation.interpretation_functions.event_enhancement.enhancement_chain import ReconEventChainEnhancement +from recon_lw.core.type.types import Message +from typing import List, Optional, Union +from 
class BasicInterpretationFunctionProvider(InterpretationFunctionProvider):
    """Provide an interpretation function that dispatches to match/miss strategies.

    Args:
        recon_name: name passed to the default strategies and used as prefix
            by the default event name provider.
        original_stream_adapter: adapter for the first message of a pair.
        copy_stream_adapter: adapter for the second message of a pair.
        fields_checker: passed to the default strategies.
        enrich_event_with_messages: passed to the default strategies.
        event_name_provider: event name source; defaults to
            ``BasicReconEventNameProvider(recon_name)``.
        recon_event_chain_enhancement: optional enhancement chain passed to
            the default strategies.
        match_event_handling_strategy: strategy for pairs where both
            messages are present; defaults to
            ``SimpleMatchEventHandlingStrategy``.
        miss_event_handling_strategy: strategy for pairs where one message
            is missing; defaults to ``SimpleMissEventHandlingStrategy``.
    """

    def __init__(
            self,
            recon_name: str,
            original_stream_adapter: Adapter,
            copy_stream_adapter: Adapter,
            fields_checker: FieldChecker,
            enrich_event_with_messages: bool = False,
            event_name_provider: Optional[Union[ReconEventNameProvider, ReconEventNameProviderProtocol]]=None,
            recon_event_chain_enhancement: Optional[ReconEventChainEnhancement]=None,
            match_event_handling_strategy: IEventHandlingStrategy=None,
            miss_event_handling_strategy: IEventHandlingStrategy=None,
    ):
        # Resolve the default first so the attribute below holds the provider
        # that is actually used instead of staying None.
        if event_name_provider is None:
            event_name_provider = BasicReconEventNameProvider(recon_name)

        self._original_stream_adapter = original_stream_adapter
        self._copy_stream_adapter = copy_stream_adapter
        self._event_name_provider = event_name_provider
        self._fields_checker = fields_checker
        self._counters = Counters()
        self._recon_event_chain_enhancement = recon_event_chain_enhancement

        if miss_event_handling_strategy is None:
            miss_event_handling_strategy = SimpleMissEventHandlingStrategy(
                recon_name=recon_name,
                event_name_provider=event_name_provider,
                fields_checker=fields_checker,
                counters=self._counters,
                recon_event_chain_enhancement=recon_event_chain_enhancement,
                enrich_event_with_messages=enrich_event_with_messages
            )

        if match_event_handling_strategy is None:
            match_event_handling_strategy = SimpleMatchEventHandlingStrategy(
                recon_name=recon_name,
                event_name_provider=event_name_provider,
                fields_checker=fields_checker,
                counters=self._counters,
                recon_event_chain_enhancement=recon_event_chain_enhancement,
                enrich_events_with_messages=enrich_event_with_messages
            )

        self.miss_event_handling_strategy = miss_event_handling_strategy
        self.match_event_handling_strategy = match_event_handling_strategy

    def provide(self) -> InterpretationFunctionType:
        """Return the ``interpret(match_msgs, _, event_sequence)`` closure.

        The closure returns ``[]`` for ``match_msgs is None``. Otherwise it
        inspects ``match_msgs[0]`` (original) and ``match_msgs[1]`` (copy):
        both present goes to the match strategy, exactly one present goes to
        the miss strategy (``is_copy=True`` when only the copy is present),
        and both ``None`` yields an empty list.
        """
        def interpret(match_msgs: List[Message], _, event_sequence: dict):
            if match_msgs is None:
                return []
            original = match_msgs[0]
            copy = match_msgs[1]
            events = []

            if original is not None and copy is not None:
                events += self.match_event_handling_strategy(match_msgs, event_sequence, False, self._original_stream_adapter, self._copy_stream_adapter)
            elif original is None and copy is not None:
                events += self.miss_event_handling_strategy(match_msgs, event_sequence, True, self._original_stream_adapter, self._copy_stream_adapter)
            elif copy is None and original is not None:
                events += self.miss_event_handling_strategy(match_msgs, event_sequence, False, self._original_stream_adapter, self._copy_stream_adapter)

            return events

        return interpret
with_copy_stream_key_function(self, provider: KeyFunctionProvider): + self._copy_stream_key_function = provider + return self + + def with_recon_event_chain_enhancement(self, enhancement: ReconEventChainEnhancement): + self._recon_event_chain_enhancement = enhancement + return self + + def with_recon_name(self, name): + self._recon_name = name + return self + + def build(self) -> BasicInterpretationFunctionProvider: + return BasicInterpretationFunctionProvider( + recon_name=self._recon_name, + original_stream_adapter=self._original_stream_adapter, + copy_stream_adapter=self._copy_stream_adapter, + event_name_provider=self._event_name_provider, + fields_checker=self._fields_checker, + recon_event_chain_enhancement=self._recon_event_chain_enhancement + ) diff --git a/recon_lw/recon_ob_stats.py b/recon_lw/interpretation/recon_ob_stats.py similarity index 92% rename from recon_lw/recon_ob_stats.py rename to recon_lw/interpretation/recon_ob_stats.py index 2ada7b7..d4fbe5c 100644 --- a/recon_lw/recon_ob_stats.py +++ b/recon_lw/interpretation/recon_ob_stats.py @@ -2,12 +2,12 @@ from datetime import datetime from typing import List -from recon_lw.ts_converters import epoch_nano_str_to_ts, ts_to_epoch_nano_str, time_stamp_key +from recon_lw.core.ts_converters import epoch_nano_str_to_ts -from recon_lw import recon_lw -from recon_lw.EventsSaver import EventsSaver -from recon_lw.LastStateMatcher import LastStateMatcher -from recon_lw.message_utils import message_to_dict +from recon_lw.core.EventsSaver import EventsSaver +from recon_lw.core.utility import open_scoped_events_streams, open_streams, get_next_batch +from recon_lw.matching.LastStateMatcher import LastStateMatcher +from recon_lw.core.message_utils import message_to_dict import copy from th2_data_services.config import options @@ -92,9 +92,9 @@ def ob_compare_stats(source_stat_messages_path: pathlib.PosixPath, ) processors.append(processor) - streams = recon_lw.open_scoped_events_streams(source_ob_events_path, + 
streams = open_scoped_events_streams(source_ob_events_path, lambda n: "default_" not in n) - streams2 = recon_lw.open_streams(source_stat_messages_path, + streams2 = open_streams(source_stat_messages_path, lambda n: any(s in n for s in all_stat_sessions), expanded_messages=True, data_objects=data_objects) for elem in streams2: @@ -103,7 +103,7 @@ def ob_compare_stats(source_stat_messages_path: pathlib.PosixPath, message_buffer = [None] * 100 buffer_len = 100 while len(streams) > 0: - next_batch_len = recon_lw.get_next_batch(streams, message_buffer, buffer_len, get_timestamp) + next_batch_len = get_next_batch(streams, message_buffer, buffer_len, get_timestamp) buffer_to_process = message_buffer if next_batch_len < buffer_len: buffer_to_process = message_buffer[:next_batch_len] diff --git a/recon_lw/LastStateMatcher.py b/recon_lw/matching/LastStateMatcher.py similarity index 97% rename from recon_lw/LastStateMatcher.py rename to recon_lw/matching/LastStateMatcher.py index 6e91b73..2456b78 100644 --- a/recon_lw/LastStateMatcher.py +++ b/recon_lw/matching/LastStateMatcher.py @@ -3,14 +3,13 @@ from sortedcontainers import SortedKeyList -from recon_lw.ts_converters import epoch_nano_str_to_ts, ts_to_epoch_nano_str, time_stamp_key +from recon_lw.core.ts_converters import time_stamp_key -from recon_lw import recon_lw from th2_data_services.utils import time as time_utils -from recon_lw.EventsSaver import IEventsSaver -from recon_lw._types import Th2Timestamp -from recon_lw.stream import Streams +from recon_lw.core.EventsSaver import IEventsSaver +from recon_lw.core._types import Th2Timestamp +from recon_lw.core.stream import Streams class IInterpretHandler(ABC): @@ -48,6 +47,7 @@ def handler(self, The method should return message-timestamp and the matching key for the stream-1. For the first stream (left). + Read about stream 1 in LastStateMatcher. Args: @@ -71,6 +71,7 @@ def handler(self, stream-2 and . 
For the second stream (right) + Read about stream 2 in LastStateMatcher. Args: @@ -109,6 +110,7 @@ def __init__(self, LastStateMatcher assumes that there are 2 unequal streams. `1st stream` is the same thing, but received at a random point in time. + For example: [1] MarketData snapshots. This is an OrderBook state stream aggregated over time. For example @@ -125,6 +127,7 @@ def __init__(self, of the order book at a certain point in time. This stream has no gaps. We know all the states of the OrderBook. + Args: horizon_delay_seconds: get_search_ts_key: For the first stream (left) @@ -136,7 +139,7 @@ def __init__(self, create_event: send_events: """ - self._search_time_index = Streams() # stream1 + self._search_time_index = Streams() # _state_cache: {key2 : { # "prior_ts" : ts, diff --git a/recon_lw/LiveObjectsCache.py b/recon_lw/matching/LiveObjectsCache.py similarity index 94% rename from recon_lw/LiveObjectsCache.py rename to recon_lw/matching/LiveObjectsCache.py index c35c74b..435d334 100644 --- a/recon_lw/LiveObjectsCache.py +++ b/recon_lw/matching/LiveObjectsCache.py @@ -1,9 +1,6 @@ from sortedcontainers import SortedKeyList -from recon_lw.ts_converters import epoch_nano_str_to_ts, ts_to_epoch_nano_str, time_stamp_key - -from recon_lw import recon_lw -from datetime import datetime +from recon_lw.core.ts_converters import time_stamp_key class LiveObjectsCache: diff --git a/recon_lw/StateSequenceGenerator.py b/recon_lw/matching/StateSequenceGenerator.py similarity index 94% rename from recon_lw/StateSequenceGenerator.py rename to recon_lw/matching/StateSequenceGenerator.py index a324c93..6773b68 100644 --- a/recon_lw/StateSequenceGenerator.py +++ b/recon_lw/matching/StateSequenceGenerator.py @@ -1,12 +1,8 @@ from typing import Callable -from sortedcontainers import SortedKeyList +from recon_lw.core.ts_converters import time_stamp_key -from recon_lw.ts_converters import epoch_nano_str_to_ts, ts_to_epoch_nano_str, time_stamp_key - -from recon_lw import recon_lw 
-from th2_data_services.utils import time as time_utils -from recon_lw.SequenceCache import SequenceCache +from recon_lw.core.SequenceCache import SequenceCache import copy diff --git a/recon_lw/TimeCacheMatcher.py b/recon_lw/matching/TimeCacheMatcher.py similarity index 98% rename from recon_lw/TimeCacheMatcher.py rename to recon_lw/matching/TimeCacheMatcher.py index 049806b..d0086a3 100644 --- a/recon_lw/TimeCacheMatcher.py +++ b/recon_lw/matching/TimeCacheMatcher.py @@ -1,6 +1,6 @@ from sortedcontainers import SortedKeyList -from recon_lw.ts_converters import epoch_nano_str_to_ts, ts_to_epoch_nano_str, time_stamp_key +from recon_lw.core.ts_converters import time_stamp_key class TimeCacheMatcher: diff --git a/recon_lw/matching/__init__.py b/recon_lw/matching/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/recon_lw/matching/collect_matcher/__init__.py b/recon_lw/matching/collect_matcher/__init__.py new file mode 100644 index 0000000..690efcb --- /dev/null +++ b/recon_lw/matching/collect_matcher/__init__.py @@ -0,0 +1,2 @@ +from recon_lw.matching.collect_matcher.default import DefaultCollectMatcher +from recon_lw.matching.collect_matcher.base import CollectMatcher \ No newline at end of file diff --git a/recon_lw/matching/collect_matcher/base.py b/recon_lw/matching/collect_matcher/base.py new file mode 100644 index 0000000..fefbd96 --- /dev/null +++ b/recon_lw/matching/collect_matcher/base.py @@ -0,0 +1,27 @@ +from abc import ABC, abstractmethod +from typing import Optional, List, Dict, Protocol +from recon_lw.core.rule.base import AbstractRule +from recon_lw.core.type.types import Message + + +class CollectMatcher(ABC): + """ + Abstract base class for defining a matching flow. + + This class defines the interface for matcher implementations that collect + matches between messages based on a given rule. + + Methods: + collect_matches: Abstract method to collect matches based on a rule. 
class DefaultCollectMatcher(CollectMatcher):
    """Collect matches with a match function and an optional last-state matcher.

    Args:
        match_function: Either a ``ReconMatcher`` (its ``match(batch, rule)``
            is used) or a plain callable invoked as
            ``match_function(batch, rule.to_dict())``.
        last_state_matcher: Optional matcher whose ``process_objects_batch``
            is fed every batch in addition to the match function.
    """

    def __init__(self,
                 match_function: Union[ReconMatcher, Callable],
                 last_state_matcher: Optional[LastStateMatcher] = None,
                 ):
        self.last_state_matcher = last_state_matcher
        self.match_function = match_function

    def collect_matches(self, batch: List[Optional[Dict]], rule: AbstractRule):
        """Run the match function on ``batch`` and update the last-state matcher.

        Returns:
            The value returned by the match function. If no usable match
            function is configured, the value returned by
            ``last_state_matcher.process_objects_batch`` (or ``None``).
        """
        match_func = self.match_function
        matched = False
        result = None

        # ReconMatcher is tested first: if it is itself callable, a generic
        # callable check would win and ``match`` would never be used.
        if isinstance(match_func, ReconMatcher):
            result = match_func.match(batch, rule)
            matched = True
        elif callable(match_func):
            result = match_func(batch, rule.to_dict())
            matched = True

        # The last-state matcher must see the batch even when a match function
        # ran; returning early above would leave this step unreachable.
        if self.last_state_matcher is not None:
            last_state_result = self.last_state_matcher.process_objects_batch(batch)
            if not matched:
                return last_state_result

        return result
class DefaultFlushFunction(FlushFunction):
    """Flush matched/unmatched entries older than the rule horizon.

    Args:
        interpretation_function: Called for every flushed match as
            ``interpretation_function(messages, last_state_matcher,
            event_sequence)``; expected to return an iterable of event dicts
            or ``None``.
        last_state_matcher: Optional object passed through to the
            interpretation function as its second argument.
    """

    def __init__(self,
                 interpretation_function: InterpretationFunctionType,
                 last_state_matcher: LastStateMatcher = None
                 ):
        self.interpretation_function = interpretation_function
        self.last_state_matcher = last_state_matcher

    def flush(self,
              timestamp: Optional[float],
              rule: AbstractRule,
              save_events_func: Callable[[dict], None]
              ):
        """Flush ``rule``'s matcher context up to ``timestamp``.

        NOTE(review): ``timestamp`` is annotated ``Optional[float]`` but
        ``rule_flush`` reads ``current_ts["epochSecond"]`` - it looks like a
        timestamp dict is expected; confirm against callers.

        Raises:
            ValueError: if the rule's matcher context is not a
                ``SimpleMatcherContext``.
        """
        context = rule.matcher_context
        if not isinstance(context, SimpleMatcherContext):
            raise ValueError('Matcher context must be SimpleMatcherContext or its extension.')
        DefaultFlushFunction.rule_flush(
            timestamp,
            rule.horizon_delay,
            context.match_index,
            context.time_index,
            context.message_cache,
            self.interpretation_function,
            rule.get_event_sequence(),
            save_events_func,
            rule.get_root_event(),
            self.last_state_matcher
        )

    @staticmethod
    def rule_flush(current_ts, horizon_delay, match_index: dict, time_index, message_cache,
                   interpret_func, event_sequence: dict, send_events_func,
                   parent_event, live_orders_cache):
        """Interpret and emit events for all entries older than the horizon.

        Entries are removed from ``time_index`` and ``match_index``; their
        messages are popped from ``message_cache`` and handed to
        ``interpret_func``. Every resulting event gets ``parentEventId`` set
        to ``parent_event["eventId"]`` and the whole list is passed to
        ``send_events_func`` once.
        """

        def flush_old(current_ts, horizon_delay, time_index):
            # With no timestamp everything is flushed; otherwise only entries
            # strictly before (current second - horizon_delay).
            horizon_edge = len(time_index)
            if current_ts is not None:
                edge_timestamp = {"epochSecond": current_ts["epochSecond"] - horizon_delay,
                                  "nano": 0}
                horizon_edge = time_index.bisect_key_left(
                    time_stamp_key(edge_timestamp))
            return [time_index.pop(0)[1] for _ in range(horizon_edge)]

        can_interpret = callable(interpret_func)
        old_keys = flush_old(current_ts, horizon_delay, time_index)
        events = []
        for match_key in old_keys:
            elem = match_index.pop(match_key)
            if elem[0] is not None and elem[0] not in message_cache:
                # First message is no longer cached: nothing to interpret.
                continue
            if not can_interpret:
                # Without an interpretation function there are no results;
                # skipping avoids reading an unbound or stale ``results``.
                continue

            results = interpret_func(
                [message_cache_pop(item, message_cache) for item in elem],
                live_orders_cache,
                event_sequence
            )
            if results is not None:
                for r in results:
                    r["parentEventId"] = parent_event["eventId"]
                    events.append(r)

        send_events_func(events)
class SimpleMatcherContext(AbstractMatcherContext):
    """Matcher state made of a match index, a time index and a message cache.

    Args:
        match_index: Mapping used by the matching functions to track matches.
        time_index: Time-ordered index of entries (a ``SortedKeyList``, not a
            dict).
        message_cache: Mapping holding the cached messages.
    """

    def __init__(self, match_index: dict, time_index: SortedKeyList, message_cache: dict):
        self.match_index = match_index
        self.time_index = time_index
        self.message_cache = message_cache

    def to_dict(self) -> dict:
        """Return the three containers under their attribute names."""
        return {
            'match_index': self.match_index,
            'time_index': self.time_index,
            'message_cache': self.message_cache
        }
class DefaultMatcherContextProvider(MatcherContextProvider):
    """Provider that owns a single ``SimpleMatcherContext``.

    The context is created once in ``__init__`` with empty containers and the
    same instance is returned by every ``get_context`` call.
    """

    def __init__(self):
        super().__init__()
        # Entries are ordered by the time_stamp_key of their first element.
        ordered_by_time = SortedKeyList(key=lambda entry: time_stamp_key(entry[0]))
        self.context = SimpleMatcherContext(
            match_index={},
            time_index=ordered_by_time,
            message_cache={},
        )

    def get_context(self) -> SimpleMatcherContext:
        """Return the context created at construction time."""
        return self.context
def default_key_function(filters: List[Union[Filter, FilterProtocol]], key_fields: Set[str], is_copy=False):
    """Build a key function provider with a ':'-separated matching key.

    Args:
        filters: Filters added, in order, to a fresh ``FilterChain``.
        key_fields: Field names handed to the provider as ``key_fields``.
        is_copy: When true a ``BasicCopyKeyFunctionProvider`` is returned,
            otherwise a ``BasicOriginalKeyFunctionProvider``.

    Returns:
        The configured key function provider.
    """
    chain = FilterChain()
    # Named ``message_filter`` so the builtin ``filter`` is not shadowed.
    for message_filter in filters:
        chain.add_filter(message_filter)

    provider_cls = BasicCopyKeyFunctionProvider if is_copy else BasicOriginalKeyFunctionProvider
    return provider_cls(
        filter_chain=chain,
        matching_key=BasicSeparatorMatchingKeyExtractor(separator=':'),
        key_fields=key_fields
    )
class BasicOriginalKeyFunctionProvider(KeyFunctionProvider):
    """Key function provider for the original stream.

    Args:
        filter_chain: Chain consulted for every message before key extraction.
        matching_key: Callable invoked as ``matching_key(adapter, message,
            key_fields)`` to produce the matching keys.
        key_fields: Field names passed to ``matching_key``.
    """

    def __init__(self, filter_chain: FilterChain, matching_key: MatchingKeyExtractorProtocol, key_fields: Set[str]):
        super().__init__()
        self._filter_chain = filter_chain
        self._matching_key = matching_key
        self._key_fields = key_fields

    def provide(self, adapter: Adapter) -> KeyFunctionType:
        """Return a key function bound to ``adapter``."""
        chain = self._filter_chain
        extract_keys = self._matching_key
        key_fields = self._key_fields

        def key_function(message: Message):
            # A truthy filter result means the message yields no key (None).
            if chain.filter(message, adapter):
                return None
            adapter.on_message(message)
            return extract_keys(adapter, message, key_fields)

        return key_function
class BasicSeparatorMatchingKeyExtractor(MatchingKeyExtractor):
    """Build matching keys by joining field values with a separator.

    Args:
        separator: String placed between the field values of one key.
    """

    def __init__(self, separator: str):
        self.separator = separator

    def extract(self, adapter: Adapter, message: Dict[str, Any], fields: Set[str]) -> List[str]:
        """Return one key per value row extracted from ``message``.

        Each field is read with ``adapter.get(message, field, strict=True)``.
        A non-list value counts as a single value. Fields with several values
        must all have the same number of values; single-valued fields are
        repeated for every row. Values are converted with ``str`` and joined
        in the iteration order of ``fields``.

        Raises:
            SystemError: if a field yields an empty list, or if two
                multi-valued fields have different lengths.
        """
        columns = []
        max_count = 1
        for field in fields:
            val = adapter.get(message, field, strict=True)
            if not isinstance(val, list):
                val = [val]

            count = len(val)
            if count == 0:
                # An empty list cannot be repeated to fill the rows; fail here
                # with context instead of a bare IndexError later on.
                raise SystemError(
                    f"No values found | {adapter.__class__.__name__} "
                    f"| {field} | {message}"
                )
            if count != 1 and max_count != 1 and count != max_count:
                raise SystemError(
                    f"Diff found {max_count} != {count} | {adapter.__class__.__name__} "
                    f"| {field} | {message}"
                )

            max_count = max(max_count, count)
            columns.append([str(x) for x in val])

        # After validation every column has either 1 or max_count values.
        rows = zip(
            *(column if len(column) == max_count else column * max_count
              for column in columns)
        )
        return [self.separator.join(chunks) for chunks in rows]
def collect_matcher(batch, rule_settings):
    """Feed ``batch`` to the rule's match function and its live-orders cache.

    Args:
        batch: Messages to process.
        rule_settings: Rule dict. ``rule_settings["rule_match_func"]`` is
            called as ``rule_match_func(batch, rule_settings)``. If
            ``rule_settings["live_orders_cache"]`` is present and not
            ``None``, its ``process_objects_batch(batch)`` is called
            afterwards.
    """
    rule_match_func = rule_settings["rule_match_func"]
    rule_match_func(batch, rule_settings)

    # A missing key and an explicit None both mean "no cache configured".
    live_orders_cache = rule_settings.get("live_orders_cache")
    if live_orders_cache is not None:
        live_orders_cache.process_objects_batch(batch)
+ + Args: + next_batch: + rule_dict: + + Returns: + + """ + # match_index: dict[Any, MatchIndexElement] = rule_dict["match_index"] + match_index: dict[Any, list] = rule_dict["match_index"] + time_index = rule_dict["time_index"] + message_cache = rule_dict["message_cache"] + first_key_func = rule_dict["first_key_func"] + second_key_func = rule_dict["second_key_func"] + + n_duplicates = 0 + for m in next_batch: + first_keys = first_key_func(m) + message_id = options.mfr.get_id(m) + if first_keys is not None: + match_index_element = [message_id, None] + for first_key in first_keys: + if first_key not in match_index: + match_index[first_key] = match_index_element + time_index_add(first_key, m, time_index) + message_cache_add(m, message_cache) + continue + else: + existing = match_index[first_key] + if existing[0] is not None: + n_duplicates += 1 + else: + existing[0] = message_id + message_cache_add(m, message_cache) + continue + second_key = second_key_func(m) + if second_key is not None: + if second_key not in match_index: + match_index[second_key] = [None, message_id] + time_index_add(second_key, m, time_index) + message_cache_add(m, message_cache) + else: + existing = match_index[second_key] + if existing[1] is None: # existing[1] - stream 2 message ID + existing[1] = message_id + # match_index[second_key] = [existing[0], message_id] + message_cache_add(m, message_cache) + else: + existing.append(message_id) + message_cache_add(m, message_cache) + + if n_duplicates > 0: + print(n_duplicates, " duplicates detected") + +def pair_one_match(next_batch, rule_dict): + # first_key_func takes m returns string(key) + match_index = rule_dict["match_index"] + time_index = rule_dict["time_index"] + message_cache = rule_dict["message_cache"] + pair_key_func = rule_dict["pair_key_func"] + one_key_func = rule_dict["one_key_func"] + + for m in next_batch: + pair_key = pair_key_func(m) + message_id = options.mfr.get_id(m) + if pair_key is not None: + if pair_key not in match_index: 
def flush_old(current_ts, horizon_delay, time_index):
    """Remove the oldest entries from ``time_index`` and return their keys.

    Args:
        current_ts: ``None`` to flush everything, otherwise a timestamp dict
            whose ``"epochSecond"`` is read.
        horizon_delay: Seconds subtracted from ``current_ts["epochSecond"]``
            to obtain the flush edge (nanoseconds of the edge are 0).
        time_index: Sorted sequence of entries; element ``[1]`` of each entry
            is returned. Must provide ``bisect_key_left`` when ``current_ts``
            is not ``None``.

    Returns:
        List with element ``[1]`` of every removed entry, oldest first.
    """
    horizon_edge = len(time_index)
    if current_ts is not None:
        edge_timestamp = {"epochSecond": current_ts["epochSecond"] - horizon_delay,
                          "nano": 0}
        horizon_edge = time_index.bisect_key_left(
            time_stamp_key(edge_timestamp))

    # Read and drop the prefix in one step instead of popping index 0
    # repeatedly.
    result = [entry[1] for entry in time_index[:horizon_edge]]
    del time_index[:horizon_edge]
    return result
format + # arg0 - list of matched messages + # arg1 - ?? + # arg2 - EventSequence + results = interpret_func( + [message_cache_pop(item, message_cache) for item in elem], + live_orders_cache, + event_sequence + ) + # result = interpret_func(message_cache_pop(elem[0], message_cache), + # message_cache_pop(elem[1], message_cache), event_sequence) + if results is not None: + for r in results: + r["parentEventId"] = parent_event["eventId"] + events.append(r) + + send_events_func(events) \ No newline at end of file diff --git a/recon_lw/recon_ob.py b/recon_lw/matching/recon_ob.py similarity index 97% rename from recon_lw/recon_ob.py rename to recon_lw/matching/recon_ob.py index 113dc84..92a23c3 100644 --- a/recon_lw/recon_ob.py +++ b/recon_lw/matching/recon_ob.py @@ -1,20 +1,20 @@ -from datetime import datetime, timedelta +from datetime import datetime from pathlib import Path from typing import Callable from sortedcontainers import SortedKeyList from th2_data_services.data import Data -from th2_data_services.utils.message_utils import message_utils -from recon_lw.ts_converters import epoch_nano_str_to_ts, ts_to_epoch_nano_str, time_stamp_key -from recon_lw import recon_lw +from recon_lw.core.ts_converters import time_stamp_key from th2_data_services.utils import time as time_utils -from recon_lw.EventsSaver import EventsSaver -from recon_lw.SequenceCache import SequenceCache +from recon_lw.core.EventsSaver import EventsSaver +from recon_lw.core.SequenceCache import SequenceCache from th2_data_services.config import options import copy +from recon_lw.core.utility import create_event + def combine_operations(operations_list): combined_operations = [[]] @@ -64,7 +64,7 @@ def process_operations_batch(operations_batch, events, book_id, book, check_book result["resulting_book"] = ob_copy(book) result["book_id"] = book_id result["sessionId"] = mess["sessionId"] - update_event = recon_lw.create_event("UpdateBookError:" + parent_event["eventName"], + update_event = 
create_event("UpdateBookError:" + parent_event["eventName"], "UpdateBookError", event_sequence, ok=False, @@ -242,7 +242,7 @@ def read_snapshot(expanded_snapshots_stream_iter, rule_settings, saveEvents=True if log_book['v'] >= log_books[log_book['book_id']]['v']: log_books[log_book['book_id']] = log_book filtered_log_books_collection = list(log_books.values()) - filtered_log_books_collection. sort(key=lambda d: recon_lw.time_stamp_key(d['timestamp'])) + filtered_log_books_collection. sort(key=lambda d: time_stamp_key(d['timestamp'])) if saveEvents: for log_book in filtered_log_books_collection: log_event = rule_settings.events_saver.create_event( @@ -286,12 +286,12 @@ def read_all_snapshots(snapshots_stream: Data, snapshot_stop_func: Callable, def process_market_data_update(mess_batch, events, books_cache, get_book_id_func, update_book_rule, check_book_rule, event_sequence, parent_event, initial_book_params, log_books_filter, - log_books_collection, aggregate_batch_updates,meta_extract=None): + log_books_collection, aggregate_batch_updates, meta_extract=None): books_updates = {} for m in mess_batch: book_ids_list, result = get_book_id_func(m) if result is not None: - book_id_event = recon_lw.create_event("GetBookError:" + parent_event["eventName"], + book_id_event = create_event("GetBookError:" + parent_event["eventName"], "GetBookError", event_sequence, ok=False, @@ -321,7 +321,7 @@ def process_market_data_update(mess_batch, events, books_cache, get_book_id_func process_operations_batch(chunk, events, book_id, book, check_book_rule, event_sequence, parent_event, log_books_filter, log_books_collection, - aggregate_batch_updates, meta_extract) + aggregate_batch_updates) def process_ob_rules(sequenced_batch: SortedKeyList, books_cache: dict, get_book_id_func, @@ -358,11 +358,11 @@ def process_ob_rules(sequenced_batch: SortedKeyList, books_cache: dict, get_book process_market_data_update(messages_chunk, events, books_cache, get_book_id_func, update_book_rule, 
check_book_rule, event_sequence, parent_event, initial_book_params, - log_books_filter, log_books_collection, aggregate_batch_updates,meta_extract) + log_books_filter, log_books_collection, aggregate_batch_updates, meta_extract) log_books_collection.sort(key=lambda d: time_stamp_key(d["timestamp"])) for log_book in log_books_collection: - log_event = recon_lw.create_event("OrderBook:" + log_book["sessionId"], + log_event = create_event("OrderBook:" + log_book["sessionId"], "OrderBook", event_sequence, ok=True, @@ -446,7 +446,7 @@ def flush_ob_stream(ts: dict, rule_settings: dict, event_sequence: dict, save_ev ## Gaps gaps = rule_settings["sequence_cache"].get_next_gaps() if len(gaps) > 0: - gap_event = recon_lw.create_event("SeqGap:" + rule_settings["rule_root_event"]["eventName"], "SeqGap", + gap_event = create_event("SeqGap:" + rule_settings["rule_root_event"]["eventName"], "SeqGap", event_sequence, ok=False, body={"sessionId": rule_settings["sessionId"], "gaps": gaps}, @@ -459,7 +459,7 @@ def flush_ob_stream(ts: dict, rule_settings: dict, event_sequence: dict, save_ev dupl_events = [] for i in range(0, n_dupl): item = duplicates.pop(0) - d_ev = recon_lw.create_event("Duplicate:" + rule_settings["rule_root_event"]["eventName"], + d_ev = create_event("Duplicate:" + rule_settings["rule_root_event"]["eventName"], "Duplicate", event_sequence, ok=False, diff --git a/recon_lw/recon_ob_cross_stream.py b/recon_lw/matching/recon_ob_cross_stream.py similarity index 98% rename from recon_lw/recon_ob_cross_stream.py rename to recon_lw/matching/recon_ob_cross_stream.py index d1fb301..7f84da4 100644 --- a/recon_lw/recon_ob_cross_stream.py +++ b/recon_lw/matching/recon_ob_cross_stream.py @@ -2,9 +2,9 @@ from datetime import datetime from itertools import islice -from recon_lw import recon_lw -from recon_lw.EventsSaver import EventsSaver -from recon_lw.TimeCacheMatcher import TimeCacheMatcher +from recon_lw.core.EventsSaver import EventsSaver +from recon_lw.core.utility import 
open_scoped_events_streams, get_next_batch +from recon_lw.matching.TimeCacheMatcher import TimeCacheMatcher def synopsys(price_condition: bool, num_orders_condition: bool, size_condition: bool) -> str: @@ -451,11 +451,11 @@ def ob_compare_streams(source_events_path: pathlib.PosixPath, results_path: path # order_books_events = source_events.filter(lambda e: e["eventType"] == "OrderBook") # buffers = split_every(100, order_books_events) - streams = recon_lw.open_scoped_events_streams(source_events_path, lambda n: "default_" not in n) + streams = open_scoped_events_streams(source_events_path, lambda n: "default_" not in n) message_buffer = [None] * 100 buffer_len = 100 while len(streams) > 0: - next_batch_len = recon_lw.get_next_batch(streams, message_buffer, buffer_len, lambda e: e["body"]["timestamp"]) + next_batch_len = get_next_batch(streams, message_buffer, buffer_len, lambda e: e["body"]["timestamp"]) buffer_to_process = message_buffer if next_batch_len < buffer_len: buffer_to_process = message_buffer[:next_batch_len] # List[dict] diff --git a/recon_lw/recon_oe_ob.py b/recon_lw/matching/recon_oe_ob.py similarity index 94% rename from recon_lw/recon_oe_ob.py rename to recon_lw/matching/recon_oe_ob.py index da5c234..806bfcc 100644 --- a/recon_lw/recon_oe_ob.py +++ b/recon_lw/matching/recon_oe_ob.py @@ -3,14 +3,14 @@ from th2_data_services.data import Data -from recon_lw.ts_converters import epoch_nano_str_to_ts, ts_to_epoch_nano_str, time_stamp_key +from recon_lw.core.ts_converters import epoch_nano_str_to_ts, ts_to_epoch_nano_str -from recon_lw import recon_lw -from recon_lw.EventsSaver import EventsSaver -from recon_lw import recon_ob_cross_stream -from recon_lw.TimeCacheMatcher import TimeCacheMatcher -from recon_lw.message_utils import message_to_dict -from recon_lw.StateSequenceGenerator import StateSequenceGenerator +from recon_lw.core.EventsSaver import EventsSaver +from recon_lw.core.utility import open_streams, get_next_batch, protocol, 
open_scoped_events_streams +from recon_lw.matching import recon_ob_cross_stream +from recon_lw.matching.TimeCacheMatcher import TimeCacheMatcher +from recon_lw.core.message_utils import message_to_dict +from recon_lw.matching.StateSequenceGenerator import StateSequenceGenerator def process_order_states(message_pickle_path: Optional[str], sessions_list: Optional[list], @@ -21,14 +21,14 @@ def process_order_states(message_pickle_path: Optional[str], sessions_list: Opti events_saver.save_events([root_event]) if data_objects: - streams = recon_lw.open_streams(None, data_objects=data_objects) + streams = open_streams(None, data_objects=data_objects) else: if sessions_list is not None and len(sessions_list): sessions_set = set(sessions_list) - streams = recon_lw.open_streams(message_pickle_path, + streams = open_streams(message_pickle_path, lambda n: n[:n.rfind('_')] in sessions_set) else: - streams = recon_lw.open_streams(message_pickle_path) + streams = open_streams(message_pickle_path) create_event = lambda n, t, ok, b: events_saver.create_event(n, t, ok, b, parentId=root_event["eventId"]) @@ -44,7 +44,7 @@ def process_order_states(message_pickle_path: Optional[str], sessions_list: Opti message_buffer = [None] * 100 buffer_len = 100 while len(streams) > 0: - next_batch_len = recon_lw.get_next_batch(streams, message_buffer, buffer_len, + next_batch_len = get_next_batch(streams, message_buffer, buffer_len, lambda m: m["timestamp"]) buffer_to_process = message_buffer if next_batch_len < buffer_len: @@ -59,7 +59,7 @@ def process_order_states(message_pickle_path: Optional[str], sessions_list: Opti def get_order_type(er: dict) -> int: # FIXME: change message_to_dict to resolvers mm = message_to_dict(er) - if recon_lw.protocol(er) == "FIX": + if protocol(er) == "FIX": return int(mm["OrdType"]) else: return int(mm["OrderType"]) @@ -116,16 +116,16 @@ def create_event(n, t, ok, b, am=None): save_events) data_filter = lambda e: e['body'] and e['body'].get("timestamp", -1) != -1 
- streams = recon_lw.open_scoped_events_streams(ob_events_path, + streams = open_scoped_events_streams(ob_events_path, lambda n: any(s in n for s in md_sessions_list)) - streams2 = recon_lw.open_scoped_events_streams(oe_images_events_path, data_filter=data_filter) + streams2 = open_scoped_events_streams(oe_images_events_path, data_filter=data_filter) for elem in streams2: streams.add(elem) message_buffer = [None] * 100 buffer_len = 100 while len(streams) > 0: - next_batch_len = recon_lw.get_next_batch(streams, message_buffer, buffer_len, + next_batch_len = get_next_batch(streams, message_buffer, buffer_len, oe_ob_get_timestamp) buffer_to_process = message_buffer if next_batch_len < buffer_len: @@ -241,7 +241,7 @@ def oe_er_key_ts_new_key_extract(er): if er["messageType"] != "ExecutionReport": return None, None, None mm = message_to_dict(er) - if recon_lw.protocol(er) == "FIX": + if protocol(er) == "FIX": if mm["ExecType"] in ["6", "8", "E", "H"]: return None, None, None ts = ts_from_tag_val(mm["TransactTime"]) @@ -267,7 +267,7 @@ def oe_er_key_ts_new_key_extract(er): def get_resting_price(er): mm = message_to_dict(er) - if recon_lw.protocol(er) == "FIX": + if protocol(er) == "FIX": return float(mm["Price"]) else: return float(mm["OrderPrice"]) @@ -275,7 +275,7 @@ def get_resting_price(er): def get_resting_qty(er): mm = message_to_dict(er) - if recon_lw.protocol(er) == "FIX": + if protocol(er) == "FIX": return int(mm["LeavesQty"]) else: return int(mm["LeavesQuantity"]) @@ -283,7 +283,7 @@ def get_resting_qty(er): def get_trade_price(er): mm = message_to_dict(er) - if recon_lw.protocol(er) == "FIX": + if protocol(er) == "FIX": return float(mm["LastPx"]) else: return float(mm["LastPrice"]) @@ -291,7 +291,7 @@ def get_trade_price(er): def get_trade_qty(er): mm = message_to_dict(er) - if recon_lw.protocol(er) == "FIX": + if protocol(er) == "FIX": return int(mm["LastQty"]) else: return int(mm["LastQuantity"]) @@ -299,7 +299,7 @@ def get_trade_qty(er): def 
get_transact_time(er): mm = message_to_dict(er) - if recon_lw.protocol(er) == "FIX": + if protocol(er) == "FIX": return ts_from_tag_val(mm["TransactTime"]) else: return epoch_nano_str_to_ts(mm["TransactTime"]) @@ -307,7 +307,7 @@ def get_transact_time(er): def get_order_status(er: dict) -> int: mm = message_to_dict(er) - if recon_lw.protocol(er) == "FIX": + if protocol(er) == "FIX": return int(mm["OrdStatus"]) else: return int(mm["OrderStatus"]) diff --git a/recon_lw/matching/stream_matcher/__init__.py b/recon_lw/matching/stream_matcher/__init__.py new file mode 100644 index 0000000..0963efc --- /dev/null +++ b/recon_lw/matching/stream_matcher/__init__.py @@ -0,0 +1 @@ +from recon_lw.matching.stream_matcher.base import ReconMatcher \ No newline at end of file diff --git a/recon_lw/matching/stream_matcher/base.py b/recon_lw/matching/stream_matcher/base.py new file mode 100644 index 0000000..b7607f1 --- /dev/null +++ b/recon_lw/matching/stream_matcher/base.py @@ -0,0 +1,10 @@ +from abc import ABC, abstractmethod + +from recon_lw.core.rule import AbstractRule +from recon_lw.core.utility.recon_utils import * + + +class ReconMatcher(ABC): + @abstractmethod + def match(self, next_batch: List[Optional[Dict]], rule: AbstractRule): + pass \ No newline at end of file diff --git a/recon_lw/matching/stream_matcher/one_many.py b/recon_lw/matching/stream_matcher/one_many.py new file mode 100644 index 0000000..fa42f07 --- /dev/null +++ b/recon_lw/matching/stream_matcher/one_many.py @@ -0,0 +1,65 @@ +from recon_lw.matching.init_function import SimpleMatcherContext +from recon_lw.matching.stream_matcher.base import ReconMatcher +from recon_lw.core.rule.one_many import OneManyRuleConfig +from typing import List, Optional, Dict +from th2_data_services.config import options +from recon_lw.core.utility.recon_utils import time_index_add, message_cache_add + + +class OneManyMatcher(ReconMatcher): + + def match(self, next_batch: List[Optional[Dict]], rule: OneManyRuleConfig): + context = 
rule.matcher_context + if not isinstance(context, SimpleMatcherContext): + raise ValueError(f'Expected matcher_context type is SimpleMatcherContext or its extension.\ + Actual type is {type(rule.matcher_context)}') + match_index = context.match_index + time_index = context.time_index + message_cache = context.message_cache + first_key_func = rule.first_key_func + second_key_func = rule.second_key_func + + n_duplicates = 0 + for m in next_batch: + + if rule.cache_manager: + rule.cache_manager.process_unfiltered_message(m) + + first_keys = first_key_func(m) + message_id = options.mfr.get_id(m) + if first_keys is not None: + match_index_element = [message_id, None] + for first_key in first_keys: + if first_key not in match_index: + match_index[first_key] = match_index_element + time_index_add(first_key, m, time_index) + message_cache_add(m, message_cache) + continue + else: + existing = match_index[first_key] + if existing[0] is not None: + n_duplicates += 1 + else: + existing[0] = message_id + message_cache_add(m, message_cache) + continue + + second_key = second_key_func(m) + if second_key is not None: + if second_key not in match_index: + match_index[second_key] = [None, message_id] + time_index_add(second_key, m, time_index) + message_cache_add(m, message_cache) + else: + existing = match_index[second_key] + if existing[1] is None: + existing[1] = message_id + message_cache_add(m, message_cache) + else: + existing.append(message_id) + message_cache_add(m, message_cache) + if second_key is not None or first_keys is not None: + if rule.cache_manager: + rule.cache_manager.process_filtered_message(m) + if n_duplicates > 0: + pass diff --git a/recon_lw/matching/stream_matcher/pair_one.py b/recon_lw/matching/stream_matcher/pair_one.py new file mode 100644 index 0000000..0772f5e --- /dev/null +++ b/recon_lw/matching/stream_matcher/pair_one.py @@ -0,0 +1,47 @@ +from recon_lw.matching.stream_matcher.base import ReconMatcher +from recon_lw.core.rule.pair_one import 
PairOneRule +from typing import Optional, List, Dict +from recon_lw.core.utility.recon_utils import time_index_add, message_cache_add +from th2_data_services.config import options + +class PairOneMatcher(ReconMatcher): + + def __init__(self, rule: PairOneRule): + super().__init__() + self.rule = rule + + def match(self, next_batch: List[Optional[Dict]], rule: PairOneRule): + match_index = self.rule.context.match_index + time_index = self.rule.context.time_index + message_cache = self.rule.context.message_cache + + pair_key_func = self.rule.pair_key_func + one_key_func = self.rule.one_key_func + + for m in next_batch: + pair_key = pair_key_func(m) + message_id = options.mfr.get_id(m) + if pair_key is not None: + if pair_key not in match_index: + match_index[pair_key] = [message_id, None, None] + time_index_add(pair_key, m, time_index) + message_cache_add(m, message_cache) + else: + element = match_index[pair_key] + if element[0] is None: + element[0] = message_id + message_cache_add(m, message_cache) + elif element[1] is None: + element[1] = message_id + message_cache_add(m, message_cache) + one_key = one_key_func(m) + if one_key is not None: + if one_key not in match_index: + match_index[one_key] = [None, None, message_id] + time_index_add(one_key, m, time_index) + message_cache_add(m, message_cache) + else: + element = match_index[one_key] + if element[2] is None: + element[2] = message_id + message_cache_add(m, message_cache) \ No newline at end of file diff --git a/recon_lw/recon_lw.py b/recon_lw/recon_lw.py deleted file mode 100644 index 22791be..0000000 --- a/recon_lw/recon_lw.py +++ /dev/null @@ -1,621 +0,0 @@ -import abc -from datetime import datetime -from typing import Iterable, List, Optional, Dict, Any - -from sortedcontainers import SortedKeyList -from th2_data_services.data import Data - -from recon_lw.message_utils import message_to_dict -from os import listdir -from os import path -from recon_lw.EventsSaver import EventsSaver -from 
th2_data_services.config import options - -from recon_lw.stream import Streams -# Don't remove gray imports -- that was done for backward compatibility. -from recon_lw.ts_converters import time_stamp_key, epoch_nano_str_to_ts, \ - ts_to_epoch_nano_str - - -def time_index_add(key, m, time_index): - time_index.add((options.mfr.get_timestamp(m), key)) - - -def message_cache_add(m, message_cache): - message_cache[options.mfr.get_id(m)] = m - - -def message_cache_pop(m_id, message_cache): - if m_id is None: - return None - return message_cache.pop(m_id) - - -def pair_one_match(next_batch, rule_dict): - # first_key_func takes m returns string(key) - match_index = rule_dict["match_index"] - time_index = rule_dict["time_index"] - message_cache = rule_dict["message_cache"] - pair_key_func = rule_dict["pair_key_func"] - one_key_func = rule_dict["one_key_func"] - - for m in next_batch: - pair_key = pair_key_func(m) - message_id = options.mfr.get_id(m) - if pair_key is not None: - if pair_key not in match_index: - match_index[pair_key] = [message_id, None, None] - time_index_add(pair_key, m, time_index) - message_cache_add(m, message_cache) - else: - element = match_index[pair_key] - if element[0] is None: - element[0] = message_id - message_cache_add(m, message_cache) - elif element[1] is None: - element[1] = message_id - message_cache_add(m, message_cache) - one_key = one_key_func(m) - if one_key is not None: - if one_key not in match_index: - match_index[one_key] = [None, None, message_id] - time_index_add(one_key, m, time_index) - message_cache_add(m, message_cache) - else: - element = match_index[one_key] - if element[2] is None: - element[2] = message_id - message_cache_add(m, message_cache) - - -def one_many_match(next_batch, rule_dict): - """ - One to Many matching algorithm. - - It's expected that `first_key_func` will return [ke1, key2, ...] for - this type of matching - - If first_key_func will return the same value for keys -- they will be - removed as duplicates. 
- Second key func -- messages with the same key will be added to result and - provided to interpr func as [_, 2nd_key_match1, 2nd_key_match2, ...] - - Args: - next_batch: - rule_dict: - - Returns: - - """ - # match_index: dict[Any, MatchIndexElement] = rule_dict["match_index"] - match_index: dict[Any, list] = rule_dict["match_index"] - time_index = rule_dict["time_index"] - message_cache = rule_dict["message_cache"] - first_key_func = rule_dict["first_key_func"] - second_key_func = rule_dict["second_key_func"] - - n_duplicates = 0 - for m in next_batch: - first_keys = first_key_func(m) - message_id = options.mfr.get_id(m) - if first_keys is not None: - match_index_element = [message_id, None] - for first_key in first_keys: - if first_key not in match_index: - match_index[first_key] = match_index_element - time_index_add(first_key, m, time_index) - message_cache_add(m, message_cache) - continue - else: - existing = match_index[first_key] - if existing[0] is not None: - n_duplicates += 1 - else: - existing[0] = message_id - message_cache_add(m, message_cache) - continue - second_key = second_key_func(m) - if second_key is not None: - if second_key not in match_index: - match_index[second_key] = [None, message_id] - time_index_add(second_key, m, time_index) - message_cache_add(m, message_cache) - else: - existing = match_index[second_key] - if existing[1] is None: # existing[1] - stream 2 message ID - existing[1] = message_id - # match_index[second_key] = [existing[0], message_id] - message_cache_add(m, message_cache) - else: - existing.append(message_id) - message_cache_add(m, message_cache) - - if n_duplicates > 0: - print(n_duplicates, " duplicates detected") - - -def flush_old(current_ts, horizon_delay, time_index): - result = [] - horizon_edge = len(time_index) - if current_ts is not None: - edge_timestamp = {"epochSecond": current_ts["epochSecond"] - horizon_delay, - "nano": 0} - horizon_edge = time_index.bisect_key_left( - time_stamp_key(edge_timestamp)) - - if 
horizon_edge > 0: - n = 0 - while n < horizon_edge: - nxt = time_index.pop(0) - result.append(nxt[1]) - n += 1 - return result - - -# match_compare_func takes m1, m2 returns e -# end_events_func tekes iterable of events -def rule_flush(current_ts, horizon_delay, match_index: dict, time_index, message_cache, - interpret_func, event_sequence: dict, send_events_func, - parent_event, live_orders_cache): - old_keys = flush_old(current_ts, horizon_delay, time_index) - events = [] - for match_key in old_keys: - elem = match_index.pop(match_key) # elem -- can have 2 or 3 elements inside - if elem[0] is not None and elem[0] not in message_cache: - # request already processed through different key - continue - - # interpret_func function has exact format - # arg0 - list of matched messages - # arg1 - ?? - # arg2 - EventSequence - results = interpret_func( - [message_cache_pop(item, message_cache) for item in elem], - live_orders_cache, - event_sequence - ) - # result = interpret_func(message_cache_pop(elem[0], message_cache), - # message_cache_pop(elem[1], message_cache), event_sequence) - if results is not None: - for r in results: - r["parentEventId"] = parent_event["eventId"] - events.append(r) - - send_events_func(events) - - -def create_event_id(event_sequence: dict): - event_sequence["n"] += 1 - return event_sequence["name"] + "_" + event_sequence["stamp"] + "-" + str(event_sequence["n"]) - - -def create_event(name, type, event_sequence: dict, ok=True, body=None, parentId=None): - # TODO - description is required. - ts = datetime.now() - e = {"eventId": create_event_id(event_sequence), - "successful": ok, - "eventName": name, - "eventType": type, - "body": body, - "parentEventId": parentId, - "startTimestamp": {"epochSecond": int(ts.timestamp()), "nano": ts.microsecond * 1000}, - "attachedMessageIds": []} - return e - - -class RuleSettings: - """Prototype of the RuleSettings class. - - This class will be used instead of current Dict settings config. 
- This class should be backward-compatible with current Dict settings config solution. - - - It describes - - configuration function interfaces. - - ... - """ - - def __init__(self): - """ - TODO - every parameter should be described in detail. - - """ - self.horizon_delay = None - self.match_index = None - self.time_index = None - self.message_cache = None - self.rule_root_event = None - self.live_orders_cache = None - - @abc.abstractmethod - def interpret_func(self, match_msgs: List[dict], _, event_sequence: dict): - """ - - Args: - match_msgs: list of matched messages - _: - event_sequence: some dict that looks like - {"name": "recon_lw", "stamp": str(box_ts.timestamp()), "n": 0} - - Returns: - - """ - pass - - @abc.abstractmethod - def rule_match_func(self, next_batch, rule_dict): - pass - - @abc.abstractmethod - def first_key_func(self, message): - """Should return list of objects for 1 to many match mode.""" - pass - - @abc.abstractmethod - def second_key_func(self, message): - pass - - def flush_func(self, ts, event_sequence: dict, save_events_func): - """ - TODO - description is required. - - Args: - ts: ?? - event_sequence: ?? - save_events_func: a function that will store result events to (file/DB/..). - - Returns: - - """ - rule_flush(current_ts=ts, - horizon_delay=self.horizon_delay, - match_index=self.match_index, - time_index=self.time_index, - message_cache=self.message_cache, - interpret_func=self.interpret_func, - event_sequence=event_sequence, - send_events_func=save_events_func, - parent_event=self.rule_root_event, - live_orders_cache=self.live_orders_cache) - - -# {"first_key_func":..., "second_key_func",... "interpret_func"} -def execute_standalone(message_pickle_path, sessions_list, result_events_path, - rules_settings_dict: Dict[str, Dict[str, Any]], - data_objects=None, - buffer_len=100): - """Entrypoint for Horizon Recon. - - Horizon Recon will store all messages withing time window == 'horizon_delay'. 
- - - It generates ReconEvents and stores them in the `result_events_path` file - to disc in pickle format. - - It matches messages 1 to 1 or 1 to many (depends on `rule_match_func` param - in the config). - When messages were matched or unmatched, the list of [msg1, *msgs2] will be - passed to interp_func. - msg1 and *msgs2 can be None if no match message was found. - It's not possible case when all msgs are None. - - If you provide data_objects, message_pickle_path -- will be ignored. - - rules_settings_dict: - first_key_func(self, message): - Should return list of objects for 1 to many match mode. - - Note: - 1. All messages should have `timestamp` field. - 2. Messages from Streams will be handled one by one sorted by - timestamp. - 3. Stream object inside Streams should be sorted. - 4. DON'T PASS UNITED DATA OBJECT AS `data_objects` - e.g. you have 3 streams (3 Data objects) and you want to filter out - each of them. (e.g. remove all Heartbeats) - Don't need to do `stream = Data([d1, d2, d3]).filter(...) - That will work wrong! - **Horizon recon expects a set of sorted by Timestamp streams.** - - Args: - message_pickle_path: - sessions_list: - result_events_path: - rules_settings_dict: { ReconRuleName: {}, ... } - data_objects: - - Returns: - - """ - box_ts = datetime.now() - events_saver = EventsSaver(result_events_path) - # TODO - let's use an Object maybe instead of dict for event_sequence? - event_sequence = {"name": "recon_lw", "stamp": str(box_ts.timestamp()), "n": 0} - root_event = create_event("recon_lw " + box_ts.isoformat(), "Microservice", event_sequence) - - events_saver.save_events([root_event]) - # TODO -- rule_settings -- will be changed to Class. - # We should leave backward compatibility with current DICT solution. 
- for rule_key, rule_settings in rules_settings_dict.items(): - rule_settings["rule_root_event"] = create_event(rule_key, "LwReconRule", - event_sequence, - parentId=root_event["eventId"]) - rule_settings["events_saver"] = events_saver - rule_settings["event_sequence"] = event_sequence - if "init_func" not in rule_settings: - rule_settings["init_func"] = init_matcher - if "collect_func" not in rule_settings: - rule_settings["collect_func"] = collect_matcher - if "flush_func" not in rule_settings: - rule_settings["flush_func"] = flush_matcher - rule_settings["init_func"](rule_settings) - - events_saver.save_events([r["rule_root_event"] for r in rules_settings_dict.values()]) - if data_objects: - streams = open_streams(message_pickle_path, data_objects=data_objects) - else: - if sessions_list is not None and len(sessions_list): - sessions_set = set(sessions_list) - streams = open_streams(message_pickle_path, - lambda n: n[:n.rfind('_')] in sessions_set) - else: - streams = open_streams(message_pickle_path) - - message_buffer = [None] * buffer_len - - while len(streams) > 0: - next_batch_len = streams.get_next_batch(message_buffer, buffer_len, - lambda m: m["timestamp"]) - buffer_to_process = message_buffer - if next_batch_len < buffer_len: - buffer_to_process = message_buffer[:next_batch_len] - for rule_settings in rules_settings_dict.values(): - rule_settings["collect_func"](buffer_to_process, rule_settings) - ts = buffer_to_process[len(buffer_to_process) - 1]["timestamp"] - rule_settings["flush_func"](ts, rule_settings, event_sequence, - lambda ev_batch: events_saver.save_events(ev_batch)) - # final flush - for rule_settings in rules_settings_dict.values(): - rule_settings["flush_func"](None, rule_settings, event_sequence, - lambda ev_batch: events_saver.save_events(ev_batch)) - # one final flush - events_saver.flush() - - -def init_matcher(rule_settings): - rule_settings["match_index"] = {} - rule_settings["time_index"] = SortedKeyList(key=lambda t: 
time_stamp_key(t[0])) - rule_settings["message_cache"] = {} - - -def collect_matcher(batch, rule_settings): - rule_match_func = rule_settings["rule_match_func"] - rule_match_func(batch, rule_settings) - if "live_orders_cache" in rule_settings: - rule_settings["live_orders_cache"].process_objects_batch(batch) - - -def flush_matcher(ts, rule_settings, event_sequence: dict, save_events_func): - rule_flush(ts, - rule_settings["horizon_delay"], - rule_settings["match_index"], - rule_settings["time_index"], - rule_settings["message_cache"], - rule_settings["interpret_func"], - event_sequence, - save_events_func, - rule_settings["rule_root_event"], - rule_settings["live_orders_cache"] if "live_orders_cache" in rule_settings else None) - - -def simplify_message(m): - """Returns a copy of m with changed fields: - - Added: - - simpleBody - - protocol - - Removed - - body - - bodyBase64 - - :param m: - :return: - """ - mm = m.copy() - if len(m["body"]) > 0: - mm["simpleBody"] = message_to_dict(m) - mm["protocol"] = protocol(m) - else: - mm["simpleBody"] = {} - - # TODO - # - it's better to get these names from DataSource message description. - # - it's possible that sometime the path of the body will be changed. - mm.pop("body") - mm.pop("bodyBase64") - return mm - - -def load_to_list(messages: Iterable[dict], simplify: bool) -> List[dict]: - if simplify: - return list(map(simplify_message, messages)) - else: - return list(messages) - - -def split_messages_pickle_for_recons(message_pickle_path, output_path, sessions_list, - simplify=True): - """DEPRECATED FUNCTIONS SINCE WE HAVE DownloadCommand in LwDP data source. 
- - :param message_pickle_path: - :param output_path: - :param sessions_list: - :param simplify: - :return: - """ - messages = Data.from_cache_file(message_pickle_path) - for s in sessions_list: - messages_session_in = messages.filter( - lambda m: options.mfr.get_session_id(m) == s and options.mfr.get_direction(m) == "IN") - print("Sorting ", s, " IN ", datetime.now()) - arr = load_to_list(messages_session_in, simplify) - arr.sort(key=lambda m: time_stamp_key(m["timestamp"])) - messages_session_in_to_save = Data(arr) - file_name = output_path + "/" + s + "_IN.pickle" - print("Saving ", file_name, " ", datetime.now()) - messages_session_in_to_save.build_cache(file_name) - - messages_session_out = messages.filter( - lambda m: options.mfr.get_session_id(m) == s and options.mfr.get_direction(m) == "OUT") - print("Sorting ", s, " OUT ", datetime.now()) - arr = load_to_list(messages_session_out, simplify) - arr.sort(key=lambda m: time_stamp_key(m["timestamp"])) - messages_session_out_to_save = Data(arr) - - file_name = output_path + "/" + s + "_OUT.pickle" - print("Saving ", file_name, " ", datetime.now()) - messages_session_out_to_save.build_cache(file_name) - - -def protocol(m): - """ - - Expects the message after expand_message function. - - :param m: - :return: - """ - # Simplified message - if "body" not in m: - return m["protocol"] - - if len(m["body"]) == 0: - return "error" - - pr = options.smsr.get_protocol(options.mfr.get_body()) - return "not_defined" if pr is None else pr - - -def open_scoped_events_streams( - streams_path, - name_filter=None, - data_filter=None -) -> Streams: - """ - Get Streams object for Th2 events. - - Args: - streams_path: - name_filter: - data_filter: - - Returns: - Streams: [(Th2ProtobufTimestamp, - iterator for Data object, - First object from Data object or None), ...] 
- """ - streams = Streams() - files = listdir(streams_path) - files.sort() - # This part to replace Data+Data to Data([Data,Data]) - scopes_streams_temp: Dict[str, list] = {} - for f in files: - if ".pickle" not in f: - continue - if name_filter is not None and not name_filter(f): - continue - scope = f[:f.index("_scope_")] - if scope not in scopes_streams_temp: - scopes_streams_temp[scope] = [Data.from_cache_file(path.join(streams_path, f))] - else: - scopes_streams_temp[scope].append(Data.from_cache_file(path.join(streams_path, f))) - - scopes_streams: Dict[str, Data] = {scope: Data(scopes_streams_temp[scope]) - for scope in scopes_streams_temp} - for strm in scopes_streams.values(): - if data_filter: - strm = strm.filter(data_filter) - streams.add_stream(strm) - return streams - - -def open_streams( - streams_path: Optional[str], - name_filter=None, - expanded_messages: bool = False, - data_objects: List[Data] = None -) -> Streams: - """ - Get Streams object for Th2 messages. - - Args: - streams_path: - name_filter: - expanded_messages: - data_objects: - - Returns: - Streams: [(Th2ProtobufTimestamp, - iterator for Data object, - First object from Data object or None), ...] 
- """ - streams = Streams() - - if data_objects: - for do in data_objects: - ts0 = {"epochSecond": 0, "nano": 0} - if expanded_messages: - stream = (mm for m in do for mm in options.mfr.expand_message(m)) - else: - stream = do - streams.add((ts0, iter(stream), None)) - else: - files = listdir(streams_path) - for f in files: - if ".pickle" not in f: - continue - if name_filter is not None and not name_filter(f): - continue - data_object = Data.from_cache_file(path.join(streams_path, f)) - if expanded_messages: - stream = (mm for m in data_object for mm in - options.MESSAGE_FIELDS_RESOLVER.expand_message(m)) - else: - stream = Data.from_cache_file(path.join(streams_path, f)) - streams.add_stream(stream) - - return streams - - -def get_next_batch(streams: Streams, - batch: List[Optional[dict]], - batch_len, - get_timestamp_func) -> int: - """ - - Args: - streams: [(Th2ProtobufTimestamp, - iterator for Data object, - First object from Data object or None), ...] - batch: - b_len: - get_timestamp_func: - - Returns: - - """ - # DEPRECATED. - return streams.get_next_batch( - batch=batch, - batch_len=batch_len, - get_timestamp_func=get_timestamp_func - ) - - -def sync_stream(streams: Streams, - get_timestamp_func): - # DEPRECATED. - # Use streams.sync_streams instead. 
- yield from streams.sync_streams(get_timestamp_func)
-
diff --git a/recon_lw/recon_lw_entrypoint.py b/recon_lw/recon_lw_entrypoint.py
new file mode 100644
index 0000000..e184238
--- /dev/null
+++ b/recon_lw/recon_lw_entrypoint.py
@@ -0,0 +1,168 @@
+from recon_lw.core.EventsSaver import EventsSaver
+from typing import Union, Callable, Any
+
+from recon_lw.core.rule import AbstractRule
+from recon_lw.core.rule.base import RuleContext
+from recon_lw.matching.collect_matcher import CollectMatcher
+
+from recon_lw.core.utility import *
+from recon_lw.matching.flush_function import FlushFunction
+from recon_lw.matching.old.matching import init_matcher, collect_matcher, flush_matcher
+
+
+def execute_standalone(message_pickle_path, sessions_list, result_events_path,
+                       rules: Dict[str, Union[Dict[str, Any], AbstractRule]],
+                       data_objects=None,
+                       buffer_len=100):
+    """Entrypoint for recon-lw.
+
+    It generates ReconEvents and stores them in the `result_events_path` file
+    on disk in pickle format.
+
+    It matches messages 1 to 1 or 1 to many (depends on `rule_match_func` param
+    in the config).
+    When messages were matched or unmatched, the list of [msg1, *msgs2] will be
+    passed to interp_func.
+    msg1 and *msgs2 can be None if no match message was found.
+    The case when all msgs are None is not possible.
+
+    The message streams are read in batches of at most `buffer_len` messages.
+    Every batch is passed to each rule's collect function and then to its
+    flush function together with the timestamp of the last message of the
+    batch. Once the streams are exhausted every rule is flushed again with a
+    `None` timestamp and the events saver is flushed.
+
+    Args:
+        message_pickle_path: Forwarded to `open_streams` as the first argument.
+        sessions_list: Optional collection of session names. When it is
+            non-empty and `data_objects` is falsy, only streams for which
+            `name[:name.rfind('_')]` is in the collection are opened.
+        result_events_path: Forwarded to `EventsSaver`.
+        rules: { ReconRuleName: rule, ... } where each rule is either a
+            rule-settings dict (completed in place) or an `AbstractRule`.
+        data_objects: When truthy, forwarded to `open_streams` and
+            `sessions_list` is not used. According to the original note,
+            `message_pickle_path` is ignored in that case.
+        buffer_len: Maximum number of messages requested per batch.
+
+    Raises:
+        SystemError: If a rule is neither a dict nor an `AbstractRule`.
+    """
+    box_ts = datetime.now()
+    events_saver = EventsSaver(result_events_path)
+    save_batch = events_saver.save_events
+
+    event_sequence = EventSequence(name="recon_lw", timestamp=str(box_ts.timestamp()), n=0).to_dict()
+    root_event = create_event("recon_lw " + box_ts.isoformat(), "Microservice", event_sequence)
+
+    events_saver.save_events([root_event])
+    new_rules_settings_dict = {}
+    for rule_key, rule_settings in rules.items():
+        if isinstance(rule_settings, dict):
+            preprocess = preprocess_rule_config_dict
+        elif isinstance(rule_settings, AbstractRule):
+            preprocess = preprocess_rule_config_object
+        else:
+            raise SystemError("Invalid rule settings type.")
+        new_rules_settings_dict[rule_key] = preprocess(
+            rule_key,
+            event_sequence,
+            root_event,
+            rule_settings,
+            events_saver
+        )
+
+    events_saver.save_events(
+        [
+            r.rule_context.rule_root_event if isinstance(r, AbstractRule) else r["rule_root_event"]
+            for r in new_rules_settings_dict.values()
+        ]
+    )
+    if data_objects:
+        streams = open_streams(message_pickle_path, data_objects=data_objects)
+    elif sessions_list is not None and len(sessions_list):
+        sessions_set = set(sessions_list)
+        streams = open_streams(message_pickle_path,
+                               lambda n: n[:n.rfind('_')] in sessions_set)
+    else:
+        streams = open_streams(message_pickle_path)
+
+    message_buffer = [None] * buffer_len
+
+    while len(streams) > 0:
+        next_batch_len = streams.get_next_batch(message_buffer, buffer_len,
+                                                lambda m: m["timestamp"])
+        buffer_to_process = message_buffer
+        if next_batch_len < buffer_len:
+            buffer_to_process = message_buffer[:next_batch_len]
+
+        for rule_settings in new_rules_settings_dict.values():
+            if isinstance(rule_settings, AbstractRule):
+                rule_settings.collect_func.collect_matches(buffer_to_process, rule_settings)
+                ts = buffer_to_process[-1]["timestamp"]
+                rule_settings.flush_func(ts, rule_settings, save_batch)
+            else:
+                rule_settings["collect_func"](buffer_to_process, rule_settings)
+                ts = buffer_to_process[-1]["timestamp"]
+                rule_settings["flush_func"](ts, rule_settings, event_sequence, save_batch)
+    # final flush: a None timestamp is passed once the streams are exhausted
+    for rule_settings in new_rules_settings_dict.values():
+        if isinstance(rule_settings, AbstractRule):
+            rule_settings.flush_func(None, rule_settings, save_batch)
+        else:
+            rule_settings["flush_func"](None, rule_settings, event_sequence, save_batch)
+    # one final flush
+    events_saver.flush()
+
+
+def preprocess_rule_config_object(
+        rule_key: str,
+        event_sequence: dict,
+        root_event: dict,
+        rule: AbstractRule,
+        events_saver: EventsSaver
+) -> AbstractRule:
+    """Attach a fresh `RuleContext` to `rule` and return the same object.
+
+    The context holds a new "LwReconRule" event named `rule_key` (child of
+    `root_event`), `events_saver` and `event_sequence`.
+    """
+    rule_root_event = create_event(rule_key, "LwReconRule",
+                                   event_sequence,
+                                   parentId=root_event["eventId"])
+
+    rule.set_rule_context(RuleContext(rule_root_event, events_saver, event_sequence))
+    return rule
+
+
+def preprocess_rule_config_dict(
+        rule_key: str,
+        event_sequence: dict,
+        root_event: dict,
+        rule: dict,
+        events_saver: EventsSaver
+) -> dict:
+    """Complete a dict-based rule in place and return the same dict.
+
+    Stores a new "LwReconRule" event named `rule_key` (child of
+    `root_event`), `events_saver` and `event_sequence` in `rule`, fills in
+    the default matcher callables for any of "init_func", "collect_func"
+    and "flush_func" that are absent, then calls `rule["init_func"](rule)`.
+    """
+    rule_root_event = create_event(rule_key, "LwReconRule",
+                                   event_sequence,
+                                   parentId=root_event["eventId"])
+
+    rule["rule_root_event"] = rule_root_event
+    rule["events_saver"] = events_saver
+    rule["event_sequence"] = event_sequence
+
+    # Keep any callable the caller supplied; only add the missing defaults.
+    rule.setdefault("init_func", init_matcher)
+    rule.setdefault("collect_func", collect_matcher)
+    rule.setdefault("flush_func", flush_matcher)
+    rule["init_func"](rule)
+    return rule
+
+
+#
\ No newline at end of file
diff --git a/recon_lw/reporting/__init__.py b/recon_lw/reporting/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/recon_lw/reporting/coverage/__init__.py b/recon_lw/reporting/coverage/__init__.py
new file mode 100644
index
0000000..e69de29 diff --git a/recon_lw/reporting/coverage/viewer/__init__.py b/recon_lw/reporting/coverage/viewer/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/recon_lw/reporting/coverage/viewer/fields_viewer.py b/recon_lw/reporting/coverage/viewer/fields_viewer.py new file mode 100644 index 0000000..6dfdd4d --- /dev/null +++ b/recon_lw/reporting/coverage/viewer/fields_viewer.py @@ -0,0 +1,75 @@ +from dataclasses import asdict +from typing import Dict, List, Any +import textwrap as tw + +from IPython.core.display import HTML +from IPython.core.display_functions import display + +from recon_lw.reporting.recon_context.context import ReconContext +from recon_lw.reporting.recon_metadata.base import ReconMetadata + +ROW_DEFAULT_STYLE_CONFIG = "text-align: left" +class FieldsTableStyleConfig: + def __init__( + self, + row_style: str=ROW_DEFAULT_STYLE_CONFIG, + text_field_width: int = 100, + row_font_size: int=15 + ): + self.row_style = row_style + self.row_font_size=row_font_size + self.text_field_width = text_field_width + +class FieldsTableViewer: + @staticmethod + def get_fields_table( + table_name: str, + fields: List[Dict[str, Any]], + style_config: FieldsTableStyleConfig + ): + columns = list(fields[0].keys()) + + def get_row(field: dict): + row = '' + for field in field.values(): + row += f'\n{FieldsViewerUtils.wrap_text(field, style_config)}' + row += "" + return row + + table = f""" + + + + + """ + table += "" + for column in columns: + table += (f'') + table += "" + + table += "\n".join( + [get_row(item) for item in fields] + ) + + table += "
{table_name}
' + f'{column}
" + + return table + +class ReconMetadataFieldsViewer: + def __init__(self, + recon_context: ReconContext, + styles: FieldsTableStyleConfig = FieldsTableStyleConfig() + ): + self.recon_context = recon_context + self.styles = styles + + def display(self): + for recon_name, metadata in self.recon_context.get_metadata().items(): + fields = list(map(lambda x: asdict(x), metadata.covered_fields)) + display(HTML(FieldsTableViewer.get_fields_table(recon_name, fields, self.styles))) + +class FieldsViewerUtils: + @staticmethod + def wrap_text(text: str, style: FieldsTableStyleConfig) -> str: + return "
".join(tw.wrap(text, width=style.text_field_width)) \ No newline at end of file diff --git a/recon_lw/reporting/known_issues/__init__.py b/recon_lw/reporting/known_issues/__init__.py new file mode 100644 index 0000000..3eb6be6 --- /dev/null +++ b/recon_lw/reporting/known_issues/__init__.py @@ -0,0 +1,3 @@ +from recon_lw.reporting.known_issues.exec_type import * +from recon_lw.reporting.known_issues.issue import * +from recon_lw.reporting.known_issues.issue_status import * \ No newline at end of file diff --git a/recon_lw/reporting/known_issues/exec_type.py b/recon_lw/reporting/known_issues/exec_type.py new file mode 100644 index 0000000..ce8f126 --- /dev/null +++ b/recon_lw/reporting/known_issues/exec_type.py @@ -0,0 +1,10 @@ +from enum import Enum + +class ExecType(Enum): + NEW = "New" + CANCELLED = "Cancelled" + REPLACED = "Replaced" + TRADE = "Trade" + TRIGGERED = "Triggered" + REJECTED = "Rejected" + RESTATED = "Restated" \ No newline at end of file diff --git a/recon_lw/reporting/known_issues/issue.py b/recon_lw/reporting/known_issues/issue.py new file mode 100644 index 0000000..ac714be --- /dev/null +++ b/recon_lw/reporting/known_issues/issue.py @@ -0,0 +1,61 @@ +from typing import Optional + +from recon_lw.reporting.known_issues.issue_status import IssueStatus +from datetime import datetime + +class Issue: + def __init__( + self, + code: str, + description: str, + status: IssueStatus, + status_update_date: str, + expected_fix_version: Optional[str] = None, + status_reason: Optional[str]=None, + is_wip: bool = False, + **kwargs + ): + self.status_update_date = status_update_date + self.expected_fix_version = expected_fix_version + self.status_reason = status_reason + self.code = code + self.description = description + self.status = status + self.is_wip = is_wip + + def _rep(self): + if self.expected_fix_version is not None: + expected_fix = f'[Expected fix: {self.expected_fix_version}]' + else: + expected_fix = '' + + if self.status_reason is not None: + 
status_reason = f'[Status reason: {self.status_reason}]' + else: + status_reason = '' + + if self.is_wip: + res = f"{self.code} {expected_fix} {status_reason} - " \ + f"{self.description}" + else: + res = f"{self.code} [{self.status}, {self.status_update_date}]{expected_fix} {status_reason} - " \ + f"{self.description}" + + if self.status in {IssueStatus.CLOSED, IssueStatus.DRAFT}: + if self.status == IssueStatus.DRAFT and self.is_wip: + return res + return f"! {res}" + return res + + def __str__(self): + return self._rep() + + def __repr__(self): + return self._rep() + + def __add__(self, other): + return f"{self}{other}" + + def __radd__(self, other): + return f"{other}{self}" + diff --git a/recon_lw/reporting/known_issues/issue_status.py b/recon_lw/reporting/known_issues/issue_status.py new file mode 100644 index 0000000..340a052 --- /dev/null +++ b/recon_lw/reporting/known_issues/issue_status.py @@ -0,0 +1,7 @@ +from enum import Enum + +class IssueStatus(Enum): + FOR_REVIEW = "ForReview" + CLOSED = "Closed" + APPROVED = "Approved" + DRAFT = "Draft" \ No newline at end of file diff --git a/recon_lw/reporting/match_diff/__init__.py b/recon_lw/reporting/match_diff/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/recon_lw/reporting/match_diff/categorizer/__init__.py b/recon_lw/reporting/match_diff/categorizer/__init__.py new file mode 100644 index 0000000..989f6e4 --- /dev/null +++ b/recon_lw/reporting/match_diff/categorizer/__init__.py @@ -0,0 +1 @@ +from recon_lw.reporting.match_diff.categorizer.types import * \ No newline at end of file diff --git a/recon_lw/reporting/match_diff/categorizer/base.py b/recon_lw/reporting/match_diff/categorizer/base.py new file mode 100644 index 0000000..cc3ca8a --- /dev/null +++ b/recon_lw/reporting/match_diff/categorizer/base.py @@ -0,0 +1,30 @@ +from abc import abstractmethod, ABC +from typing import List + +from recon_lw.reporting.match_diff.categorizer.types import ReconErrorStatsContext +from 
recon_lw.reporting.match_diff.categorizer.types import ErrorCategoriesStats
+from recon_lw.reporting.match_diff.categorizer.types.error_examples import ErrorExamples
+from recon_lw.reporting.match_diff.categorizer.types import ProblemFields
+from recon_lw.reporting.match_diff.categorizer.types import MatchesStats
+
+
+class IErrorCategorizer(ABC):
+    def __init__(self):
+        self._error_stats = ErrorCategoriesStats()
+        self._matches_stats = MatchesStats()
+        self._problem_fields = ProblemFields()
+        self._error_examples = ErrorExamples()
+
+    def process_events(self, events: List[dict]) -> ReconErrorStatsContext:
+        for event in events:
+            self.process_event(event)
+        return ReconErrorStatsContext(
+            error_examples=self._error_examples,
+            error_stats=self._error_stats,
+            problem_fields=self._problem_fields,
+            matches_stats=self._matches_stats
+        )
+
+    @abstractmethod
+    def process_event(self, event: dict):
+        pass
\ No newline at end of file
diff --git a/recon_lw/reporting/match_diff/categorizer/basic.py b/recon_lw/reporting/match_diff/categorizer/basic.py
new file mode 100644
index 0000000..b9206d2
--- /dev/null
+++ b/recon_lw/reporting/match_diff/categorizer/basic.py
@@ -0,0 +1,68 @@
+from recon_lw.interpretation.interpretation_functions import ReconType
+from recon_lw.reporting.match_diff.categorizer.base import IErrorCategorizer
+from recon_lw.reporting.match_diff.categorizer.event_category.base import ErrorCategoryStrategy
+from recon_lw.reporting.recon_context.context import ReconContext
+
+
+class BasicErrorCategoriser(IErrorCategorizer):
+    """Collects match counts, problem fields and diff categories from recon events."""
+
+    def __init__(
+        self,
+        error_extractor_strategy: ErrorCategoryStrategy,
+        recon_context: ReconContext
+    ):
+        super().__init__()
+        self.error_extractor_strategy = error_extractor_strategy
+        self.efr = recon_context.get_efr()
+        self.mfr = recon_context.get_mft()
+
+    def process_event(
+        self,
+        event: dict,
+    ):
+        """Update the statistics with one recon event.
+
+        Only events whose type equals `ReconType.BasicReconMatch.value` are
+        processed. A match without a 'diff' in its body is added to the match
+        statistics; a match with a 'diff' contributes one problem field, one
+        error category and one example per diff entry that gets a category.
+        Events for which reading the attached message id pair raises
+        ValueError are skipped.
+        """
+        etype = self.efr.get_type(event)
+        recon_name = event["recon_name"]
+        body = event["body"]
+        body = body if body is not None else {}
+
+        if etype != ReconType.BasicReconMatch.value:
+            return
+
+        try:
+            orig, copy = self.efr.get_attached_messages_ids(event)
+        except ValueError:
+            # TODO: what to do with multimatches
+            return
+
+        strategy = self.error_extractor_strategy
+        diffs = body.get('diff')
+        if diffs is None:
+            if orig and copy:
+                category = strategy.match_extractor(recon_name, orig, copy, event)
+                recon_name = f"{recon_name} | [{category.name}]"
+            self._matches_stats.add_match(recon_name)
+            return
+
+        if orig and copy:
+            category = strategy.match_diff_extractor(recon_name, orig, copy, event)
+            recon_name = f"{recon_name} | [{category.name}]"
+
+        for diff in diffs:
+            category = strategy.diff_category_extractor(recon_name, diff, event)
+            if not category:
+                continue
+
+            field = diff["field_name"]
+            self._problem_fields.add_problem_field(recon_name, field)
+            self._error_stats.add_error_category(recon_name, category)
+            self._error_examples.add_error_example(recon_name, category, event['attachedMessageIds'])
\ No newline at end of file
diff --git a/recon_lw/reporting/match_diff/categorizer/event_category/__init__.py b/recon_lw/reporting/match_diff/categorizer/event_category/__init__.py
new file mode 100644
index 0000000..7ddc7aa
--- /dev/null
+++ b/recon_lw/reporting/match_diff/categorizer/event_category/__init__.py
@@ -0,0 +1,2 @@
+from recon_lw.reporting.match_diff.categorizer.event_category.base import *
+from recon_lw.reporting.match_diff.categorizer.event_category.basic import *
\ No newline at end of file
diff --git a/recon_lw/reporting/match_diff/categorizer/event_category/base.py b/recon_lw/reporting/match_diff/categorizer/event_category/base.py
new file mode 100644
index 0000000..b0aac68
--- /dev/null
+++
b/recon_lw/reporting/match_diff/categorizer/event_category/base.py
@@ -0,0 +1,57 @@
+from abc import abstractmethod, ABC
+from dataclasses import dataclass
+from typing import Optional, Protocol
+
+
+# unsafe_hash=True: categories are used as dict keys by the stats and example
+# collectors; a plain eq-comparing, non-frozen dataclass would be unhashable.
+@dataclass(unsafe_hash=True)
+class EventCategory:
+    """Named category assigned to a recon event or to a single field diff."""
+    name: str
+
+
+class IEventCategoryExtractor(ABC):
+    """Callable base class that derives a category from a recon event."""
+
+    def __call__(self, recon_name: str, orig, copy, event: dict) -> EventCategory:
+        return self.extract_category(recon_name, orig, copy, event)
+
+    @abstractmethod
+    def extract_category(self, recon_name: str, orig, copy, event: dict) -> EventCategory:
+        pass
+
+
+class IEventCategoryExtractorProtocol(Protocol):
+    """Structural type of any callable usable as an event category extractor."""
+
+    def __call__(self, recon_name: str, orig, copy, event: dict):
+        pass
+
+
+class IDiffCategoryExtractor(ABC):
+    """Callable base class that derives a category from one field diff."""
+
+    def __call__(self, recon_name: str, diff: dict, event: dict) -> Optional[EventCategory]:
+        return self.extract_category(recon_name, diff, event)
+
+    @abstractmethod
+    def extract_category(self, recon_name: str, diff: dict, event: dict) -> Optional[EventCategory]:
+        """Return the category of `diff`, or None when the diff gets no category."""
+
+
+class IDiffCategoryExtractorProtocol(Protocol):
+    """Structural type of any callable usable as a diff category extractor."""
+
+    def __call__(self, recon_name: str, diff: dict, event: dict) -> Optional[EventCategory]:
+        pass
+
+
+@dataclass
+class ErrorCategoryStrategy:
+    """Bundle of the extractor callables used by an error categorizer."""
+    match_extractor: IEventCategoryExtractorProtocol
+    match_diff_extractor: IEventCategoryExtractorProtocol
+    miss_left_extractor: IEventCategoryExtractorProtocol
+    miss_right_extractor: IEventCategoryExtractorProtocol
+    diff_category_extractor: IDiffCategoryExtractorProtocol
diff --git a/recon_lw/reporting/match_diff/categorizer/event_category/basic.py b/recon_lw/reporting/match_diff/categorizer/event_category/basic.py
new file mode 100644
index 0000000..6de049c
--- /dev/null
+++ b/recon_lw/reporting/match_diff/categorizer/event_category/basic.py
@@ -0,0 +1,118 @@
+from typing import Dict, Optional
+
+from recon_lw.reporting.known_issues.issue import Issue
+from recon_lw.reporting.match_diff.categorizer.event_category.base import IDiffCategoryExtractor, EventCategory, \
+    IEventCategoryExtractor
+
+
+class BasicDiffCategoryExtractor(IDiffCategoryExtractor):
+    """Builds a textual category for a field diff and tags known issues.
+
+    The category text is also the key looked up in `known_issues`; when an
+    issue is found its string form is appended to the category.
+    """
+
+    def __init__(self,
+                 known_issues: Optional[Dict[str, Issue]],
+                 text_fields_masked_values=None,
+                 list_fields_masked_values=None,
+                 additional_field_aliases=None
+                 ):
+        """
+        Args:
+            known_issues: Category text -> issue. None means no known issues.
+            text_fields_masked_values: Field names whose values are replaced
+                by a placeholder in the category text.
+            list_fields_masked_values: Field names whose values are replaced
+                by the "LIST VALUE" placeholder.
+            additional_field_aliases: Optional display names for the keys of
+                the event's `additional_fields_info`.
+        """
+        if known_issues is None:
+            # Must be a mapping: it is queried with .get() below.
+            known_issues = {}
+        if text_fields_masked_values is None:
+            text_fields_masked_values = []
+        if list_fields_masked_values is None:
+            list_fields_masked_values = []
+        if additional_field_aliases is None:
+            additional_field_aliases = {}
+        self.known_issues = known_issues
+        self.text_fields_masked_values = text_fields_masked_values
+        self.list_fields = list_fields_masked_values
+        self.additional_fields_aliases = additional_field_aliases
+
+    def extract_category(self, recon_name: str, diff: dict, event: dict) -> Optional[EventCategory]:
+        """Return the category for one diff entry.
+
+        Returns None when only `actual` is a dict; when `expected` is a dict
+        its 'message' entry is used as the category text.
+        """
+        expected = diff["expected"]
+        actual = diff["actual"]
+        field = diff["field_name"]
+
+        if isinstance(expected, dict):
+            cat = f"{recon_name}: {field}: {expected['message']}"
+            return EventCategory(self._with_known_issue(cat))
+
+        if isinstance(actual, dict):
+            return None
+
+        expected = self._primify(expected)
+        actual = self._primify(actual)
+
+        if field in self.text_fields_masked_values:
+            # NOTE(review): both values were already quoted by _primify, so the
+            # "__NOT_EXISTS__" and bool checks below can never match - confirm
+            # the intent. The two placeholders also differ ("TEXT VALUE" vs
+            # "TEXT_VALUE"); left as is because the category text is a lookup key.
+            if expected not in ("__NOT_EXISTS__", "''") and not isinstance(expected, bool):
+                expected = "TEXT VALUE"
+
+            if actual not in ("__NOT_EXISTS__", "''") and not isinstance(actual, bool):
+                actual = "TEXT_VALUE"
+
+        elif field in self.list_fields:
+            expected = "LIST VALUE"
+            actual = "LIST VALUE"
+
+        cat = f"{recon_name}: field {field} {expected} != {actual}"
+        additional_fields_info = event['body'].get('additional_fields_info')
+        if additional_fields_info:
+            additional_info = " | ".join(
+                self._get_additional_info_formatted(key, values)
+                for key, values in additional_fields_info.items()
+            )
+            cat = f"{cat} | {additional_info}"
+        return EventCategory(self._with_known_issue(cat))
+
+    def _with_known_issue(self, cat: str) -> str:
+        """Append the known issue registered for `cat`, if there is one."""
+        issue = self.known_issues.get(cat)
+        if issue:
+            cat += f" | {issue}"
+        return cat
+
+    def _get_additional_info_formatted(self, key, values):
+        """Format one additional field; `values` is indexed as a pair."""
+        alias = self.additional_fields_aliases.get(key)
+        if alias:
+            key = alias
+
+        if values[0] == values[1]:
+            return f"{key}='{values[0]}'"
+        # NOTE(review): the closing quote after values[1] is missing; kept
+        # because the resulting text is part of the known-issue lookup key.
+        return f"{key}='{values[0]}'!='{values[1]}"
+
+    def _primify(self, str):
+        """Wrap the string form of a value in single quotes."""
+        return f"'{str}'"
+
+
+class BasicEventCategoryExtractor(IEventCategoryExtractor):
+    """Uses the recon name itself as the category."""
+
+    def extract_category(self, recon_name: str, orig, copy, event: dict) -> EventCategory:
+        return EventCategory(recon_name)
\ No newline at end of file
diff --git a/recon_lw/reporting/match_diff/categorizer/types/__init__.py b/recon_lw/reporting/match_diff/categorizer/types/__init__.py
new file mode 100644
index 0000000..1c51978
--- /dev/null
+++ b/recon_lw/reporting/match_diff/categorizer/types/__init__.py
@@ -0,0 +1,5 @@
+from recon_lw.reporting.match_diff.categorizer.types.match_stats import *
+from recon_lw.reporting.match_diff.categorizer.types.context import *
+from recon_lw.reporting.match_diff.categorizer.types.error_categories_stats import *
+from recon_lw.reporting.match_diff.categorizer.types.error_examples import *
+from recon_lw.reporting.match_diff.categorizer.types.field_problems import *
\ No newline at end of file
diff --git a/recon_lw/reporting/match_diff/categorizer/types/context.py b/recon_lw/reporting/match_diff/categorizer/types/context.py
new file mode 100644
index 0000000..419edc2
--- /dev/null
+++ b/recon_lw/reporting/match_diff/categorizer/types/context.py
@@ -0,0 +1,14 @@
+from dataclasses import dataclass
+
+from recon_lw.reporting.match_diff.categorizer.types.error_categories_stats import ErrorCategoriesStats
+from recon_lw.reporting.match_diff.categorizer.types.error_examples import ErrorExamples
+from recon_lw.reporting.match_diff.categorizer.types.field_problems import ProblemFields
+from recon_lw.reporting.match_diff.categorizer.types.match_stats import MatchesStats
+
+
+@dataclass
+class ReconErrorStatsContext:
+    error_examples: ErrorExamples
+    error_stats: ErrorCategoriesStats
+    problem_fields: ProblemFields
+    matches_stats: MatchesStats
\
No newline at end of file diff --git a/recon_lw/reporting/match_diff/categorizer/types/error_categories_stats.py b/recon_lw/reporting/match_diff/categorizer/types/error_categories_stats.py new file mode 100644 index 0000000..e4dd319 --- /dev/null +++ b/recon_lw/reporting/match_diff/categorizer/types/error_categories_stats.py @@ -0,0 +1,32 @@ +from collections import defaultdict +from typing import Dict + +import tabulate + +from recon_lw.reporting.match_diff.categorizer.event_category.base import EventCategory + + +class ErrorCategoriesStats: + def __init__(self, error_categories: Dict[EventCategory, int]=None): + if not error_categories: + error_categories = defaultdict(lambda: defaultdict(lambda: 0)) + self.error_categories = error_categories + + def add_error_category(self, recon_name, error_category: EventCategory): + self.error_categories[recon_name][error_category] += 1 + + def _get_sorted_error_categories(self, recon_name): + return [ + (k, v) for k, v in sorted( + self.error_categories[recon_name].items(), key=lambda x: x[1], reverse=True + ) + ] + + def get_table_stats(self, recon_name: str): + return tabulate.tabulate( + self._get_sorted_error_categories(recon_name), + headers=['category', 'count'], + tablefmt='html' + ) + + diff --git a/recon_lw/reporting/match_diff/categorizer/types/error_examples.py b/recon_lw/reporting/match_diff/categorizer/types/error_examples.py new file mode 100644 index 0000000..a322339 --- /dev/null +++ b/recon_lw/reporting/match_diff/categorizer/types/error_examples.py @@ -0,0 +1,28 @@ +from collections import defaultdict +from typing import Optional, List + +from recon_lw.reporting.match_diff.categorizer.event_category import EventCategory + + +class ErrorExamples: + def __init__(self, category_example_limit=5): + self._error_examples = defaultdict(lambda: defaultdict(list)) + self.category_example_limit = category_example_limit + self._error_ids = [] + + def add_error_example(self, recon_name: str, error_category: 
EventCategory, attached_ids: Optional[List[str]]): + if attached_ids is not None: + n = len(self._error_examples[recon_name][error_category]) + if n < self.category_example_limit: + self._error_examples[recon_name][error_category].append(attached_ids) + for attached_id in attached_ids: + self._error_ids.append(attached_id) + + def is_id_affected(self, message_id): + return message_id in self._error_ids + + def get_affected_recons(self): + return self._error_examples.keys() + + def get_examples(self, recon_name) -> dict: + return self._error_examples[recon_name] \ No newline at end of file diff --git a/recon_lw/reporting/match_diff/categorizer/types/field_problems.py b/recon_lw/reporting/match_diff/categorizer/types/field_problems.py new file mode 100644 index 0000000..0fded9f --- /dev/null +++ b/recon_lw/reporting/match_diff/categorizer/types/field_problems.py @@ -0,0 +1,22 @@ +from collections import defaultdict + +import tabulate + + +class ProblemFields: + def __init__(self): + self._problem_fields = defaultdict(lambda: defaultdict(lambda: 0)) + + def add_problem_field(self, recon_name: str, problem_field: str): + self._problem_fields[recon_name][problem_field] += 1 + + def _get_sorted_problem_fields(self, recon_name: str): + return [ + (k, v) + for k, v in sorted( + self._problem_fields[recon_name].items(), key=lambda x: x[1], reverse=True + ) + ] + + def get_table(self, recon_name: str): + return tabulate.tabulate(self._get_sorted_problem_fields(recon_name), headers=["field", "count"], tablefmt='html') \ No newline at end of file diff --git a/recon_lw/reporting/match_diff/categorizer/types/match_stats.py b/recon_lw/reporting/match_diff/categorizer/types/match_stats.py new file mode 100644 index 0000000..167a508 --- /dev/null +++ b/recon_lw/reporting/match_diff/categorizer/types/match_stats.py @@ -0,0 +1,15 @@ +from collections import defaultdict +from typing import Dict + + +class MatchesStats: + def __init__(self, match_categories: Dict[str, int]=None): + if 
not match_categories: + match_categories = defaultdict(int) + self.match_categories = match_categories + + def add_match(self, recon_name: str): + self.match_categories[recon_name] += 1 + + def match_number(self, recon_name: str): + return self.match_categories[recon_name] \ No newline at end of file diff --git a/recon_lw/reporting/match_diff/viewer/__init__.py b/recon_lw/reporting/match_diff/viewer/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/recon_lw/reporting/match_diff/viewer/category_displayer.py b/recon_lw/reporting/match_diff/viewer/category_displayer.py new file mode 100644 index 0000000..7441a66 --- /dev/null +++ b/recon_lw/reporting/match_diff/viewer/category_displayer.py @@ -0,0 +1,202 @@ +import json +from itertools import chain +from typing import List, Tuple, Callable, Dict, Optional + +from IPython.core.display import HTML, Markdown +from IPython.core.display_functions import display +from tabulate import tabulate +from th2_data_services.data import Data + +from recon_lw.core.type.types import Message +from recon_lw.reporting.match_diff.categorizer.types.context import ReconErrorStatsContext +from recon_lw.reporting.match_diff.viewer.color_provider.base import ICategoryColorProvider, \ + ICategoryColorProviderProtocol +from recon_lw.reporting.match_diff.viewer.content_provider.base import IExampleContentProvider +from recon_lw.reporting.match_diff.viewer.style_provider.base import ErrorExamplesStyleProvider, \ + ErrorExamplesStyleProviderProtocol +from recon_lw.reporting.match_diff.viewer.types.types import MatchDiffExampleData +from recon_lw.reporting.match_diff.viewer.utils import get_group_data_map, get_group_from_id, \ + sort_msgs_by_th2_timestamp, get_msgs_by_id +from recon_lw.reporting.recon_context.context import ReconContext + +MessageId = str +IdProvider = Callable[[Message], List[Optional[str]]] + +class ErrorExampleDisplayer: + def __init__( + self, + category_color_provider: ICategoryColorProviderProtocol, + 
error_examples_styles_provider: ErrorExamplesStyleProviderProtocol + ): + self.category_color_provider = category_color_provider + self.error_examples_styles_provider = error_examples_styles_provider + self._uid = 1 + + def apply_styles(self): + display(HTML(self.error_examples_styles_provider())) + + def display_category(self, category: str, examples: List[Tuple[MatchDiffExampleData, MatchDiffExampleData]]) -> None: + data = self._get_example_comparison_table( + category, + examples, + self._uid + ) + self._uid += 1 + display(HTML(data)) + + def _get_example_comparison_table(self, + category: str, + examples: List[Tuple[MatchDiffExampleData, MatchDiffExampleData]], + uid: int + ): + category_color = self.category_color_provider(category) + category_style = f"background-color: {category_color}" + + content_header = f''' + + + + + ''' + + content_footer = f""" +
{category}
+ """ + + items = ( + self._get_example_tr( + self._get_example_td(example[0], f"colapsable-{uid}-{idx}"), + self._get_example_td(example[1], f"colapsable-{uid}-{-idx}"), + ) + for idx, example in enumerate(examples, start=1) + ) + + return "\n".join(chain((content_header,), items, (content_footer, ))) + + @staticmethod + def _get_example_td(example_data: MatchDiffExampleData, item_id: str): + if isinstance(example_data.message_content, list): + code_mc = '' + for mc in example_data.message_content: + code_mc += f'
{json.dumps(mc, indent=4)}
' + else: + code_mc = f'{json.dumps(example_data.message_content, indent=4)}' + + return f''' + +
+ + +
+
+ {code_mc} +
+
+
+ + ''' + + @staticmethod + def _get_example_tr(td1, td2): + return f''' + + {td1} + {td2} + + ''' + +class MatchDiffViewer: + def __init__( + self, + recon_stats_context: ReconErrorStatsContext, + messages: Data, + data_objects: List[Data], + message_business_ids_provider: IdProvider, + message_content_provider: IExampleContentProvider, + recon_context: ReconContext, + error_example_displayer: ErrorExampleDisplayer + ): + self.context = recon_stats_context + self.events: List[dict] = recon_context.get_recon_events() + self.messages = messages + self.mfr = recon_context.get_mft() + self.id_provider = message_business_ids_provider + self.data_objects: List[Data] = data_objects + self.content_provider: IExampleContentProvider = message_content_provider + self._cache = None + self.error_example_displayer = error_example_displayer + + def _get_cache(self) -> Dict[MessageId, Message]: + if self._cache: + return self._cache + + self._cache = {} + + for message in self.messages: + id = self.mfr.get_id(message) + if self.context.error_examples.is_id_affected(id): + self._cache[id] = message + return self._cache + + def display_report(self): + for recon_name in self.context.error_examples.get_affected_recons(): + display(Markdown(f"### {recon_name}")) + display(Markdown(f"#### {recon_name} full matches = {self.context.matches_stats.match_number(recon_name)}")) + display(Markdown(f"#### {recon_name} fields with problems")) + display(Markdown(f"#### {self.context.problem_fields.get_table(recon_name)}")) + display(Markdown(f"#### {recon_name} matches with diffs")) + + group_data_map = get_group_data_map(self.data_objects, 'default') + self.error_example_displayer.apply_styles() + for category, items in self.context.error_examples.get_examples(recon_name).items(): + examples = [] + for i in items: + msg_id0 = i[0] + group = get_group_from_id(msg_id0) + data_for_group = group_data_map.get(group, group_data_map['default']) + + msg0 = self._get_cache()[msg_id0] + msg_ids = 
self.id_provider(msg0) + + sorted_msgs0 = None + message_content0 = None + if len(msg_ids) > 0: + matched_msgs = get_msgs_by_id( + data_for_group, + ids=msg_ids, + id_function=self.id_provider + ) + sorted_msgs0 = sort_msgs_by_th2_timestamp(matched_msgs) + message_content0 = self.content_provider.get_example_content(msg_ids, sorted_msgs0) + if not sorted_msgs0: + message_content0 = self.content_provider.get_example_content(msg_ids, [i[0]]) + + msg_id1 = i[1] + group1 = get_group_from_id(msg_id1) + data_for_group1 = group_data_map.get(group1, group_data_map['default']) + + msg1 = self._get_cache()[msg_id1] + msg_ids1 = self.id_provider(msg1) + + sorted_msgs1 = None + message_content1 = None + if len(msg_ids1) > 0: + matched_msgs = get_msgs_by_id( + data_for_group1, + ids=msg_ids1, + id_function=self.id_provider + ) + sorted_msgs1 = sort_msgs_by_th2_timestamp(matched_msgs) + message_content1 = self.content_provider.get_example_content(msg_ids1, sorted_msgs1) + + if not sorted_msgs1: + message_content1 = self.content_provider.get_example_content(msg_ids1, [i[1]]) + + examples.append( + ( + MatchDiffExampleData(i[0], message_content0), + MatchDiffExampleData(i[1], message_content1) + ) + ) + + self.error_example_displayer.display_category(category, list(examples)) \ No newline at end of file diff --git a/recon_lw/reporting/match_diff/viewer/color_provider/__init__.py b/recon_lw/reporting/match_diff/viewer/color_provider/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/recon_lw/reporting/match_diff/viewer/color_provider/base.py b/recon_lw/reporting/match_diff/viewer/color_provider/base.py new file mode 100644 index 0000000..90fe660 --- /dev/null +++ b/recon_lw/reporting/match_diff/viewer/color_provider/base.py @@ -0,0 +1,15 @@ +from abc import abstractmethod, ABC +from typing import Protocol + + +class ICategoryColorProvider(ABC): + + def __call__(self, category: str) -> str: + return self.get_category_color(category) + @abstractmethod + def 
get_category_color(self, category: str) -> str: + pass + +class ICategoryColorProviderProtocol(Protocol): + def __call__(self, category: str) -> str: + pass diff --git a/recon_lw/reporting/match_diff/viewer/color_provider/default.py b/recon_lw/reporting/match_diff/viewer/color_provider/default.py new file mode 100644 index 0000000..22a2ac3 --- /dev/null +++ b/recon_lw/reporting/match_diff/viewer/color_provider/default.py @@ -0,0 +1,6 @@ +from recon_lw.reporting.match_diff.viewer.color_provider.base import ICategoryColorProvider + + +class DefaultCategoryColorProvider(ICategoryColorProvider): + def get_category_color(self, category: str) -> str: + return '#C3B1E1' \ No newline at end of file diff --git a/recon_lw/reporting/match_diff/viewer/content_provider/__init__.py b/recon_lw/reporting/match_diff/viewer/content_provider/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/recon_lw/reporting/match_diff/viewer/content_provider/base.py b/recon_lw/reporting/match_diff/viewer/content_provider/base.py new file mode 100644 index 0000000..d785a18 --- /dev/null +++ b/recon_lw/reporting/match_diff/viewer/content_provider/base.py @@ -0,0 +1,10 @@ +from abc import abstractmethod, ABC +from typing import List, Any + +from recon_lw.core.type.types import Message + + +class IExampleContentProvider(ABC): + @abstractmethod + def get_example_content(self, ids: List[str], messages: List[Message]) -> List[Any]: + pass \ No newline at end of file diff --git a/recon_lw/reporting/match_diff/viewer/content_provider/default.py b/recon_lw/reporting/match_diff/viewer/content_provider/default.py new file mode 100644 index 0000000..75085da --- /dev/null +++ b/recon_lw/reporting/match_diff/viewer/content_provider/default.py @@ -0,0 +1,12 @@ +from typing import List, Any + +from recon_lw.core.type.types import Message +from recon_lw.reporting.match_diff.viewer.content_provider.base import IExampleContentProvider + + +class 
DefaultExampleContentProvider(IExampleContentProvider): + def get_example_content(self, ids: List[str], messages: List[Message]) -> List[Any]: + body = messages[0].get('body') + if isinstance(body, list): + body = body[0] + return [body.get('fields')] diff --git a/recon_lw/reporting/match_diff/viewer/style_provider/__init__.py b/recon_lw/reporting/match_diff/viewer/style_provider/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/recon_lw/reporting/match_diff/viewer/style_provider/base.py b/recon_lw/reporting/match_diff/viewer/style_provider/base.py new file mode 100644 index 0000000..98fb617 --- /dev/null +++ b/recon_lw/reporting/match_diff/viewer/style_provider/base.py @@ -0,0 +1,16 @@ +from abc import ABC, abstractmethod +from typing import Protocol + + +class ErrorExamplesStyleProvider(ABC): + + def __call__(self) -> str: + return self.get_styles() + + @abstractmethod + def get_styles(self) -> str: + pass + +class ErrorExamplesStyleProviderProtocol(Protocol): + def __call__(self) -> str: + pass \ No newline at end of file diff --git a/recon_lw/reporting/match_diff/viewer/style_provider/default.py b/recon_lw/reporting/match_diff/viewer/style_provider/default.py new file mode 100644 index 0000000..566731b --- /dev/null +++ b/recon_lw/reporting/match_diff/viewer/style_provider/default.py @@ -0,0 +1,77 @@ +from recon_lw.reporting.match_diff.viewer.style_provider.base import ErrorExamplesStyleProvider + + +class DefaultErrorExamplesStyleProvider(ErrorExamplesStyleProvider): + def get_styles(self) -> str: + return ''' + + ''' \ No newline at end of file diff --git a/recon_lw/reporting/match_diff/viewer/types/__init__.py b/recon_lw/reporting/match_diff/viewer/types/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/recon_lw/reporting/match_diff/viewer/types/types.py b/recon_lw/reporting/match_diff/viewer/types/types.py new file mode 100644 index 0000000..2426c0b --- /dev/null +++ b/recon_lw/reporting/match_diff/viewer/types/types.py 
@@ -0,0 +1,8 @@ +from dataclasses import dataclass +from typing import Union, List, Any + + +@dataclass +class MatchDiffExampleData: + message_id: str + message_content: Union[List[Any], Any] \ No newline at end of file diff --git a/recon_lw/reporting/match_diff/viewer/utils.py b/recon_lw/reporting/match_diff/viewer/utils.py new file mode 100644 index 0000000..504a5f9 --- /dev/null +++ b/recon_lw/reporting/match_diff/viewer/utils.py @@ -0,0 +1,32 @@ +from typing import List, Callable + +from th2_data_services.data import Data +from th2_data_services.utils.converters import Th2TimestampConverter + +from recon_lw.core.type.types import Message + +from th2_data_services.config import options as o + + +def get_group_data_map(datas_list: List[Data], default: str): + return {do.metadata.get('group', default): do for do in datas_list} + +def get_msgs_by_id(data: Data, ids: list, id_function: Callable[[Message], List[str]]): + ids = set(ids) + + res = [] + for m in data: + m_ids = id_function(m) + for id in m_ids: + if id in ids: + res.append(m) + return res + +def get_group_from_id(msg_id: str): + return msg_id.split(':', 2)[1] + +def get_timestamp_ns(m): + return Th2TimestampConverter.to_microseconds(o.emfr.get_timestamp(m)) + +def sort_msgs_by_th2_timestamp(msgs: List[Message]): + return sorted(msgs, key=get_timestamp_ns) \ No newline at end of file diff --git a/recon_lw/reporting/missing_messages/__init__.py b/recon_lw/reporting/missing_messages/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/recon_lw/reporting/missing_messages/categorizer/__init__.py b/recon_lw/reporting/missing_messages/categorizer/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/recon_lw/reporting/missing_messages/categorizer/categorizer_impl.py b/recon_lw/reporting/missing_messages/categorizer/categorizer_impl.py new file mode 100644 index 0000000..5663f5f --- /dev/null +++ b/recon_lw/reporting/missing_messages/categorizer/categorizer_impl.py @@ -0,0 +1,17 
@@ +from typing import List, Dict + +from recon_lw.reporting.missing_messages.categorizer.matcher_interface import MissCategorizer +from recon_lw.reporting.missing_messages.categorizer.rule import MissCategorizationRule + + +class SimpleMissesCategorizer(MissCategorizer): + def __init__(self, rules: Dict[str, List[MissCategorizationRule]]): + self.rules = rules + + def __call__(self, recon_error, miss_event): + rules_list = self.rules.get(recon_error, []) + + for rule in rules_list: + if rule.handler(miss_event): + return rule.ticket, rule.comment + return None, None \ No newline at end of file diff --git a/recon_lw/reporting/missing_messages/categorizer/matcher_interface.py b/recon_lw/reporting/missing_messages/categorizer/matcher_interface.py new file mode 100644 index 0000000..f121081 --- /dev/null +++ b/recon_lw/reporting/missing_messages/categorizer/matcher_interface.py @@ -0,0 +1,11 @@ +from typing import Protocol, Dict, Tuple, Optional + + +class MissMatcher(Protocol): + def __call__(self, event: dict) -> bool: + pass + +class MissCategorizer(Protocol): + def __call__(self, recon_error: str, miss_event: Dict) -> Optional[Tuple[str, str]]: + pass + diff --git a/recon_lw/reporting/missing_messages/categorizer/matchers_impl.py b/recon_lw/reporting/missing_messages/categorizer/matchers_impl.py new file mode 100644 index 0000000..9216dc4 --- /dev/null +++ b/recon_lw/reporting/missing_messages/categorizer/matchers_impl.py @@ -0,0 +1,31 @@ +from typing import Dict + +from recon_lw.reporting.missing_messages.categorizer.matcher_interface import MissMatcher + + +class SimpleMatcher(MissMatcher): + def __init__(self, field: str, **kwargs: Dict): + self.field = field + self.conditions = kwargs + + def __call__(self, event): + values = event['body'][self.field] + for key, value in self.conditions.items(): + if values.get(key) != value: + return False + return True + +class SimpleMatcherFlat(MissMatcher): + def __init__(self, **kwargs: Dict): + self.conditions = kwargs + 
+ def __call__(self, event): + values = event['body'] + for key, value in self.conditions.items(): + if values.get(key) != value: + return False + return True + +class MessageMatcherFlat(MissMatcher): + def __init__(self, **kwargs: Dict): + self.conditions = kwargs \ No newline at end of file diff --git a/recon_lw/reporting/missing_messages/categorizer/rule.py b/recon_lw/reporting/missing_messages/categorizer/rule.py new file mode 100644 index 0000000..1b044a5 --- /dev/null +++ b/recon_lw/reporting/missing_messages/categorizer/rule.py @@ -0,0 +1,12 @@ +from dataclasses import dataclass, field +from typing import Optional + +from recon_lw.reporting.known_issues.issue import Issue +from recon_lw.reporting.missing_messages.categorizer.matcher_interface import MissMatcher + + +@dataclass +class MissCategorizationRule: + ticket: Issue + handler: MissMatcher + comment: Optional[str] = field(default=None) \ No newline at end of file diff --git a/recon_lw/reporting/missing_messages/categorizer/utils.py b/recon_lw/reporting/missing_messages/categorizer/utils.py new file mode 100644 index 0000000..e69de29 diff --git a/recon_lw/reporting/missing_messages/utils.py b/recon_lw/reporting/missing_messages/utils.py new file mode 100644 index 0000000..fcd8135 --- /dev/null +++ b/recon_lw/reporting/missing_messages/utils.py @@ -0,0 +1,49 @@ +from collections import defaultdict +from io import FileIO + +from recon_lw.interpretation.interpretation_functions import ReconType +from recon_lw.reporting.missing_messages.categorizer.matcher_interface import MissCategorizer +from recon_lw.reporting.recon_context.context import ReconContext + + +class MissedMessageHandler: + def __init__(self, + recon_context: ReconContext, + miss_categoriser: MissCategorizer): + self.recon_context = recon_context + self.efr = recon_context.get_efr() + self.mfr = recon_context.get_mft() + self.miss_categoriser = miss_categoriser + + def write_to_file(self, file: FileIO): + for e in 
self.recon_context.get_recon_events(): + if self.efr.get_status(e): + continue + type = self.efr.get_type(e) + recon_name = e['reconName'] + attached = e["attachedMessageIds"] + if type == ReconType.BasicReconMissLeft: + print("\t\t NO_ORIG", recon_name, attached, e['body']['key'], file) + elif type == ReconType.BasicReconMissRight: + print("\t\t NO_COPY", recon_name, attached, e['body']['key'], file) + + def categorise_and_filter(self, messages): + missed_message_ids = {} + error_categories = defaultdict(int) + for e in self.recon_context.get_recon_events(): + if self.efr.get_status(e): + continue + type = self.efr.get_type(e) + recon_name = e['recon_name'] + attached = e["attachedMessageIds"] + if type == ReconType.BasicReconMissLeft.value: + error_kind = f"no_orig {recon_name}" + elif type == ReconType.BasicReconMissRight.value: + error_kind = f"no_copy {recon_name}" + else: + error_kind = None + if error_kind: + missed_message_ids[attached[0]] = error_kind + error_categories[(error_kind,) + self.miss_categoriser(error_kind, e)] += 1 + return messages.filter(lambda m: m['messageId'] in missed_message_ids), error_categories + diff --git a/recon_lw/reporting/missing_messages/viewer/__init__.py b/recon_lw/reporting/missing_messages/viewer/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/recon_lw/reporting/missing_messages/viewer/missing_message.py b/recon_lw/reporting/missing_messages/viewer/missing_message.py new file mode 100644 index 0000000..5864d22 --- /dev/null +++ b/recon_lw/reporting/missing_messages/viewer/missing_message.py @@ -0,0 +1,37 @@ +from typing import Dict, Tuple + +import tabulate +from IPython.core.display_functions import display + +Count = int +MissCategory = Tuple + +class MissingMessagesDisplayer: + def __init__(self, + missed_messages_categories: Dict[MissCategory, Count] + ): + self.classes = missed_messages_categories + + def display(self, missed_messages): + + classes = self.classes + + simple_misses_table = [] + for 
miss_category, miss_counter in classes.items(): + if miss_category[1:] != (None, None): + error_kind, miss_issue, miss_commentary = miss_category + simple_misses_table.append((error_kind, miss_issue, miss_commentary, miss_counter)) + else: + simple_misses_table.append((miss_category[0], "UNCATEGORIZED", '', miss_counter)) + simple_misses_table.append(('total', '', '', sum(classes.values()))) + + + display( + tabulate.tabulate( + simple_misses_table, + headers=['recon', 'miss_issue', 'miss_commentary', 'count'], + tablefmt='html' + ) + ) + + diff --git a/recon_lw/reporting/recon_context/__init__.py b/recon_lw/reporting/recon_context/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/recon_lw/reporting/recon_context/context.py b/recon_lw/reporting/recon_context/context.py new file mode 100644 index 0000000..bbc241a --- /dev/null +++ b/recon_lw/reporting/recon_context/context.py @@ -0,0 +1,39 @@ +from typing import Dict + +from th2_data_services.data_source.lwdp.resolver import MessageFieldResolver, EventFieldResolver + +from recon_lw.interpretation.interpretation_functions import ReconType +from recon_lw.reporting.recon_metadata.base import ReconMetadata +from recon_lw.reporting.utils import get_recon_events + + +class ReconContext: + def __init__(self, + recon_events_directory: str, + message_fields_resolver: MessageFieldResolver, + event_fields_resolver: EventFieldResolver + ): + self.recon_events_directory = recon_events_directory + self._recons_metadata = {} + self._recon_events: Dict[str, ReconMetadata] = None + self.mft = message_fields_resolver + self.eft = event_fields_resolver + + def get_efr(self): + return self.eft + + def get_mft(self): + return self.mft + + def get_recon_events(self, update_cache: bool = False): + if self._recon_events and not update_cache: + return self._recon_events + self._recon_events = get_recon_events(self.recon_events_directory) + return self._recon_events + + + def update_recon_metadata(self, recon_metadata: 
ReconMetadata): + self._recons_metadata[recon_metadata.recon_name] = recon_metadata + + def get_metadata(self)-> Dict[str, ReconMetadata]: + return self._recons_metadata \ No newline at end of file diff --git a/recon_lw/reporting/recon_metadata/__init__.py b/recon_lw/reporting/recon_metadata/__init__.py new file mode 100644 index 0000000..81399e2 --- /dev/null +++ b/recon_lw/reporting/recon_metadata/__init__.py @@ -0,0 +1 @@ +from recon_lw.reporting.recon_metadata.base import * \ No newline at end of file diff --git a/recon_lw/reporting/recon_metadata/base.py b/recon_lw/reporting/recon_metadata/base.py new file mode 100644 index 0000000..97f56d3 --- /dev/null +++ b/recon_lw/reporting/recon_metadata/base.py @@ -0,0 +1,12 @@ +from dataclasses import dataclass +from typing import List + +@dataclass +class FieldMetadata: + field_name: str + description: str + +@dataclass +class ReconMetadata: + recon_name: str + covered_fields: List[FieldMetadata] \ No newline at end of file diff --git a/recon_lw/reporting/stats/__init__.py b/recon_lw/reporting/stats/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/recon_lw/reporting/stats/stats.py b/recon_lw/reporting/stats/stats.py new file mode 100644 index 0000000..42cd65f --- /dev/null +++ b/recon_lw/reporting/stats/stats.py @@ -0,0 +1,29 @@ +from th2_data_services.utils.category import Category +from th2_data_services.utils.event_utils.totals import get_category_totals2 + +from recon_lw.reporting.recon_context.context import ReconContext + + +class EventStatisticsTableReport: + + def __init__(self, recon_context: ReconContext): + self.recon_context = recon_context + self.efr = recon_context.get_efr() + + def get_event_type_report_table(self): + return get_category_totals2( + self.recon_context.get_recon_events(), + [ + Category("Event Type", self.efr.get_type), + Category("Status", self.efr.get_status) + ] + ).sort_by(["Event Type", "Status"]) + + def get_event_names_report_table(self): + return 
get_category_totals2( + self.recon_context.get_recon_events(), + [ + Category("Event Name", self.efr.get_name), + Category("Status", self.efr.get_status) + ] + ).sort_by(["Event Name", "Status"]) \ No newline at end of file diff --git a/recon_lw/reporting/utils.py b/recon_lw/reporting/utils.py new file mode 100644 index 0000000..66cf078 --- /dev/null +++ b/recon_lw/reporting/utils.py @@ -0,0 +1,16 @@ +import os +from typing import List + +from th2_data_services.data import Data +from th2_data_services.utils.event_utils import totals + + +def get_recon_events(events_directory: str): + files = os.listdir(events_directory) + rslt = Data([]) + for f in files: + if not f.endswith(".pickle"): + continue + rslt += Data.from_cache_file(os.path.join(events_directory, f)) + + return rslt \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 3c36624..e52f4c0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,13 +1,3 @@ -#wheel~=0.38 # To get rid of wheel building errors -#sseclient-py~=1.7 -#grpcio-tools==1.38.1 # Do not change -#requests~=2.28 -#setuptools~=65.5 -#urllib3~=1.26 -#treelib==1.6.1 -#types-protobuf==3.19.22 -#importlib_metadata -#orjson~=3.8 sortedcontainers th2-data-services~=2.0.0.dev th2-data-services-lwdp>=2.1 # We cannot set exact version because it can be as 2 and as 3. 
diff --git a/template/adapters/base_adapter.py b/template/adapters/base_adapter.py deleted file mode 100644 index 1f6f1fe..0000000 --- a/template/adapters/base_adapter.py +++ /dev/null @@ -1,202 +0,0 @@ -from abc import abstractmethod, ABC -from typing import Callable, Dict, Any - -Msg = dict -FieldVal = Any -FieldGetterFunc = Callable[[Msg], FieldVal] - - -# class a: -# -# -# def get(self, field) -> FieldGetterFunc: -# return self.mapping[field] - - -class IBaseAdapter(ABC): - covered_fields = set() - body_field = "body" - - def __init__(self): - self.mapping = self.init_mapping() - - @abstractmethod - def init_mapping(self) -> Dict[str, FieldGetterFunc]: - """ - Examples: - {field1: get_field1_func} - - Returns: - - """ - - @abstractmethod - def get_fields_group(self, m, group_name): - return None - - @abstractmethod - def on_message(self, m): - """This is triggered when the message arrives in the recon - - Was done mostly for ack_handler - So that you can track the state - - return None in your Adapter if it's not required. - """ - pass - - @abstractmethod - def on_message_exit(self, m): - """This is triggered after the message has been processed - - Was done mostly for ack_handler - So that you can track the state - - return None in your Adapter if it's not required. 
- """ - pass - - def get(self, item, field, strict=False): - actual_field = self.mapping[field] - if isinstance(actual_field, Callable): - val = actual_field(item) - else: - val = item[self.body_field].get(actual_field, self.NE) - - if strict and val == self.NE: - raise KeyError(field) - - if val != self.NE: - val = str(val) - - return val - - def basic_conv_handler(self, item, field, converter): - val = item[self.body_field].get(field, self.NE) - if val != self.NE: - val = str(val) - return converter(item, field, val) - - def build_base_handler(self, field): - def handler(m, _): - val = m[self.body_field].get(field, self.NE) - if val != self.NE: - val = str(val) - return val - - return handler - - def build_conv_handler(self, field, converter): - def handler(m, _): - val = m[self.body_field].get(field, self.NE) - if val in {self.NE, None}: - return val - - val = str(val) - - return converter(m, field, val) - - return handler - - def get_simple_conv_handler(self, field, converter, pass_NE=False): - def simple_conv(item, field, val): - if val in {None, self.NE} and not pass_NE: - return val - return converter(val) - - def fun(item, _): - return self.basic_conv_handler( - item=item, field=field, converter=simple_conv - ) - - return fun - - def get_dict_handler(self, field, mapping): - def dict_converter(item, field, val): - result = mapping.get(val, b"404") - if result == b"404" and val not in {None, self.NE}: - if val == '300' or val == 300: - raise KeyError( - f"Uncovered value {val} for field {field} in mapping {mapping}") - return f"Unknown value {val}" - - if result == b"404": - return val - return result - - def fun(item, _): - return self.basic_conv_handler( - item=item, field=field, converter=dict_converter - ) - - return fun - - def build_default_value_handler(self, field, default): - base_handler = self.build_base_handler(field) - - def handler(m, _): - val = base_handler(m, _) - if val == self.NE: - return default - - return val - - return handler - - def 
build_constant_handler(self, value): - def handler(m, _): - return value - - return handler - - def build_conditional_masking_handler( - self, field_or_handler, condition, mask_value - ): - if isinstance(field_or_handler, str): - base_handler = self.build_base_handler(field_or_handler) - else: - base_handler = field_or_handler - - def handler(m, _): - if condition(m): - return mask_value - - val = base_handler(m, _) - return val - - return handler - - NE = "_NE_" - "Not exists" - - -class CompoundAdapter(IBaseAdapter): - def __init__(self, *adapters): - super().__init__() - self.adapters = adapters - - def init_mapping(self) -> Dict[str, FieldGetterFunc]: - pass - - def get_adapter(self, m): - for condition, adapter in self.adapters: - if condition(m): - return adapter - - raise SystemError(f"Can't handle {m} - no matching adapter") - - def get(self, item, field, strict=False): - handler = self.get_adapter(item) - return handler.get(item, field, strict) - - def on_message(self, m): - handler = self.get_adapter(m) - return handler.on_message(m) - - def on_message_exit(self, m): - handler = self.get_adapter(m) - return handler.on_message_exit(m) - - def get_fields_group(self, m, group_name): - handler = self.get_adapter(m) - return handler.get_fields_group(m, group_name) diff --git a/template/adapters/readme.md b/template/adapters/readme.md deleted file mode 100644 index 9f8b580..0000000 --- a/template/adapters/readme.md +++ /dev/null @@ -1,4 +0,0 @@ -The repo with Py modules that contain ... 
- - -Adapters are just a dicts that provide field value in the required format \ No newline at end of file diff --git a/template/adapters/stream1_adapter.py b/template/adapters/stream1_adapter.py deleted file mode 100644 index 5aaa6d9..0000000 --- a/template/adapters/stream1_adapter.py +++ /dev/null @@ -1,41 +0,0 @@ -from typing import Dict - -from template.adapters.base_adapter import IBaseAdapter, FieldGetterFunc -from th2_data_services.data_source import lwdp -from th2_data_services.config.config import options as o - - -def get_field_func(field): - def get_field(msg): - return o.emfr.get_fields(msg)[field] - - return get_field - - -class Stream1Adapter(IBaseAdapter): - - def init_mapping(self) -> Dict[str, FieldGetterFunc]: - """ - Recommendations: - 1. Use pairwise names like `msgField1_msgField2` if you have - different names for the same field in the messages - # TODO -- probably it's better to use some Enum here instead - - """ - return { - 'field1': get_field_func('field1'), - 'msgField1_msgField2': get_field_func('field1'), - } - - def get_fields_group(self, m, group_name): - if group_name == "order_ids": - return { - "OrderID": self.get(m, "order_id"), - "ClOrdID": self.get(m, "clordid"), - } - - def on_message(self, m): - pass - - def on_message_exit(self, m): - pass diff --git a/template/adapters/stream2_adapter.py b/template/adapters/stream2_adapter.py deleted file mode 100644 index 1f4f65f..0000000 --- a/template/adapters/stream2_adapter.py +++ /dev/null @@ -1,40 +0,0 @@ -from typing import Dict - -from template.adapters.base_adapter import IBaseAdapter, FieldGetterFunc -from th2_data_services.data_source import lwdp -from th2_data_services.config.config import options as o - - -def get_field_func(field): - def get_field(msg): - return o.emfr.get_fields(msg)[field] - - return get_field - - -class Stream2Adapter(IBaseAdapter): - - def init_mapping(self) -> Dict[str, FieldGetterFunc]: - """ - Recommendations: - 1. 
Use pairwise names like `msgField1_msgField2` if you have - different names for the same field in the messages - # TODO -- probably it's better to use some Enum here instead - - """ - return { - 'field1': get_field_func('field1') - } - - def get_fields_group(self, m, group_name): - if group_name == "order_ids": - return { - "OrderID": self.get(m, "order_id"), - "ClOrdID": self.get(m, "clordid"), - } - - def on_message(self, m): - pass - - def on_message_exit(self, m): - pass diff --git a/template/download_data.py b/template/download_data.py deleted file mode 100644 index b37bcb3..0000000 --- a/template/download_data.py +++ /dev/null @@ -1,3 +0,0 @@ - -def get_messages(): - raise NotImplementedError \ No newline at end of file diff --git a/template/fields_checker.py b/template/fields_checker.py deleted file mode 100644 index 3ee38c4..0000000 --- a/template/fields_checker.py +++ /dev/null @@ -1,67 +0,0 @@ -from abc import ABC, abstractmethod -from dataclasses import dataclass -from typing import Any, Callable, Iterator, Tuple, Optional, Dict - -from template.adapters.base_adapter import IBaseAdapter - - -@dataclass -class FieldCheckResult: - field: str - left_val: Any - right_val: Any - result: Any - check_comment: Optional[str] = None - - -class IFieldCheckRule(ABC): - def __call__(self, field, msg1, msg2): - return self.handler(field, msg1, msg2) - - @abstractmethod - def handler(self, field, msg1, msg2) -> FieldCheckResult: - pass - - -class IAdapterFieldCheckRule(IFieldCheckRule, ABC): - def __init__(self, stream1_adapter: IBaseAdapter, - stream2_adapter: IBaseAdapter): - self.stream1_adapter = stream1_adapter - self.stream2_adapter = stream2_adapter - - def get_field_values(self, field, msg1, msg2): - v1 = self.stream1_adapter.get(msg1, field) - v2 = self.stream2_adapter.get(msg2, field) - return v1, v2 - - -class EqualFieldCheckRule(IAdapterFieldCheckRule): - def handler(self, field, msg1, msg2) -> FieldCheckResult: - v1, v2 = self.get_field_values(field, msg1, 
msg2) - - return FieldCheckResult( - field=field, - left_val=v1, - right_val=v2, - result=v1 == v2, - check_comment='Equal comparison' - ) - - -@dataclass -class FieldToCheck: - field: str - check_rule: IFieldCheckRule - - -def get_simple_fields_checker( - fields_to_check: Dict[str, FieldToCheck] -) -> Callable[[dict, dict], Iterator[FieldCheckResult]]: - def simple_fields_checker(msg1, msg2): - for field, ftc in fields_to_check.items(): - check_rule_result = ftc.check_rule(field, msg1, msg2) - - if check_rule_result.result is False: - yield check_rule_result - - return simple_fields_checker diff --git a/template/interpret_functions.py b/template/interpret_functions.py deleted file mode 100644 index 1ac8c36..0000000 --- a/template/interpret_functions.py +++ /dev/null @@ -1,206 +0,0 @@ -from dataclasses import dataclass -from typing import List, Callable -from collections import defaultdict -from typing import Tuple, Any, Generator, Iterator, Callable - -from recon_lw import recon_lw -from template.adapters.base_adapter import IBaseAdapter -from template.fields_checker import FieldCheckResult -from template.recon_event_types import ReconType - - -@dataclass -class Counters: - match_ok: int = 0 - match_fail: int = 0 - no_right: int = 0 - no_left: int = 0 - - -def get_interpret_func( - # match_msgs, - orig_adapter, - copy_adapter, - event_name_prefix, - fields_checker: Callable, - counters: Counters, - first_key_func, - second_key_func, -): - def interpret_func(match_msgs: List[dict], _, event_sequence: dict): - """ - - Args: - match_msgs: list of matched messages - _: - event_sequence: - - Returns: - - """ - return compare_2_msgs( - match_msgs=match_msgs, - event_sequence=event_sequence, - orig_adapter=orig_adapter, - copy_adapter=copy_adapter, - fields_checker=fields_checker, - event_name_prefix=event_name_prefix, - counters=counters, - first_key_func=first_key_func, - second_key_func=second_key_func - ) - - return interpret_func - - -def _get_miss_event(msg, 
event_name_prefix, - match_key, - recon_type: ReconType, counters: Counters, - order_ids, - event_sequence): - if recon_type == ReconType.BasicReconMissLeft: - counters.no_left += 1 - name = f"{event_name_prefix}_[no_left]" - elif recon_type == ReconType.BasicReconMissRight: - counters.no_right += 1 - name = f"{event_name_prefix}_[no_right]" - else: - raise Exception('unexpected behaviour') - - body = {"key": match_key} - - if order_ids: - body["order_ids"] = order_ids - - event = recon_lw.create_event( - name=name, - type=recon_type.value, - ok=False, - event_sequence=event_sequence, - body=body, - ) - event["attachedMessageIds"] = [msg["messageId"]] - - return event - - -def compare_2_msgs( - match_msgs, - event_sequence: dict, - orig_adapter: IBaseAdapter, - copy_adapter: IBaseAdapter, - fields_checker: Callable[[dict, dict], Iterator[FieldCheckResult]], - event_name_prefix, - counters: Counters, - first_key_func, - second_key_func, -): - msg1 = match_msgs[0] - msg2 = match_msgs[1] - events = [] - if msg1 is not None and msg2 is not None: - name = f"{event_name_prefix}_[match]" - - body = {} - diff_list = [] - # field, orig_value, copy_value - differences: Iterator[FieldCheckResult] = fields_checker(msg1, msg2) - status = True - - if differences: - status = False - for fcr in differences: - diff_list.append( - dict(field=fcr.field, expected=fcr.left_val, - actual=fcr.right_val) - ) - - order_ids = orig_adapter.get_fields_group( - msg1, "order_ids" - ) or copy_adapter.get_fields_group(msg2, "order_ids") - - if order_ids: - body["order_ids"] = order_ids - - if not status: - name = f"{name}[diff_found]" - counters.match_fail += 1 - body["diff"] = diff_list - else: - counters.match_ok += 1 - - event = recon_lw.create_event( - name=name, - type=ReconType.BasicReconMatch.value, - event_sequence=event_sequence, - ok=status, - body=body, - ) - event["attachedMessageIds"] = [m["messageId"] for m in match_msgs if - m is not None] - events.append(event) - 
orig_adapter.on_message_exit(msg1) - copy_adapter.on_message_exit(msg2) - - elif msg1 is not None: - # counters["no_right"] += 1 - # body = {"key": first_key_func(msg1)} - # # TODO -- get_fields_group что это ??? - # order_ids = orig_adapter.get_fields_group(msg1, "order_ids") - # - # if order_ids: - # body["order_ids"] = order_ids - # - # name = f"{event_name_prefix}_[no_right]" - # event = recon_lw.create_event( - # name=name, - # type=ReconType.BasicReconMissLeft.value, - # ok=False, - # event_sequence=event_sequence, - # body=body, - # ) - # event["attachedMessageIds"] = [msg1["messageId"]] - - order_ids = orig_adapter.get_fields_group(msg1, "order_ids") - match_key = first_key_func(msg1) - event = _get_miss_event(msg1, event_name_prefix, - match_key=match_key, - recon_type=ReconType.BasicReconMissLeft, - counters=counters, - order_ids=order_ids, - event_sequence=event_sequence) - events.append(event) - orig_adapter.on_message_exit(msg1) - - elif msg2 is not None: - # counters["no_left"] += 1 - # - # body = {"key": second_key_func(msg2)} - # order_ids = copy_adapter.get_fields_group(msg2, "order_ids") - # - # if order_ids: - # body["order_ids"] = order_ids - # - # name = f"{event_name_prefix}_[no_left]" - # event = recon_lw.create_event( - # name=name, - # type=ReconType.BasicReconMissRight.value, - # ok=False, - # event_sequence=event_sequence, - # body=body, - # ) - # event["attachedMessageIds"] = [m["messageId"] for m in match_msgs if - # m is not None] - - order_ids = copy_adapter.get_fields_group(msg2, "order_ids") - match_key = second_key_func(msg2) - event = _get_miss_event(msg2, event_name_prefix, - match_key=match_key, - recon_type=ReconType.BasicReconMissRight, - counters=counters, - order_ids=order_ids, - event_sequence=event_sequence) - events.append(event) - copy_adapter.on_message_exit(msg2) - - return events diff --git a/template/matching_functions.py b/template/matching_functions.py deleted file mode 100644 index 03a18e5..0000000 --- 
a/template/matching_functions.py +++ /dev/null @@ -1,139 +0,0 @@ -from typing import Callable - -from template.adapters.base_adapter import IBaseAdapter - - -# TODO - move to another place -class _AnyValBase: - def __eq__(self, other): - return True - - -AnyVal = _AnyValBase() - - -def get_refdata_field(field_name, security_id, refdata): - return refdata[security_id][field_name] - - -def check_message_field(field, check_fun): - def decorator(fun): - def wrapper(m, *args, **kwargs): - if check_fun(m[field]): - return fun(m) - - return wrapper - - return decorator - - -def check_session_prefix(prefix): - return check_message_field( - field="sessionId", check_fun=lambda s: s.startswith(prefix) - ) - - -def check_session(session): - return check_message_field(field="sessionId", - check_fun=lambda s: s == session) - - -def get_simple_matching_func( - -) -> Callable: - def simple_matching_func(m): - """Should return matching key""" - - return 123 - - return simple_matching_func - - -def get_matching_key(adapter: IBaseAdapter, item, *fields, sep=":"): - def scale_item(val, count): - if len(val) == count: - for x in val: - yield x - else: - v = val[0] - for _ in range(count): - yield v - - result = [] - items = {} - max_count = 1 - for field in fields: - val = adapter.get(item, field, strict=True) - if not isinstance(val, list): - val = [val] - items[field] = val - l = len(val) - if l != 1 and max_count != 1 and l != max_count: - raise SystemError( - f"Diff found {max_count} != {l} | {adapter.__class__.__name__} " - f"| {field} | {item}" - ) - - max_count = max(max_count, len(val)) - - z = list( - zip( - *(scale_item([str(x) for x in items[field]], max_count) for field in - fields) - ) - ) - - result = [sep.join(chunks) for chunks in z] - return result - - -def basic_matching_key_fun( - is_orig, adapter: IBaseAdapter, fields, - filter_fun=None, session=None, - alias_categorizer=None, alias_category=None -): - if not filter_fun: - def filter_fun(x, adapter_): - return True 
- - def fun_orig(m): - if filter_fun(m, adapter): - adapter.on_message(m) - mks = get_matching_key(adapter, m, *fields) - return mks - - def fun_copy(m): - if filter_fun(m, adapter): - adapter.on_message(m) - mks = get_matching_key(adapter, m, *fields) - if len(mks) > 1: - raise SystemError( - f"Copy matching fun can have only single value, received {mks}" - ) - return mks[0] - - if is_orig: - fun = fun_orig - else: - fun = fun_copy - - session_checks = [] - - if session: - if isinstance(session, set): - session_checks.append(lambda alias: alias in session) - else: - session_checks.append(lambda alias: alias == session) - - if alias_category: - if isinstance(alias_category, set): - session_checks.append( - lambda alias: alias_categorizer(alias) in alias_category) - else: - session_checks.append( - lambda alias: alias_categorizer(alias) == alias_category) - - fun = check_message_field("sessionId", lambda alias: all( - c(alias) for c in session_checks))(fun) - - return fun diff --git a/template/recon.py b/template/recon.py deleted file mode 100644 index 0b27e8e..0000000 --- a/template/recon.py +++ /dev/null @@ -1,52 +0,0 @@ -from th2_data_services.config import options -from th2_data_services.data import Data - -import sys -from th2_data_services.data_source import ( - lwdp, -) # Required. Will initialize options resolvers - -from template.download_data import get_messages -from template.rules.rule1 import rule1 - -efr = options.EVENT_FIELDS_RESOLVER -mfr = options.MESSAGE_FIELDS_RESOLVER - -from recon_lw import recon_lw - - -def all_recons( - events_dir, - metadata, - config, -): - """ - Entry point to run your rules. 
- - Args: - events_dir: - metadata: - config: - - Returns: - - """ - enabled_recons = config["recons"] - - rule1_cfg = ( - {} - if "Rule1" not in enabled_recons - else rule1("Rule1", metadata) - ) - - rules = rule1_cfg - - recon_lw.execute_standalone( - message_pickle_path=None, - sessions_list=None, - result_events_path=events_dir, - rules_settings_dict=rules, - data_objects=get_messages() - ) - - return metadata diff --git a/template/rules/rule1.py b/template/rules/rule1.py deleted file mode 100644 index 05bfffb..0000000 --- a/template/rules/rule1.py +++ /dev/null @@ -1,104 +0,0 @@ -from recon_lw import recon_lw -from template.adapters.stream1_adapter import Stream1Adapter -from template.adapters.stream2_adapter import Stream2Adapter -from template.fields_checker import EqualFieldCheckRule, \ - FieldToCheck, get_simple_fields_checker -from template.interpret_functions import Counters, get_interpret_func -from template.matching_functions import get_simple_matching_func, \ - basic_matching_key_fun - - -# Filter example -# def get_fix_filter( -# check_exec_type=True, -# prefix="fix_", -# blacklisted_exec_types=None, -# ): -# if not blacklisted_exec_types: -# blacklisted_exec_types = {"Done"} -# -# def fix_filter(m, adapter: Adapter): -# if prefix and not m["sessionId"].startswith(prefix): -# return False -# -# if m["messageType"] != "ExecutionReport": -# return False -# -# if adapter.get(m, "poss_dup") == "True": -# return False -# -# if check_exec_type: -# exec_type = adapter.get(m, "orderbook_execution_type") -# if exec_type in blacklisted_exec_types: -# return False -# -# return True -# -# return fix_filter - - -def rule1(recon_name, metadata): - stream1_adapter = Stream1Adapter() - stream2_adapter = Stream2Adapter() - - equal_field_checker_rule = EqualFieldCheckRule( - stream1_adapter=stream1_adapter, - stream2_adapter=stream2_adapter - ) - - fields_to_check = { - ftc.field: ftc - for ftc in [ - # field name as described in the adapters - 
FieldToCheck('order_id', equal_field_checker_rule), - FieldToCheck('clordid', equal_field_checker_rule), - ] - } - - fields_to_compare = list(fields_to_check.keys()) - - metadata["recons"][recon_name] = {"matchingFields": fields_to_compare} - - fix_fields_checker = get_simple_fields_checker( - fields_to_check - ) - - counters = Counters() - - stream1_key_fun = basic_matching_key_fun( - is_orig=True, - adapter=stream1_adapter, - fields=["exec_id", "dc_target"], - # filter_fun=get_fix_filter( - # True, True, prefix=None, blacklisted_exec_types={"Rejected"} - # ), - alias_categorizer=get_alias_category, - alias_category={AliasCategory.FIX_OE, AliasCategory.BIN_OE}, - ) - stream2_key_fun = basic_matching_key_fun( - is_orig=False, - adapter=stream2_adapter, - fields=["exec_id", "target_comp_id"], - # filter_fun=get_fix_filter(False, False, prefix=None), - alias_categorizer=get_alias_category, - alias_category=AliasCategory.FIX_DC, - ) - - rules = { - recon_name: { - "first_key_func": stream1_key_fun, - "second_key_func": stream2_key_fun, - "interpret_func": get_interpret_func( - orig_adapter=stream1_adapter, - copy_adapter=stream2_adapter, - event_name_prefix="YOUR_PREFIX_NAME", - fields_checker=fix_fields_checker, - counters=counters, - first_key_func=stream1_key_fun, - second_key_func=stream2_key_fun, - ), - "horizon_delay": 180, - "rule_match_func": recon_lw.one_many_match, - } - } - return rules diff --git a/template/utils.py b/template/utils.py deleted file mode 100644 index 8b7ac07..0000000 --- a/template/utils.py +++ /dev/null @@ -1,113 +0,0 @@ -from collections import defaultdict - - -def aggregate_list_field( - m, - body_field, - field, - keys=None, - values=None, - sep="/", - mem_field="", - result_type=dict, - keys_remap=None, -): - """ - keys = [a,b,c]c - values = [d,e,f] - - field = field - - body = { - field.0.a : "1", - field.0.b : "2", - field.0.c : "3", - field.0.d : "4", - field.0.e : "5", - field.0.f : "6", - field.1.a : "01", - field.1.b : "02", - 
field.1.c : "03", - field.1.d : "04", - field.1.e : "05", - field.1.f : "06", - } - - result = { - 1/2/3: 4/5/6 - 01/02/03: 04/05/06 - } - - - :param m: - :param body_field: - :param field: - :param keys: - :param values: - :param sep: - :param mem_field: - :return: - """ - - body = m[body_field] - - result = body.get(mem_field) - if result: - return result - - buffer = defaultdict(dict) - field += "." - for k, v in body.items(): - if k.startswith(field): - # NoPartyIDs.2.PartyID - _, n, sub_field = k.split(".", maxsplit=2) - if keys_remap: - sub_field = keys_remap.get(sub_field, sub_field) - if not sub_field: - continue - buffer[n][sub_field] = v - - if result_type == dict: - result = {} - for _, v in buffer.items(): - key = sep.join([v[k] for k in keys]) - value = sep.join([v[k] for k in values]) - - if key in result: - raise ValueError(f"Duplicated key = {key}, message - {m}") - - result[key] = value - elif result_type == list: - result = list(buffer.values()) - - m[body_field][mem_field] = result - - return result - - -def get_list_handler(adapter, field_name, keys_remap=None): - def handler(m, _): - data = aggregate_list_field( - m=m, - body_field=adapter.body_field, - field=field_name, - mem_field=f"_{field_name}_cache", - result_type=list, - keys_remap=keys_remap - ) - return data - - return handler - - -def get_list_size_handler(adapter, field_name): - list_handler = get_list_handler(adapter, field_name) - - def handler(m, _): - val = list_handler(m, _) - if val and val != adapter.NE: - return len(val) - - return adapter.NE - - return handler