diff --git a/libcovebods/data_reader.py b/libcovebods/data_reader.py index c52c433..0b5385a 100644 --- a/libcovebods/data_reader.py +++ b/libcovebods/data_reader.py @@ -2,9 +2,12 @@ import ijson # type: ignore -STATEMENT_MAPPING = {"entity": "entityStatement", - "person": "personStatement", - "relationship": "ownershipOrControlStatement"} +STATEMENT_MAPPING = { + "entity": "entityStatement", + "person": "personStatement", + "relationship": "ownershipOrControlStatement", +} + def get_statement_type(statement): if isinstance(statement, dict): @@ -15,6 +18,7 @@ def get_statement_type(statement): return STATEMENT_MAPPING[statement.get("recordType")] return "unknown" + class DataReader: """Class to hold information on where to get data and provides methods to access it. @@ -53,12 +57,12 @@ def get_all_data(self): with open(self._filename, "rb") as fp: for statement in ijson.items(fp, "item"): statementType = get_statement_type(statement) - #statementType = ( + # statementType = ( # statement.get("statementType") # if isinstance(statement, dict) # and isinstance(statement.get("statementType"), str) # else "unknown" - #) + # ) if statementType in count_statement_types: if ( count_statement_types[statementType] diff --git a/libcovebods/tasks/checks.py b/libcovebods/tasks/checks.py index 95fceb4..43e5da8 100644 --- a/libcovebods/tasks/checks.py +++ b/libcovebods/tasks/checks.py @@ -1100,7 +1100,9 @@ def check_entity_statement_first_pass(self, statement): { "type": "statement_entity_securities_listings_haspubliclisting_is_false", "statement_type": None, - "securities_listings": statement["recordDetails"]["publicListing"]["securitiesListings"], + "securities_listings": statement["recordDetails"][ + "publicListing" + ]["securitiesListings"], "statement": statement.get("statementId"), } ) @@ -1352,18 +1354,32 @@ def does_apply_to_schema(lib_cove_bods_config, schema_object) -> bool: return schema_object.is_schema_version_equal_to_or_greater_than("0.4") def check_statement_first_pass(self, statement): - if ("recordId" in statement and "statementDate" in statement and - "statementDate" in statement and statement["statementDate"]): + if ( + "recordId" in statement + and "statementDate" in statement + and "statementDate" in statement + and statement["statementDate"] + ): record_status = statement.get("recordStatus") record_type = statement.get("recordType") statement_id = statement.get("statementId") if statement["recordId"] in self._series: self._series[statement["recordId"]].append( - [statement["statementDate"], record_status, record_type, statement_id] + [ + statement["statementDate"], + record_status, + record_type, + statement_id, + ] ) else: self._series[statement["recordId"]] = [ - [statement["statementDate"], record_status, record_type, statement_id] + [ + statement["statementDate"], + record_status, + record_type, + statement_id, + ] ] def final_checks(self): @@ -1510,7 +1526,9 @@ def check_ownership_or_control_statement_second_pass(self, statement): "type": "interested_party_must_be_record_id", "statement_type": None, "statement": statement.get("statementId"), - "interested_party": statement["recordDetails"]["interestedParty"], + "interested_party": statement["recordDetails"][ + "interestedParty" + ], } ) elif self._records[ @@ -1521,7 +1539,9 @@ def check_ownership_or_control_statement_second_pass(self, statement): "type": "interested_party_can_only_refer_to_entity_or_person", "statement_type": None, "statement": statement.get("statementId"), - "interested_party": statement["recordDetails"]["interestedParty"], + "interested_party": statement["recordDetails"][ + "interestedParty" + ], } ) else: @@ -1544,7 +1564,9 @@ def check_ownership_or_control_statement_second_pass(self, statement): "type": "interest_beneficial_ownership_interested_party_not_person", "statement_type": None, "statement": statement.get("statementId"), - "interested_party": statement["recordDetails"]["interestedParty"], + "interested_party": statement[ + "recordDetails" + ]["interestedParty"], } ) @@ -1626,8 +1648,12 @@ def check_ownership_or_control_statement_second_pass(self, statement): "type": "relationship_interests_subject_should_be_entity_nomination_arrangement", "statement_type": None, "statement": statement.get("statementId"), - "subject_record_type": self._records[statement["recordDetails"]["subject"]][0], - "subject_record_subtype": self._records[statement["recordDetails"]["subject"]][1], + "subject_record_type": self._records[ + statement["recordDetails"]["subject"] + ][0], + "subject_record_subtype": self._records[ + statement["recordDetails"]["subject"] + ][1], } ) else: @@ -1647,8 +1673,12 @@ def check_ownership_or_control_statement_second_pass(self, statement): "type": "relationship_interests_subject_should_be_entity_nomination_arrangement", "statement_type": None, "statement": statement.get("statementId"), - "subject_record_type": self._records[statement["recordDetails"]["subject"]][0], - "subject_record_subtype": self._records[statement["recordDetails"]["subject"]][1], + "subject_record_type": self._records[ + statement["recordDetails"]["subject"] + ][0], + "subject_record_subtype": self._records[ + statement["recordDetails"]["subject"] + ][1], } ) elif "type" in interest and interest["type"] in ( @@ -1670,8 +1700,12 @@ def check_ownership_or_control_statement_second_pass(self, statement): "type": "relationship_interests_subject_should_be_entity_trust", "statement_type": None, "statement": statement.get("statementId"), - "subject_record_type": self._records[statement["recordDetails"]["subject"]][0], - "subject_record_subtype": self._records[statement["recordDetails"]["subject"]][1], + "subject_record_type": self._records[ + statement["recordDetails"]["subject"] + ][0], + "subject_record_subtype": self._records[ + statement["recordDetails"]["subject"] + ][1], } ) else: @@ -1689,8 +1723,12 @@ def check_ownership_or_control_statement_second_pass(self, statement): "type": "relationship_interests_subject_should_be_entity_trust", "statement_type": None, "statement": statement.get("statementId"), - "subject_record_type": self._records[statement["recordDetails"]["subject"]][0], - "subject_record_subtype": self._records[statement["recordDetails"]["subject"]][1], + "subject_record_type": self._records[ + statement["recordDetails"]["subject"] + ][0], + "subject_record_subtype": self._records[ + statement["recordDetails"]["subject"] + ][1], } ) @@ -1752,7 +1790,9 @@ def check_ownership_or_control_statement_second_pass(self, statement): "type": "relationship_interested_party_not_before_relationship_in_dataset", "statement_type": None, "statement": statement.get("statementId"), - "interested_party_id": statement["recordDetails"]["interestedParty"], + "interested_party_id": statement["recordDetails"][ + "interestedParty" + ], } ) @@ -1787,7 +1827,9 @@ def check_person_statement_first_pass(self, statement): "type": "person_identifiers_invalid_composition", "statement_type": None, "statement": statement.get("statementId"), - "scheme": identifier["scheme"] if "scheme" in identifier else None, + "scheme": identifier["scheme"] + if "scheme" in identifier + else None, } ) else: @@ -1871,6 +1913,8 @@ def check_entity_statement_first_pass(self, statement): "type": "entity_identifiers_not_known_scheme", "statement_type": None, "statement": statement.get("statementId"), - "scheme": identifier["scheme"] if "scheme" in identifier else None, + "scheme": identifier["scheme"] + if "scheme" in identifier + else None, } ) diff --git a/libcovebods/tasks/statistics.py b/libcovebods/tasks/statistics.py index 0e6501d..43003b8 100644 --- a/libcovebods/tasks/statistics.py +++ b/libcovebods/tasks/statistics.py @@ -390,11 +390,17 @@ def check_ownership_or_control_statement_second_pass(self, statement): interested_party = statement["recordDetails"].get("interestedParty") if interested_party: if interested_party in self.entity_record_ids: - self.count_ownership_or_control_statement_interested_party_with_entity += 1 + self.count_ownership_or_control_statement_interested_party_with_entity += ( + 1 + ) if interested_party in self.person_record_ids: - self.count_ownership_or_control_statement_interested_party_with_person += 1 + self.count_ownership_or_control_statement_interested_party_with_person += ( + 1 + ) if isinstance(interested_party, dict): - self.count_ownership_or_control_statement_interested_party_with_unspecified += 1 + self.count_ownership_or_control_statement_interested_party_with_unspecified += ( + 1 + ) def get_statistics(self): data = { @@ -417,6 +423,7 @@ def get_statistics(self): class StatisticsCurrentOwnershipOrControlStatementsAndReplacesStatementsMissing( AdditionalCheck ): + @staticmethod def does_apply_to_schema(lib_cove_bods_config, schema_object) -> bool: return schema_object.is_schema_version_equal_to_or_less_than("0.3") @@ -455,35 +462,35 @@ def get_statistics(self): } return data -class StatisticsStatementsRecordStatus( - AdditionalCheck -): + +class StatisticsStatementsRecordStatus(AdditionalCheck): @staticmethod def does_apply_to_schema(lib_cove_bods_config, schema_object) -> bool: return schema_object.is_schema_version_equal_to_or_greater_than("0.4") def __init__(self, lib_cove_bods_config, schema_object): super().__init__(lib_cove_bods_config, schema_object) - #self.count_replaces_statements_missing = 0 - #self.statement_ids = set() - #self.current_statement_ids = set() self.records = {} self.missing_new_records = {} self.current_records_count = 0 self.missing_new_records_count = 0 def check_statement_first_pass(self, statement): - if (isinstance(statement.get("recordStatus"), str) and - isinstance(statement.get("recordId"), str) and - statement.get("recordStatus") in ('new', 'updated', 'closed')): + if ( + isinstance(statement.get("recordStatus"), str) + and isinstance(statement.get("recordId"), str) + and statement.get("recordStatus") in ("new", "updated", "closed") + ): if not statement.get("recordId") in self.records: - if not statement.get("recordStatus") == 'new': - self.missing_new_records[statement.get("recordId")] = statement.get("statementId") + if not statement.get("recordStatus") == "new": + self.missing_new_records[statement.get("recordId")] = statement.get( + "statementId" + ) self.records[statement.get("recordId")] = statement.get("recordStatus") def final_checks(self): for record_id in self.records: - if not self.records[record_id] == 'closed': + if not self.records[record_id] == "closed": self.current_records_count += 1 for record_id in self.missing_new_records: self.missing_new_records_count += 1 diff --git a/tests/api.py b/tests/api.py index f170a66..105bb28 100644 --- a/tests/api.py +++ b/tests/api.py @@ -15,7 +15,9 @@ def bods_json_output( sample_mode=False, ): # Data Reader - data_reader = libcovebods.data_reader.DataReader(input_file_name, sample_mode=sample_mode) + data_reader = libcovebods.data_reader.DataReader( + input_file_name, sample_mode=sample_mode + ) # classes if not lib_cove_bods_config: diff --git a/tests/test_additional_fields_0_4_0.py b/tests/test_additional_fields_0_4_0.py index 116924e..dc2ce11 100644 --- a/tests/test_additional_fields_0_4_0.py +++ b/tests/test_additional_fields_0_4_0.py @@ -1,10 +1,6 @@ import os import tempfile -import libcovebods.data_reader -from libcovebods.additionalfields import AdditionalFields -from libcovebods.config import LibCoveBODSConfig -from libcovebods.schema import SchemaBODS from tests.api import bods_json_output @@ -14,7 +10,10 @@ def test_additional_fields_1(): prefix="lib-cove-bods-tests-", dir=tempfile.gettempdir() ) json_filename = os.path.join( - os.path.dirname(os.path.realpath(__file__)), "fixtures", "0.4", "additional_fields_1.json" + os.path.dirname(os.path.realpath(__file__)), + "fixtures", + "0.4", + "additional_fields_1.json", ) results = bods_json_output(cove_temp_folder, json_filename) @@ -31,7 +30,10 @@ def test_additional_fields_2(): prefix="lib-cove-bods-tests-", dir=tempfile.gettempdir() ) json_filename = os.path.join( - os.path.dirname(os.path.realpath(__file__)), "fixtures", "0.4", "additional_fields_2.json" + os.path.dirname(os.path.realpath(__file__)), + "fixtures", + "0.4", + "additional_fields_2.json", ) results = bods_json_output(cove_temp_folder, json_filename) @@ -39,6 +41,6 @@ def test_additional_fields_2(): assert results["schema_version"] == "0.4" assert results["validation_errors_count"] == 0 assert results["additional_fields_count"] == 0 - assert results["additional_checks_count"] == 2 # Some checks fail because isolated statement - - + assert ( + results["additional_checks_count"] == 2 + ) # Some checks fail because isolated statement diff --git a/tests/test_stat_counts_0_4.py b/tests/test_stat_counts_0_4.py index b221b8b..47017f9 100644 --- a/tests/test_stat_counts_0_4.py +++ b/tests/test_stat_counts_0_4.py @@ -20,18 +20,66 @@ def test_sample_mode_valid_1(): assert results["schema_version"] == "0.4" - assert results['statistics']['count_entity_statements'] == 50 - assert results['statistics']['count_entity_statements_types']['registeredEntity'] == 50 - assert results['statistics']['count_entity_statements_types_with_any_identifier']['registeredEntity'] == 50 - assert results['statistics']['count_entity_statements_types_with_any_identifier_with_id_and_scheme']['registeredEntity'] == 50 - assert results['statistics']['count_person_statements'] == 50 - assert results['statistics']['count_person_statements_types']['knownPerson'] == 50 - assert results['statistics']['count_ownership_or_control_statement'] == 50 - assert results['statistics']["count_ownership_or_control_statement_interested_party_with_person"] == 50 - assert results['statistics']["count_ownership_or_control_statement_interested_party_with_entity"] == 0 - assert results['statistics']["count_ownership_or_control_statement_interested_party_with_unspecified"] == 0 - assert results['statistics']['count_ownership_or_control_statement_interested_party'] == 50 - assert results['statistics']['count_ownership_or_control_statement_interest_statement_types']['shareholding'] == 50 - assert results['statistics']['count_ownership_or_control_statement_by_year'][2020] == 50 - assert results['statistics']['count_ownership_or_control_statement_subject_by_year'][2020] == 50 - assert results['statistics']['count_ownership_or_control_statement_interested_party_by_year'][2020] == 50 + assert results["statistics"]["count_entity_statements"] == 50 + assert ( + results["statistics"]["count_entity_statements_types"]["registeredEntity"] == 50 + ) + assert ( + results["statistics"]["count_entity_statements_types_with_any_identifier"][ + "registeredEntity" + ] + == 50 + ) + assert ( + results["statistics"][ + "count_entity_statements_types_with_any_identifier_with_id_and_scheme" + ]["registeredEntity"] + == 50 + ) + assert results["statistics"]["count_person_statements"] == 50 + assert results["statistics"]["count_person_statements_types"]["knownPerson"] == 50 + assert results["statistics"]["count_ownership_or_control_statement"] == 50 + assert ( + results["statistics"][ + "count_ownership_or_control_statement_interested_party_with_person" + ] + == 50 + ) + assert ( + results["statistics"][ + "count_ownership_or_control_statement_interested_party_with_entity" + ] + == 0 + ) + assert ( + results["statistics"][ + "count_ownership_or_control_statement_interested_party_with_unspecified" + ] + == 0 + ) + assert ( + results["statistics"]["count_ownership_or_control_statement_interested_party"] + == 50 + ) + assert ( + results["statistics"][ + "count_ownership_or_control_statement_interest_statement_types" + ]["shareholding"] + == 50 + ) + assert ( + results["statistics"]["count_ownership_or_control_statement_by_year"][2020] + == 50 + ) + assert ( + results["statistics"]["count_ownership_or_control_statement_subject_by_year"][ + 2020 + ] + == 50 + ) + assert ( + results["statistics"][ + "count_ownership_or_control_statement_interested_party_by_year" + ][2020] + == 50 + )