From a08253884cb6d692b235aeb0b1442f602f41df62 Mon Sep 17 00:00:00 2001 From: "Ed (ODSC)" Date: Fri, 13 Sep 2024 13:06:46 +0100 Subject: [PATCH] libcovebods: For 0.4 use Draft202012Validator and directory based schema --- libcovebods/config.py | 2 +- .../data/schema-0-4-0/codelist-schema.json | 50 +++ .../schema-0-4-0/codelists/addressType.csv | 7 + .../codelists/annotationMotivation.csv | 6 + .../codelists/directOrIndirect.csv | 4 + .../schema-0-4-0/codelists/entitySubtype.csv | 6 + .../schema-0-4-0/codelists/entityType.csv | 8 + .../schema-0-4-0/codelists/interestType.csv | 24 ++ .../data/schema-0-4-0/codelists/nameType.csv | 7 + .../schema-0-4-0/codelists/personType.csv | 4 + .../schema-0-4-0/codelists/recordStatus.csv | 4 + .../schema-0-4-0/codelists/recordType.csv | 4 + .../codelists/securitiesIdentifierSchemes.csv | 5 + .../schema-0-4-0/codelists/sourceType.csv | 6 + .../codelists/unspecifiedReason.csv | 8 + libcovebods/data/schema-0-4-0/components.json | 237 ++++++++++++ .../data/schema-0-4-0/entity-record.json | 356 ++++++++++++++++++ .../data/schema-0-4-0/person-record.json | 245 ++++++++++++ .../schema-0-4-0/relationship-record.json | 191 ++++++++++ libcovebods/data/schema-0-4-0/statement.json | 337 +++++++++++++++++ libcovebods/jsonschemavalidate.py | 35 +- libcovebods/schema.py | 106 +++--- libcovebods/schema_dir.py | 34 ++ setup.py | 1 + 24 files changed, 1626 insertions(+), 61 deletions(-) create mode 100644 libcovebods/data/schema-0-4-0/codelist-schema.json create mode 100644 libcovebods/data/schema-0-4-0/codelists/addressType.csv create mode 100644 libcovebods/data/schema-0-4-0/codelists/annotationMotivation.csv create mode 100644 libcovebods/data/schema-0-4-0/codelists/directOrIndirect.csv create mode 100644 libcovebods/data/schema-0-4-0/codelists/entitySubtype.csv create mode 100644 libcovebods/data/schema-0-4-0/codelists/entityType.csv create mode 100644 libcovebods/data/schema-0-4-0/codelists/interestType.csv create mode 100644 
libcovebods/data/schema-0-4-0/codelists/nameType.csv create mode 100644 libcovebods/data/schema-0-4-0/codelists/personType.csv create mode 100644 libcovebods/data/schema-0-4-0/codelists/recordStatus.csv create mode 100644 libcovebods/data/schema-0-4-0/codelists/recordType.csv create mode 100644 libcovebods/data/schema-0-4-0/codelists/securitiesIdentifierSchemes.csv create mode 100644 libcovebods/data/schema-0-4-0/codelists/sourceType.csv create mode 100644 libcovebods/data/schema-0-4-0/codelists/unspecifiedReason.csv create mode 100644 libcovebods/data/schema-0-4-0/components.json create mode 100644 libcovebods/data/schema-0-4-0/entity-record.json create mode 100644 libcovebods/data/schema-0-4-0/person-record.json create mode 100644 libcovebods/data/schema-0-4-0/relationship-record.json create mode 100644 libcovebods/data/schema-0-4-0/statement.json create mode 100644 libcovebods/schema_dir.py diff --git a/libcovebods/config.py b/libcovebods/config.py index 3f11090..482274c 100644 --- a/libcovebods/config.py +++ b/libcovebods/config.py @@ -19,7 +19,7 @@ "schema_url_host": _schema_folder, }, "0.4": { - "schema_url": os.path.join(_schema_folder, "schema-0-4-0.json"), + "schema_url": os.path.join(_schema_folder, "schema-0-4-0"), "schema_url_host": _schema_folder, }, }, diff --git a/libcovebods/data/schema-0-4-0/codelist-schema.json b/libcovebods/data/schema-0-4-0/codelist-schema.json new file mode 100644 index 0000000..5156332 --- /dev/null +++ b/libcovebods/data/schema-0-4-0/codelist-schema.json @@ -0,0 +1,50 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "urn:codelists", + "version": "0.4", + "title": "Codelist schema", + "description": "Schema for BODS codelist CSV files.", + "type": "array", + "items": { + "type": "object", + "required": [ + "code", + "title", + "description" + ], + "additionalProperties": false, + "properties": { + "code": { + "title": "code", + "description": "The value to use in BODS data. 
Codes should match the letter case of external codes (e.g. ISO 4217) and should be camel case, otherwise.", + "type": "string", + "minLength": 1, + "pattern": "^[^\\s].*[^\\s]$" + }, + "title": { + "title": "title", + "description": "A short title for the code.", + "type": "string", + "minLength": 1, + "pattern": "^[^\\s].*[^\\s]$" + }, + "description": { + "title": "description", + "description": "A longer-form description of the code.", + "type": "string", + "minLength": 1, + "pattern": "^[^\\s].*[^\\s]$" + }, + "technical note": { + "title": "technical note", + "description": "Notes on the application of the code.", + "type": [ + "string", + "null" + ] + } + } + }, + "minItems": 1, + "uniqueItems": true +} diff --git a/libcovebods/data/schema-0-4-0/codelists/addressType.csv b/libcovebods/data/schema-0-4-0/codelists/addressType.csv new file mode 100644 index 0000000..97d33da --- /dev/null +++ b/libcovebods/data/schema-0-4-0/codelists/addressType.csv @@ -0,0 +1,7 @@ +code,title,description +placeOfBirth,Place of birth,The place where a person was born. A town (in the `address` field) and `country` may be sufficient information. This MUST only apply to the `placeOfBirth` field for natural persons. +residence,Residential address,An address where someone lives. This MUST only apply to natural persons. +registered,Registered address,An official address for delivering statutory mail and legal notices which must be provided to company registers. This MUST only apply to entities. +service,Service address,An address which can be used as an alternative to a residential address for the purpose of receiving post. This MUST only apply to natural persons. +alternative,Alternative address,An address provided in addition to the primary address for this entity or person that is neither a service nor a registered address. +business,Business address,A place where the entity conducts its business. 
diff --git a/libcovebods/data/schema-0-4-0/codelists/annotationMotivation.csv b/libcovebods/data/schema-0-4-0/codelists/annotationMotivation.csv new file mode 100644 index 0000000..18e8437 --- /dev/null +++ b/libcovebods/data/schema-0-4-0/codelists/annotationMotivation.csv @@ -0,0 +1,6 @@ +code,title,description +commenting,Commenting,"The description field provides contextual comments for a field, object or statement." +correcting,Correcting,"The value of this field, object or statement has been corrected, using the method in the description field or from the original value given in the description field." +identifying,Identifying,"The value of this field, object or statement has been augmented or processed for the purpose of identifying natural persons or legal entities, using the method in the description field." +linking,Linking,"The description explains how linked material relates to the field, object or statement. A URL to linked material MUST be provided in the `url` field." +transformation,Transformation,"The values of this field, object or statement have been changed from their original form using the method in the description field. The transformed representation may be provided in the `transformedContent` field." diff --git a/libcovebods/data/schema-0-4-0/codelists/directOrIndirect.csv b/libcovebods/data/schema-0-4-0/codelists/directOrIndirect.csv new file mode 100644 index 0000000..a73103a --- /dev/null +++ b/libcovebods/data/schema-0-4-0/codelists/directOrIndirect.csv @@ -0,0 +1,4 @@ +code,title,description +direct,Direct,The interest is held directly. +indirect,Indirect,The interest is held through one or more intermediate entities (including arrangements). +unknown,Unknown,The interest may be direct or indirect. 
diff --git a/libcovebods/data/schema-0-4-0/codelists/entitySubtype.csv b/libcovebods/data/schema-0-4-0/codelists/entitySubtype.csv new file mode 100644 index 0000000..3e3d10a --- /dev/null +++ b/libcovebods/data/schema-0-4-0/codelists/entitySubtype.csv @@ -0,0 +1,6 @@ +code,title,description +governmentDepartment,Government department,"An element of government with executive responsibilities. These terms are often used to designate such a body: ministry, department, bureau or office. This MUST be used with `entityType.type` 'stateBody'." +stateAgency,State Agency,A body overseeing or administering elements of public policy. State agencies may have responsibilities which are devolved from government departments or from the state's legislative body. This MUST be used with `entityType.type` 'stateBody'. +other,Other,Any other type of entity. +trust,Trust,A trust or trust like arrangement. An arrangement where a settlor transfers ownership of assets to trustees to control for the benefit of beneficiaries. This MUST be used with `entityType.type` 'arrangement' or 'legalEntity'. +nomination,Nomination,An agreement where a nominator instructs a nominee to act on their behalf in a specified capacity. This MUST be used with `entityType.type` 'arrangement'. diff --git a/libcovebods/data/schema-0-4-0/codelists/entityType.csv b/libcovebods/data/schema-0-4-0/codelists/entityType.csv new file mode 100644 index 0000000..3c9ac50 --- /dev/null +++ b/libcovebods/data/schema-0-4-0/codelists/entityType.csv @@ -0,0 +1,8 @@ +code,title,description +registeredEntity,Registered Entity,"A legal entity created through an act of official registration. In most cases, registered entities have an officially issued identifier." +legalEntity,Legal entity,"A body with distinct legal personality, such as an international institution or statutory corporation, but which is not otherwise uniquely identified in some official register."
+arrangement,Arrangement,"A legal arrangement, agreement, contract or other mechanism via which one or more natural or legal persons can associate to exert ownership or control over an entity. Parties to an arrangement have no other form of collective legal identity." +anonymousEntity,Anonymous entity,"An entity that has been identified, but for which identifying information is being withheld. The reason for non-disclosure should be given in the accompanying `unspecifiedEntityDetails` field." +unknownEntity,Unknown entity,An entity that has not been identified. +state,State,"A country, nation or community with legal sovereignty within a territory." +stateBody,State body,A core administrative or legislative unit within a state's apparatus. diff --git a/libcovebods/data/schema-0-4-0/codelists/interestType.csv b/libcovebods/data/schema-0-4-0/codelists/interestType.csv new file mode 100644 index 0000000..0f05503 --- /dev/null +++ b/libcovebods/data/schema-0-4-0/codelists/interestType.csv @@ -0,0 +1,24 @@ +code,title,description +shareholding,Shareholding,An economic interest in an entity gained by holding shares. +votingRights,Voting rights,"A controlling interest in an entity gained by holding shares. Defined as the right of shareholders to vote on matters of corporate policy, including decisions on the makeup of the board of directors, issuing securities, initiating corporate actions and making substantial changes in the corporation's operations." +appointmentOfBoard,Appointment of board,A controlling interest in an entity. Defined as the absolute right to appoint members of the board of directors. +otherInfluenceOrControl,Other influence or control,"Any influence or control in an entity that is distinct from being a shareholder, having voting rights or having the absolute right to appoint to the board." +seniorManagingOfficial,Senior managing official,A controlling interest in an entity gained by employment. 
Defined as the person who exercises control over the management of the entity. +settlor,Settlor,"A person who, either actually or by operation of law, creates a trust. They may be referred to by other names such as a trustor, a grantor or a donor. This should also be used for a person with a settlor-equivalent role in any legal arrangement that is similar to a trust." +trustee,Trustee,A person who administers a trust for the benefit of a third party and in whom the legal title of the trust property is vested either by declaration of the settlor or by operation of law. This should also be used for a person with a trustee-equivalent role in any legal arrangement that is similar to a trust. +protector,Protector,"A person appointed to protect the interests or wishes of the settlor, providing influence and guidance to the trustee who administers the trust. This should also be used for a person with a protector-equivalent role in any legal arrangement that is similar to a trust." +beneficiaryOfLegalArrangement,Beneficiary of a legal arrangement,A person who benefits from a trust or other legal arrangement's holdings or activities. +rightsToSurplusAssetsOnDissolution,Rights to surplus assets on dissolution,The right to a share in the amount of an asset or resource that exceeds the portion that is utilized upon the winding up of an entity. +rightsToProfitOrIncome,Rights to receive profits or income,An economic interest in an entity. Defined as beneficial ownership rights beyond those otherwise implied by ownership structures that are granted by contract. +rightsGrantedByContract,Rights granted by contract,An interest that is granted by contract. +conditionalRightsGrantedByContract,Conditional rights granted by contract,An interest that exists only if some contractual condition is met. +controlViaCompanyRulesOrArticles,Control via company rules or articles,Control of an entity gained through a provision in company articles or by shareholder agreement. 
+controlByLegalFramework,Control by legal framework,"Control of an entity gained through a legal framework, such as a combination of legislation (primary and secondary). This type of interest is created by governments and legislators in order to establish and guide agencies and entities linked to the state." +boardMember,Board member,One of a group of people constituted as the strategic decision-making body of an organization. +boardChair,Board chair,The person holding the most power and authority on the board of directors. +unknownInterest,Unknown interest,"The interestedParty is known to have an interest in the subject of this Relationship Statement, but the nature of the interest is unknown." +unpublishedInterest,Unpublished interest,The nature of this interest is known but is not published. +enjoymentAndUseOfAssets,Enjoyment and use of assets,The use of assets belonging to an entity. +rightToProfitOrIncomeFromAssets,Right to profit or income from assets,"The right to derive profits, income or both from assets belonging to an entity." +nominee,Nominee,A person who agrees to act on behalf of the nominator in a specified capacity. +nominator,Nominator,A person who instructs a nominee to act on their behalf in a specified capacity. diff --git a/libcovebods/data/schema-0-4-0/codelists/nameType.csv b/libcovebods/data/schema-0-4-0/codelists/nameType.csv new file mode 100644 index 0000000..1ce63f9 --- /dev/null +++ b/libcovebods/data/schema-0-4-0/codelists/nameType.csv @@ -0,0 +1,7 @@ +code,title,description +legal,Legal name,"A name that identifies the person for legal, administrative and other official purposes. A person's legal name will usually be the one that is found on official government documents." +translation,Translation of name,A translation of the person's legal name in a different language. +transliteration,Transliteration of name,A transliteration of the person's legal name in a different script. 
+former,Former name,A name that the person has used in the past. +alternative,Alternative name,"Another name that the person is known by. This might be an alias, a nickname or a name that the person is also known as (aka)." +birth,Birth name,The legal name of the person at birth. diff --git a/libcovebods/data/schema-0-4-0/codelists/personType.csv b/libcovebods/data/schema-0-4-0/codelists/personType.csv new file mode 100644 index 0000000..fd62df0 --- /dev/null +++ b/libcovebods/data/schema-0-4-0/codelists/personType.csv @@ -0,0 +1,4 @@ +code,title,description +knownPerson,Known person,"A natural person who has been identified, and information such as names, identifiers or biographical information can be provided about them." +anonymousPerson,Anonymous person,"A natural person who has been identified, but identifying information is being withheld. The reason for non-disclosure should be given in the accompanying `unspecifiedPersonDetails` field." +unknownPerson,Unknown person,A natural person whose identity has not been discovered or confirmed. diff --git a/libcovebods/data/schema-0-4-0/codelists/recordStatus.csv b/libcovebods/data/schema-0-4-0/codelists/recordStatus.csv new file mode 100644 index 0000000..c302cbb --- /dev/null +++ b/libcovebods/data/schema-0-4-0/codelists/recordStatus.csv @@ -0,0 +1,4 @@ +code,title,description +new,New Record,This is the first Statement published pertaining to the record referenced by `recordId`. +updated,Updated Record,This Statement updates information published in a prior Statement pertaining to the record referenced by `recordId`. +closed,Closed Record,This is the final Statement published pertaining to the record referenced by `recordId`. 
diff --git a/libcovebods/data/schema-0-4-0/codelists/recordType.csv b/libcovebods/data/schema-0-4-0/codelists/recordType.csv new file mode 100644 index 0000000..78be5c1 --- /dev/null +++ b/libcovebods/data/schema-0-4-0/codelists/recordType.csv @@ -0,0 +1,4 @@ +code,title,description,technical note +person,Person,"The Record Details in this Statement contain information about a natural person at a particular point in time.",The recordDetails object should be validated using the Person Record schema. +entity,Entity,"The Record Details in this Statement contain information that identifies and describes an entity at a particular point in time.",The recordDetails object should be validated using the Entity Record schema. +relationship,Relationship,"The Record Details in this Statement describe the interests held by an interested party in a subject (an entity) at a particular point in time.",The recordDetails object should be validated using the Relationship Record schema. diff --git a/libcovebods/data/schema-0-4-0/codelists/securitiesIdentifierSchemes.csv b/libcovebods/data/schema-0-4-0/codelists/securitiesIdentifierSchemes.csv new file mode 100644 index 0000000..fd34b56 --- /dev/null +++ b/libcovebods/data/schema-0-4-0/codelists/securitiesIdentifierSchemes.csv @@ -0,0 +1,5 @@ +code,title,description +isin,International Securities Identification Number (ISIN),Scheme for unique identification of securities internationally. Securities are issued 12-character identifiers. Full details of the scheme are specified by ISO 6166. +figi,Financial Instrument Global identifier (FIGI),Scheme for unique identification of securities internationally. Securities are issued 12-character identifiers. Full details of the scheme are specified as an Object Management Group standard: https://www.omg.org/spec/FIGI/. +cusip,Committee on Uniform Securities Identification Procedures (CUSIP) Number,Scheme for unique identification of securities in North America.
Securities are issued 9-character identifiers. The scheme is managed by the American Bankers Association and specified as ANSI X9.6. +cins,CUSIP International Numbering System (CINS),Scheme for unique identification of securities issued outside North America. Securities are issued 9-character identifiers. The scheme is managed by the American Bankers Association and specified as ANSI X9.6. diff --git a/libcovebods/data/schema-0-4-0/codelists/sourceType.csv b/libcovebods/data/schema-0-4-0/codelists/sourceType.csv new file mode 100644 index 0000000..8595563 --- /dev/null +++ b/libcovebods/data/schema-0-4-0/codelists/sourceType.csv @@ -0,0 +1,6 @@ +code,title,description +selfDeclaration,Self declaration,"The information was provided by the person or entity referred to from `declarationSubject`, or by their authorised representative." +officialRegister,Official register,The information was taken from an official register. +thirdParty,Third party,"The information was provided by a third party, not directly related to the person, entity or interests described by this statement." +primaryResearch,Primary research,The information was provided as a result of research into primary sources. +verified,Verified,The information has been verified through the process documented in the associated description. diff --git a/libcovebods/data/schema-0-4-0/codelists/unspecifiedReason.csv b/libcovebods/data/schema-0-4-0/codelists/unspecifiedReason.csv new file mode 100644 index 0000000..b66bb41 --- /dev/null +++ b/libcovebods/data/schema-0-4-0/codelists/unspecifiedReason.csv @@ -0,0 +1,8 @@ +code,title,description +noBeneficialOwners,No beneficial owners,There are no beneficial owners who need to disclose ownership according to the rules under which this statement is made. 
+subjectUnableToConfirmOrIdentifyBeneficialOwner,Subject unable to confirm or identify beneficial owner,"The subject of this Relationship Statement has, as the disclosing party, been unwilling or unable to confirm the existence of or identify a beneficial owner." +interestedPartyHasNotProvidedInformation,Interested party has not provided information,The interested party in this Relationship Statement has not provided enough information to identify or confirm the identity of the beneficial owner. +subjectExemptFromDisclosure,Subject exempt from disclosure,The subject of this Relationship Statement is not required to disclose its beneficial owner. +interestedPartyExemptFromDisclosure,Interested party exempt from disclosure,The interested party in this Relationship Statement is exempt from having their identity disclosed. +unknown,Unknown reason,The reason an interested party cannot be provided is not known. +informationUnknownToPublisher,Information unknown to the publisher of the data,A publisher does not have access to information on this person or entity. This should not generally be used in situations where one party has the responsibility to provide such information. diff --git a/libcovebods/data/schema-0-4-0/components.json b/libcovebods/data/schema-0-4-0/components.json new file mode 100644 index 0000000..a28bdcc --- /dev/null +++ b/libcovebods/data/schema-0-4-0/components.json @@ -0,0 +1,237 @@ +{ + "$id": "urn:components", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$comment": "This file contains generic components which are used in multiple places throughout the BODS schema.
Components used only in Statement or one type of Record should be defined in their respective files.", + "title": "Components", + "description": "Types of object that can be used via $ref throughout the BODS schema.", + "version": "0.4", + "$defs": { + "Address": { + "title": "Address", + "description": "Semi-structured address details, suitable for processing using address-parsing algorithms. Where postal codes and country information are isolated fields in source systems, this information SHOULD be published in the dedicated fields and SHOULD NOT be published in the `address` field.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "description": "The function of the address, using the addressType codelist.", + "type": "string", + "enum": [ + "placeOfBirth", + "residence", + "registered", + "service", + "alternative", + "business" + ], + "codelist": "addressType.csv", + "openCodelist": false + }, + "address": { + "title": "Address", + "description": "The address, with each line or component separated by a line-break or comma.", + "type": "string" + }, + "postCode": { + "title": "Postcode", + "description": "The postal code for this address.", + "type": "string" + }, + "country": { + "title": "Country", + "description": "The country for this address.", + "$ref": "urn:components#/$defs/Country" + } + } + }, + "Country": { + "title": "Country", + "description": "A country MUST have a name. A country SHOULD have a 2-letter country code (ISO 3166-1)", + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "The name of the country", + "type": "string" + }, + "code": { + "title": "Country Code", + "description": "The 2-letter country code (ISO 3166-1) for this country.", + "type": "string", + "maxLength": 2, + "minLength": 2 + } + }, + "required": [ + "name" + ] + }, + "Jurisdiction": { + "title": "Jurisdiction", + "description": "A Jurisdiction MUST have a name. 
A jurisdiction SHOULD have a 2-letter country code (ISO 3166-1) or a subdivision code (ISO 3166-2).", + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "The name of the jurisdiction", + "type": "string" + }, + "code": { + "title": "Country or Subdivision Code", + "description": "The 2-letter country code (ISO 3166-1) or the subdivision code (ISO 3166-2) for the jurisdiction.", + "type": "string", + "maxLength": 6, + "minLength": 2 + } + }, + "required": [ + "name" + ] + }, + "Identifier": { + "title": "Identifier", + "description": "An identifier that has been assigned to a person or entity. `scheme` or `schemeName` (or both) MUST be included in an Identifier object.", + "type": "object", + "properties": { + "id": { + "title": "ID", + "description": "The identifier for a person or entity, as issued by the scheme.", + "type": "string" + }, + "scheme": { + "title": "Scheme Code", + "description": "For entities, a code from org-id.guide (https://www.org-id.guide) for an identifier-issuing authority (e.g. 'GB-COH'). 
For natural persons, a value with the pattern {JURISDICTION}-{TYPE} where JURISDICTION is an ISO 3166-1 3-digit country code and TYPE is one of PASSPORT, TAXID or IDCARD.", + "type": "string" + }, + "schemeName": { + "title": "Scheme Name", + "description": "The name of the identifier-issuing authority.", + "type": "string" + }, + "uri": { + "title": "URI", + "description": "A canonical URI (https://en.wikipedia.org/wiki/Uniform_Resource_Identifier) for the identifier and associated details of the person or entity, if one exists.", + "type": "string", + "format": "uri" + } + }, + "anyOf": [ + { + "required": [ + "scheme" + ] + }, + { + "required": [ + "schemeName" + ] + }, + { + "required": [ + "scheme", + "schemeName" + ] + } + ] + }, + "UnspecifiedRecord": { + "title": "Unspecified or unknown person or entity", + "description": "A `reason` MUST be supplied.", + "type": "object", + "properties": { + "reason": { + "title": "Reason", + "description": "The reason that a person or entity cannot be specified, using the unspecifiedReason codelist.", + "type": "string", + "enum": [ + "noBeneficialOwners", + "subjectUnableToConfirmOrIdentifyBeneficialOwner", + "interestedPartyHasNotProvidedInformation", + "subjectExemptFromDisclosure", + "interestedPartyExemptFromDisclosure", + "unknown", + "informationUnknownToPublisher" + ], + "codelist": "unspecifiedReason.csv", + "openCodelist": false + }, + "description": { + "title": "Description", + "description": "Additional information about the absence of details for a person or entity. This field may be used to provide set phrases from a source system, or a free text explanation.", + "type": "string" + } + }, + "required": [ + "reason" + ] + }, + "Source": { + "title": "Source", + "description": "Details describing an information source.", + "type": "object", + "properties": { + "type": { + "title": "Source Type", + "description": "The types of the source, using the sourceType codelist. 
Include 'verified' in the array if the information in the Statement has undergone a verification process.", + "type": "array", + "items": { + "type": "string", + "enum": [ + "selfDeclaration", + "officialRegister", + "thirdParty", + "primaryResearch", + "verified" + ], + "codelist": "sourceType.csv", + "openCodelist": false + } + }, + "description": { + "title": "Description", + "description": "Additional, free text information about the source of information.", + "type": "string" + }, + "url": { + "title": "Source URL", + "description": "The external URL from which this information was fetched, if relevant. Or, if relevant, a URL providing additional detail on how this information was sourced.", + "type": "string", + "format": "uri" + }, + "retrievedAt": { + "title": "Retrieved At", + "description": "A timestamp indicating when this information was imported from an external system, in full-date (YYYY-MM-DD) or date-time (e.g. YYYY-MM-DDTHH:MM:SSZ) format. See the IETF RFC3339 standard, section 5.6.", + "type": "string", + "anyOf": [ + { + "format": "date" + }, + { + "format": "date-time" + } + ] + }, + "assertedBy": { + "title": "Asserted By", + "description": "The people or organisations providing the information asserted in this Statement. This may include the declaring subject of a self-declaration, or the name of an agent making a declaration on their behalf. 
If this Statement has been verified, the array may include the name of the organisation providing verification.", + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "The name of the agent making the assertion", + "type": "string" + }, + "uri": { + "title": "URI", + "description": "An optional URI to identify the agent making the assertion", + "type": "string", + "format": "uri" + } + } + } + } + } + } + } +} diff --git a/libcovebods/data/schema-0-4-0/entity-record.json b/libcovebods/data/schema-0-4-0/entity-record.json new file mode 100644 index 0000000..12d3ba9 --- /dev/null +++ b/libcovebods/data/schema-0-4-0/entity-record.json @@ -0,0 +1,356 @@ +{ + "$id": "urn:entity", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "version": "0.4", + "title": "Entity Record Details", + "description": "Information about an entity.", + "type": "object", + "properties": { + "isComponent": { + "title": "Is component", + "description": "Whether this entity is a component in an indirect relationship. Where `isComponent` is 'true': (1) the `recordId` of this entity MUST be an element in the `componentRecords` array of that primary Relationship Statement, (2) this Entity Statement MUST come before that primary Relationship Statement in a BODS package or stream, (3) the replacement of this Entity Statement SHOULD be considered when replacing the primary Relationship Statement. 
The primary Relationship statement MUST have a `isComponent` value of 'false'.", + "type": "boolean" + }, + "entityType": { + "type": "object", + "title": "Entity Type", + "description": "The form of the entity described in the Statement.", + "required": [ + "type" + ], + "properties": { + "type": { + "type": "string", + "title": "Type", + "description": "The general form of the entity, using the entityType codelist.", + "codelist": "entityType.csv", + "enum": [ + "registeredEntity", + "legalEntity", + "arrangement", + "anonymousEntity", + "unknownEntity", + "state", + "stateBody" + ] + }, + "subtype": { + "type": "string", + "title": "Subtype", + "description": "The particular form of the entity, where relevant, using the entitySubtype codelist. The value MUST align with the `entityType` value.", + "codelist": "entitySubtype.csv", + "enum": [ + "governmentDepartment", + "stateAgency", + "other", + "trust", + "nomination" + ], + "openCodelist": false + }, + "details": { + "type": "string", + "title": "Details", + "description": "This may be used to provide a local name for this type of entity, or any further information to identify the type of entity. For example, in Finland 'ministeriö' for a government department." 
+ } + }, + "propertyOrder": 4, + "allOf":[ + { + "if": { + "properties": { + "type": { + "enum": [ + "arrangement" + ] + } + } + }, + "then": { + "properties": { + "subtype": { + "enum":[ + "trust", + "nomination", + "other" + ] + } + } + } + }, + { + "if": { + "properties": { + "type": { + "enum": [ + "legalEntity" + ] + } + } + }, + "then": { + "properties": { + "subtype": { + "enum": [ + "trust", + "other" + ] + } + } + } + }, + { + "if": { + "properties": { + "type": { + "enum": [ + "stateBody" + ] + } + } + }, + "then": { + "properties": { + "subtype": { + "enum": [ + "governmentDepartment", + "stateAgency", + "other" + ] + } + } + } + }, + { + "if": { + "properties":{ + "type":{ + "enum":[ + "registeredEntity", + "state", + "anonymousEntity", + "unknownEntity" + ] + } + } + }, + "then": { + "properties": { + "subtype":{ + "enum": [ + "other" + ] + } + } + } + } + ] + }, + "unspecifiedEntityDetails": { + "$ref": "urn:components#/$defs/UnspecifiedRecord", + "description": "An explanation of why an entity is anonymous or unknown." + }, + "name": { + "title": "Entity Name", + "description": "The declared name of this entity.", + "type": "string", + "propertyOrder": 10 + }, + "alternateNames": { + "title": "Alternative Names", + "description": "An array of other names this entity is known by.", + "type": "array", + "items": { + "type": "string", + "title": "Name", + "description": "A name this entity is known by." + }, + "propertyOrder": 12 + }, + "jurisdiction": { + "title": "Jurisdiction", + "description": "The jurisdiction in which this entity was registered or created (for legal and registered entities, and arrangements). Or the state's jurisdiction (for states and state bodies).", + "propertyOrder": 15, + "$ref": "urn:components#/$defs/Jurisdiction" + }, + "identifiers": { + "title": "Identifiers", + "description": "One or more official identifiers for this entity. 
Where available, official registration numbers should be provided.", + "type": "array", + "items": { + "$ref": "urn:components#/$defs/Identifier" + }, + "propertyOrder": 20 + }, + "foundingDate": { + "title": "Founding Date", + "description": "The date on which this entity was founded, created or registered. The date MUST be given in YYYY-MM-DD format. Where a precise month or date are not available, the value may be rounded to the first day of the (first) month. This rounding SHOULD be noted in accompanying guidance (such as a publication policy or data use guide).", + "type": "string", + "format": "date", + "propertyOrder": 30 + }, + "dissolutionDate": { + "title": "Dissolution Date", + "description": "The date on which this entity was dissolved or ceased, if it is no longer active. The date MUST be given in YYYY-MM-DD format. Where a precise month or date are not available, the value may be rounded to the first day of the (first) month. This rounding SHOULD be noted in accompanying guidance (such as a publication policy or data use guide).", + "type": "string", + "format": "date", + "propertyOrder": 35 + }, + "addresses": { + "title": "Addresses", + "description": "One or more addresses for this entity.", + "type": "array", + "items": { + "$ref": "urn:components#/$defs/Address", + "properties":{ + "type":{ + "enum":[ + "registered", + "business", + "alternative" + ] + } + } + }, + "propertyOrder": 40 + }, + "uri": { + "title": "URI", + "description": "Where a persistent URI (https://en.wikipedia.org/wiki/Uniform_Resource_Identifier) is available for this entity this should be included.", + "type": "string", + "format": "uri", + "propertyOrder": 21 + }, + "publicListing": { + "title": "Public Listing", + "description": "Details of a publicly listed company, its securities (shares and other tradable financial instruments related to the entity), and related regulatory filings.", + "$ref": "#/$defs/PublicListing" + }, + "formedByStatute": { + "type": "object", + 
"title": "Formed by Statute", + "description": "The law which mandated the formation of the entity described in the statement, where applicable. This information SHOULD be provided where a state has created an agency or other entity with specific legislation.", + "properties": { + "name": { + "type": "string", + "title": "Statute Name", + "description": "The name of the law. " + }, + "date": { + "type": "string", + "title": "Date", + "description": "The date on which the law came into force. The date MUST be given in YYYY-MM-DD format. Where a precise month or date are not available, the value may be rounded to the first day of the (first) month. This rounding SHOULD be noted in accompanying guidance (such as a publication policy or data use guide).", + "format": "date" + } + }, + "propertyOrder": 18 + } + }, + "required": [ + "isComponent", + "entityType" + ], + "$defs": { + "PublicListing": { + "type": "object", + "title": "Public Listing", + "description": "Details of a publicly listed company, its securities (shares and other tradable financial instruments related to the entity), and related regulatory filings.", + "required": [ + "hasPublicListing" + ], + "minProperties": 1, + "properties": { + "hasPublicListing": { + "type": "boolean", + "title": "Has Public Listing", + "description": "Whether the entity is a publicly listed company." + }, + "companyFilingsURLs": { + "type": "array", + "title": "Company Filings URLs", + "description": "URL or URLs where regulatory filings related to major holdings can be retrieved. URLs may point to pages maintained by regulatory bodies, stock exchanges or by the company itself.", + "items": { + "type": "string", + "format": "uri" + } + }, + "securitiesListings": { + "type": "array", + "title": "Securities Listings", + "description": "Details of the entity's securities and the public exchanges and markets on which they are traded. 
All equity securities SHOULD be listed here, plus any other securities from which beneficial ownership might be derived. Where a security is traded on more than one market, there SHOULD be an entry for each market (or market segment).", + "items": { + "title": "Securities listing", + "description": "Details of a security and the market on which it is traded.", + "$ref": "#/$defs/SecuritiesListing" + } + } + } + }, + "SecuritiesListing": { + "type": "object", + "title": "Securities Listing", + "description": "Details of a security and the market on which it is traded.", + "required": [ + "stockExchangeJurisdiction", + "security", + "stockExchangeName" + ], + "properties": { + "marketIdentifierCode": { + "type": "string", + "title": "Market Identifier Code (MIC)", + "description": "The Market Identifier Code (MIC) of the market on which the security is traded. Where the security is traded on a segment of an exchange, this is the MIC of the segment. Where it is traded on the main exchange, this is the MIC of the main exchange and MUST match the `operatingMarketIdentifierCode`. MICs are allocated and managed under ISO standard 10383." + }, + "operatingMarketIdentifierCode": { + "type": "string", + "title": "Operating Market Identifier Code (Operating MIC)", + "description": "The Market Identifier Code (MIC) of the main exchange or trading platform handling trades in this security. Where the security is traded on a segment of an exchange, this is the MIC of the parent exchange or trading platform. Where it is traded on the main exchange, this is the MIC of that main exchange and MUST match the `marketIdentifierCode`. MICs are allocated and managed under ISO standard 10383." 
+ }, + "stockExchangeJurisdiction": { + "type": "string", + "title": "Stock Exchange Jurisdiction", + "description": "The 2-letter country code (ISO 3166-1) or the subdivision code (ISO 3166-2) for the jurisdiction under which the exchange, market or trading platform is regulated.", + "maxLength": 6, + "minLength": 2 + }, + "stockExchangeName": { + "type": "string", + "title": "Stock Exchange Name", + "description": "The name of the exchange, market or trading platform on which the security is traded. If the security is traded on a segment of the exchange, then the name SHOULD include both elements. For example, 'London Stock Exchange - MTF'." + }, + "security": { + "type": "object", + "title": "Security", + "description": "Identifying information of the stock or other security.", + "required": [ + "ticker" + ], + "properties": { + "idScheme": { + "type": "string", + "title": "Identifier Scheme", + "description": "The scheme under which the security has been issued a unique, persistent identifier, using the securitiesIdentifierSchemes codelist.", + "enum": [ + "isin", + "figi", + "cusip", + "cins" + ], + "codelist": "securitiesIdentifierSchemes.csv" + }, + "id": { + "type": "string", + "title": "Identifier", + "description": "The unique identifier of the security as issued under the `idScheme`." + }, + "ticker": { + "type": "string", + "title": "Stock Ticker", + "description": "The stock ticker identifying this security on the named stock exchange." 
+ } + } + } + } + } + } +} diff --git a/libcovebods/data/schema-0-4-0/person-record.json b/libcovebods/data/schema-0-4-0/person-record.json new file mode 100644 index 0000000..2005461 --- /dev/null +++ b/libcovebods/data/schema-0-4-0/person-record.json @@ -0,0 +1,245 @@ +{ + "$id": "urn:person", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "version": "0.4", + "type": "object", + "title": "Person Record Details", + "description": "Information about a natural person.", + "properties": { + "isComponent": { + "title": "Is component", + "description": "Whether this person is a component of an indirect relationship. Where `isComponent` is 'true': (1) the `recordId` of this person MUST be an element in the `componentRecords` array of that primary Relationship Statement, (2) this Person Statement MUST come before that primary Relationship Statement in a BODS package or stream, (3) the replacement of this Person Statement SHOULD be considered when replacing the primary Relationship Statement. The primary Relationship Statement MUST have an `isComponent` value of 'false'.", + "type": "boolean" + }, + "personType": { + "title": "Person Type", + "description": "The status of this person, using the personType codelist. Where a person has the type 'anonymousPerson' or 'unknownPerson' a reason for the absence of information SHOULD be provided in 'unspecifiedPersonDetails'.", + "type": "string", + "enum": [ + "anonymousPerson", + "unknownPerson", + "knownPerson" + ], + "propertyOrder": 4, + "codelist": "personType.csv", + "openCodelist": false + }, + "unspecifiedPersonDetails": { + "$ref": "urn:components#/$defs/UnspecifiedRecord", + "description": "An explanation of why a person is anonymous or unknown." 
+ }, + "names": { + "title": "Names", + "description": "One or more known names for this person.", + "type": "array", + "items": { + "$ref": "#/$defs/Name" + }, + "propertyOrder": 10 + }, + "identifiers": { + "title": "Identifiers", + "description": "One or more official identifiers for this person. Where available, official registration numbers should be provided.", + "type": "array", + "items": { + "$ref": "urn:components#/$defs/Identifier" + }, + "propertyOrder": 20 + }, + "nationalities": { + "title": "Nationality", + "description": "The nationalities held by this person.", + "type": "array", + "items": { + "$ref": "urn:components#/$defs/Country" + }, + "propertyOrder": 30 + }, + "placeOfBirth": { + "title": "Place of Birth", + "description": "The place where this person was born.", + "propertyOrder": 40, + "$ref": "urn:components#/$defs/Address", + "properties": { + "type": { + "enum": [ + "placeOfBirth" + ] + } + } + }, + "birthDate": { + "title": "Date of Birth", + "description": "The date of birth for this person, in YYYY, YYYY-MM, or YYYY-MM-DD format.", + "type": "string", + "anyOf": [ + { + "pattern": "^(\\d{4})(-(1[0-2]|0[1-9]))?$" + }, + { + "format": "date" + } + ], + "propertyOrder": 35 + }, + "deathDate": { + "title": "Death Date", + "description": "The date of death for this person, in YYYY, YYYY-MM, or YYYY-MM-DD format.", + "type": "string", + "anyOf": [ + { + "pattern": "^(\\d{4})(-(1[0-2]|0[1-9]))?$" + }, + { + "format": "date" + } + ], + "propertyOrder": 36 + }, + "taxResidencies": { + "title": "Tax Residency", + "description": "The tax residencies held by this person, as an array of Country objects.", + "type": "array", + "items": { + "$ref": "urn:components#/$defs/Country" + }, + "propertyOrder": 55 + }, + "addresses": { + "title": "Addresses", + "description": "One or more addresses for this person.", + "type": "array", + "items": { + "$ref": "urn:components#/$defs/Address", + "properties": { + "type": { + "enum": [ + "residence", + 
"service", + "alternative" + ] + } + } + }, + "propertyOrder": 60 + }, + "politicalExposure": { + "type": "object", + "title": "Political Exposure", + "description": "Information about whether, and how, the person described by this statement is politically exposed. Use this property only if politically exposed person (PEP) declarations are expected as part of beneficial ownership declarations.", + "required": [ + "status" + ], + "properties": { + "status": { + "type": "string", + "title": "Politically Exposed Person (PEP) Status", + "description": "This value is 'isPep' or 'isNotPep' according to whether the person described by this statement has the status of politically exposed person (PEP). An 'unknown' value means a PEP status declaration is expected but missing; the reason for the missing data SHOULD be supplied in the `details` array.", + "enum": [ + "isPep", + "isNotPep", + "unknown" + ] + }, + "details": { + "type": "array", + "title": "Politically Exposed Person (PEP) Details", + "description": "One or more descriptions of this person's Politically Exposed Person (PEP) status.", + "items": { + "$ref": "#/$defs/PepStatusDetails" + } + } + } + } + }, + "required": [ + "personType", + "isComponent" + ], + "$defs": { + "Name": { + "title": "Name", + "description": "A name by which this person is known. 
A name MUST be provided in `fullName`, and MAY be broken down in the `familyName`, `givenName` and `patronymicName` fields, based on the EC ISA Core Person Vocabulary (https://joinup.ec.europa.eu/solution/e-government-core-vocabularies) definitions.", + "type": "object", + "required": [ + "fullName" + ], + "properties": { + "type": { + "title": "Type", + "description": "The status of this name for the person, using the nameType codelist.", + "type": "string", + "enum": [ + "legal", + "translation", + "transliteration", + "former", + "alternative", + "birth" + ], + "codelist": "nameType.csv", + "openCodelist": false + }, + "fullName": { + "title": "Full Name", + "description": "The complete name of the person.", + "type": "string" + }, + "familyName": { + "title": "Family Name", + "description": "Part of the person's `fullName` which is shared by family members. The value may include prefixes or suffixes, e.g. 'de Boer', 'van de Putte', 'von und zu Orlow'. The value may be a multiple-part family name, such as are commonly found in Hispanic countries. For example, Miguel de Cervantes Saavedra's Family Name would be recorded as 'de Cervantes Saavedra.'", + "type": "string" + }, + "givenName": { + "title": "Given Names", + "description": "The part of the person's `fullName` that identifies the person within their family. These are given to a person by their parents at birth or may be legally recognised as 'given names' through a formal process. For example, the given name for Johann Sebastian Bach is 'Johann Sebastian'.", + "type": "string" + }, + "patronymicName": { + "title": "Patronymic Name", + "description": "Part of the person's `fullName` which is inherited from their father, as is common in countries such as Iceland, Ethiopia and Russia. 
For example, the 'Sergeyevich' in 'Mikhail Sergeyevich Gorbachev'.", + "type": "string" + } + } + }, + "PepStatusDetails": { + "title": "PEP Status Details", + "description": "Information about a person's political involvement.", + "type": "object", + "properties": { + "reason": { + "title": "Reason", + "description": "The reason for this person being declared a politically-exposed person.", + "type": "string" + }, + "missingInfoReason": { + "title": "Missing Information Reasons", + "description": "An explanation of why the PEP status for the person is not provided (i.e. `politicalExposure.status` is 'unknown'). This may be a standard descriptive phrase from the source system, or a free text justification. Where this field is present it should be the only field except for `source`.", + "type": "string" + }, + "jurisdiction": { + "title": "Jurisdiction", + "description": "The jurisdiction where this person is a PEP.", + "$ref": "urn:components#/$defs/Jurisdiction" + }, + "startDate": { + "title": "Start Date", + "description": "The date from which this person had the status of a Politically-exposed Person (PEP). The date MUST be given in YYYY-MM-DD format. Where a precise month or date are unknown, the value may be rounded to the first day of the (first) month. This rounding SHOULD be noted in accompanying guidance (such as a publication policy or data use guide).", + "type": "string", + "format": "date" + }, + "endDate": { + "title": "End Date", + "description": "The date from which this person no longer had the status of a Politically-exposed Person (PEP). The date MUST be given in YYYY-MM-DD format. Where a precise month or date are unknown, the value may be rounded to the first day of the (first) month. 
This rounding SHOULD be noted in accompanying guidance (such as a publication policy or data use guide).", + "type": "string", + "format": "date" + }, + "source": { + "title": "Source", + "description": "The source of this PEP information", + "$ref": "urn:components#/$defs/Source" + } + } + } + } +} diff --git a/libcovebods/data/schema-0-4-0/relationship-record.json b/libcovebods/data/schema-0-4-0/relationship-record.json new file mode 100644 index 0000000..a5e5d35 --- /dev/null +++ b/libcovebods/data/schema-0-4-0/relationship-record.json @@ -0,0 +1,191 @@ +{ + "$id": "urn:relationship", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "version": "0.4", + "title": "Relationship Record Details", + "description": "Information about the interests that an interested party (a person or entity) holds in the subject (an entity).", + "type": "object", + "properties": { + "isComponent": { + "title": "Is component", + "description": "Whether this relationship is a component of a wider indirect relationship. Where `isComponent` is 'true': (1) the `recordId` of this secondary Relationship Statement MUST be an element in the `componentRecords` array of that primary Relationship Statement, (2) this Relationship Statement MUST come before that primary Relationship Statement in a BODS package or stream, (3) the replacement of this Relationship Statement SHOULD be considered when replacing the primary Relationship Statement, and (4) the primary Relationship Statement MUST have an `isComponent` value of 'false'. Where `isComponent` is 'false', this Relationship Statement is the primary declaration of the relationship between the `subject` and the `interestedParty`.", + "type": "boolean" + }, + "componentRecords": { + "title": "Component Record IDs", + "description": "The `recordId` values of all component records that provide detail about this relationship between the `subject` and the `interestedParty` (if it is indirect). 
If this relationship has components, its own `isComponent` value MUST be 'false'.", + "type": "array", + "items": { + "type": "string" + } + }, + "subject": { + "title": "Subject", + "description": "The `recordId` for the subject of the relationship, or a reason why the subject cannot be specified. The subject MUST be an entity.", + "oneOf": [ + { + "type": "string" + }, + { + "$ref": "urn:components#/$defs/UnspecifiedRecord" + } + ] + }, + "interestedParty": { + "title": "Interested Party", + "description": "The `recordId` for the interested party in the relationship, or an Unspecified Record object with a reason for why this information has not been disclosed. The interested party MAY be an entity or a person. An Unspecified Record SHOULD only be used where no information at all is known about interested parties beyond this point of the beneficial ownership network. If the interested party is known to be an entity or person but their particular identity is unavailable, a `recordId` for them SHOULD be provided here (and their `recordDetails.[person|entity]Type` should indicate that they are anonymous or unknown).", + "oneOf": [ + { + "type": "string" + }, + { + "$ref": "urn:components#/$defs/UnspecifiedRecord" + } + ] + }, + "interests": { + "title": "Interests", + "description": "A description of the interests held by the interested party in the subject.", + "type": "array", + "items": { + "$ref": "#/$defs/Interest" + } + } + }, + "required": [ + "isComponent", + "subject", + "interestedParty" + ], + "if": { + "properties":{ + "isComponent":{ + "const": true + } + } + }, + "then": { + "properties":{ + "componentRecords":{ + "const": [] + } + } + }, + "$defs": { + "Interest": { + "title": "Interest", + "description": "A description of an interest held by an interestedParty in the subject.", + "type": "object", + "properties": { + "type": { + "title": "Type of Interest", + "description": "The nature of the interest, using the interestType codelist.", + "type": 
"string", + "enum": [ + "shareholding", + "votingRights", + "appointmentOfBoard", + "otherInfluenceOrControl", + "seniorManagingOfficial", + "settlor", + "trustee", + "protector", + "beneficiaryOfLegalArrangement", + "rightsToSurplusAssetsOnDissolution", + "rightsToProfitOrIncome", + "rightsGrantedByContract", + "conditionalRightsGrantedByContract", + "controlViaCompanyRulesOrArticles", + "controlByLegalFramework", + "boardMember", + "boardChair", + "unknownInterest", + "unpublishedInterest", + "enjoymentAndUseOfAssets", + "rightToProfitOrIncomeFromAssets", + "nominee", + "nominator" + ], + "codelist": "interestType.csv", + "openCodelist": false + }, + "directOrIndirect": { + "title": "Direct or Indirect", + "description": "How directly the interest is exercised by the interested party. The value MUST be 'indirect' if intermediate entities or agents are known to exist, and MUST be 'direct' if such intermediaries are known not to exist. Otherwise the value MUST be 'unknown'.", + "type": "string", + "enum": [ + "direct", + "indirect", + "unknown" + ], + "codelist": "directOrIndirect.csv", + "openCodelist": false + }, + "beneficialOwnershipOrControl": { + "title": "Beneficial Ownership or Control", + "description": "Whether this interest (alone or with others) means the interested party is a beneficial owner of the subject. If 'true' the interested party MUST be a natural person. The definition of 'beneficial owner' in operation SHOULD be noted in accompanying guidance (such as a publication policy or data use guide).", + "type": "boolean" + }, + "details": { + "title": "Details", + "description": "The local name given to this kind of interest, or further information (semi-structured or unstructured) to clarify the nature of the interest.", + "type": "string" + }, + "share": { + "title": "Percentage Share", + "description": "The proportion of this type of interest held by the interested party, where an interest is countable. Provide the `exact` percentage if known. 
Otherwise, `minimum` (or `exclusiveMinimum`) and `maximum` (or `exclusiveMaximum`) can be used to record the range into which the proportion falls. (The `minimum` and `maximum` values are inclusive.)", + "type": "object", + "properties": { + "exact": { + "title": "Exact percentage", + "description": "The exact share of this interest held (if available).", + "type": "number", + "maximum": 100, + "minimum": 0 + }, + "maximum": { + "title": "Maximum Percentage", + "description": "The inclusive upper bound of the share of this interest.", + "type": "number", + "maximum": 100, + "minimum": 0 + }, + "minimum": { + "title": "Minimum Percentage", + "description": "The inclusive lower bound of the share of this interest.", + "type": "number", + "maximum": 100, + "minimum": 0 + }, + "exclusiveMinimum": { + "title": "Exclusive Minimum percentage", + "description": "The exclusive lower bound of the share of this interest.", + "type": "number", + "maximum": 100, + "minimum": 0 + }, + "exclusiveMaximum": { + "title": "Exclusive Maximum Percentage", + "description": "The exclusive upper bound of the share of this interest.", + "type": "number", + "maximum": 100, + "minimum": 0 + } + } + }, + "startDate": { + "title": "Start Date", + "description": "The date from which this interest was active. The date MUST be given in YYYY-MM-DD format. Where a precise month or date are unknown, the value may be rounded to the first day of the (first) month. This rounding SHOULD be noted in accompanying guidance (such as a publication policy or data use guide).", + "type": "string", + "format": "date" + }, + "endDate": { + "title": "End Date", + "description": "The date from which this interest ceased to exist. The date MUST be given in YYYY-MM-DD format. Where a precise month or date are unknown, the value may be rounded to the first day of the (first) month. 
This rounding SHOULD be noted in accompanying guidance (such as a publication policy or data use guide).", + "type": "string", + "format": "date" + } + } + } + } +} diff --git a/libcovebods/data/schema-0-4-0/statement.json b/libcovebods/data/schema-0-4-0/statement.json new file mode 100644 index 0000000..b626336 --- /dev/null +++ b/libcovebods/data/schema-0-4-0/statement.json @@ -0,0 +1,337 @@ +{ + "$id": "urn:statement", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "Statements", + "description": "An array of Statements.", + "version": "0.4", + "type": "array", + "items": { + "$ref": "#/$defs/Statement" + }, + "$defs": { + "Statement": { + "title": "Statement", + "description": "A claim about a person, entity or relationship, made at a particular point in time.", + "type": "object", + "properties": { + "statementId": { + "title": "Statement Identifier", + "description": "A persistent globally unique identifier for this Statement. Length MUST be 32 - 64 characters (inclusive).", + "type": "string", + "minLength": 32, + "maxLength": 64 + }, + "statementDate": { + "title": "Statement Date", + "description": "The date on which this statement was declared by the source, in full-date (YYYY-MM-DD) or date-time (e.g. YYYY-MM-DDTHH:MM:SSZ) format. See the IETF RFC3339 standard, section 5.6.", + "type": "string", + "anyOf": [ + { + "format": "date" + }, + { + "format": "date-time" + } + ] + }, + "annotations": { + "title": "Annotations", + "description": "Annotations about this Statement or parts of this Statement", + "type": "array", + "items": { + "$ref": "#/$defs/Annotation" + } + }, + "publicationDetails": { + "title": "Publication Details", + "description": "Information concerning the publication of this Statement.", + "type": "object", + "properties": { + "publicationDate": { + "title": "Publication date", + "description": "The date on which this statement was published, in full-date (YYYY-MM-DD) or date-time (e.g. 
YYYY-MM-DDTHH:MM:SSZ) format. See the IETF RFC3339 standard, section 5.6.", + "type": "string", + "anyOf": [ + { + "format": "date" + }, + { + "format": "date-time" + } + ] + }, + "bodsVersion": { + "title": "BODS Version", + "description": "The version of the Beneficial Ownership Data Standard to which this Statement conforms, expressed as major.minor (e.g. 0.2 or 1.0). In a published BODS dataset, all Statements MUST have the same major version number.", + "type": "string", + "pattern": "^(\\d+\\.)(\\d+)$" + }, + "license": { + "title": "License URL", + "description": "A link to the license that applies to this Statement. The canonical URI of the license SHOULD be used. Publishers are encouraged to use a Public Domain Dedication or Open Definition Conformant (http://opendefinition.org/licenses/) license.", + "type": "string", + "format": "uri" + }, + "publisher": { + "type": "object", + "title": "Publisher", + "description": "Details of the organisation or person publishing a Statement.", + "properties": { + "name": { + "title": "Name", + "description": "The name of the publisher.", + "type": "string" + }, + "url": { + "title": "URL", + "description": "The URL where details of the full dataset, or of the publisher, can be found.", + "type": "string", + "format": "uri" + } + }, + "anyOf": [ + { + "required": [ + "name" + ] + }, + { + "required": [ + "url" + ] + } + ] + } + }, + "required": [ + "publicationDate", + "bodsVersion", + "publisher" + ] + }, + "source": { + "title": "Source", + "description": "The source of information in this statement. Each statement SHOULD contain source information.", + "$ref": "urn:components#/$defs/Source" + }, + "declaration": { + "title": "Declaration Reference", + "description": "An identifier or reference for a declaration within the publisher’s system. 
Where a Statement is a claim from a particular declaration (made at a point in time by a `source` about a `declarationSubject`) this field identifies the declaration.", + "type": "string" + }, + "declarationSubject": { + "title": "Declaration Subject", + "description": "A `recordId` value for the subject of a beneficial ownership network (always an entity or person).", + "type": "string" + }, + "recordId": { + "title": "Record Identifier", + "description": "A unique identifier for the record (within the publisher's system) to which this Statement relates. (A record captures information about an entity, natural person or relationship within the beneficial ownership network of a particular declaration subject.)", + "type": "string" + }, + "recordType": { + "title": "Record Type", + "description": "The type of record (within the publisher's system) to which this Statement relates: entity, person, or relationship.", + "type": "string", + "enum": [ + "entity", + "person", + "relationship" + ], + "codelist": "recordType.csv", + "openCodelist": false + }, + "recordStatus": { + "title": "Record Status", + "description": "The lifecycle status of the record (within the publisher's system) to which this Statement relates, using the recordStatus codelist.", + "type": "string", + "enum": [ + "new", + "updated", + "closed" + ], + "codelist": "recordStatus.csv", + "openCodelist": false + }, + "recordDetails": { + "title": "Record Details", + "description": "The details of the entity, person or relationship as declared on the Statement Date.", + "type": "object" + } + }, + "allOf": [ + { + "if": { + "properties": { + "recordType": { + "const": "entity" + } + } + }, + "then": { + "properties": { + "recordDetails": { + "$ref": "urn:entity" + } + } + } + }, + { + "if": { + "properties": { + "recordType": { + "const": "person" + } + } + }, + "then": { + "properties": { + "recordDetails": { + "$ref": "urn:person" + } + } + } + }, + { + "if": { + "properties": { + "recordType": { + 
"const": "relationship" + } + } + }, + "then": { + "properties": { + "recordDetails": { + "$ref": "urn:relationship" + } + } + } + } + ], + "required": [ + "statementId", + "declarationSubject", + "recordId", + "recordType", + "recordDetails", + "statementDate" + ] + }, + "Annotation": { + "title": "Annotation", + "description": "Additional information about the data contained in this Statement. Annotations can apply to a whole statement, an object or a single field. Custom properties can be included within the Annotation object to provide structured data where required.", + "type": "object", + "properties": { + "statementPointerTarget": { + "title": "Statement Fragment Pointer", + "description": "An RFC6901 JSON Pointer (https://tools.ietf.org/html/rfc6901) describing the target fragment of the statement that this Annotation applies to, starting from the root of the Statement. An empty string (\"\") indicates that the Annotation applies to the whole Statement.", + "type": "string" + }, + "creationDate": { + "title": "Creation Date", + "description": "The date on which this Annotation was created, in full-date (YYYY-MM-DD) or date-time (e.g. YYYY-MM-DDTHH:MM:SSZ) format. 
See the IETF RFC3339 standard, section 5.6.", + "type": "string", + "anyOf": [ + { + "format": "date" + }, + { + "format": "date-time" + } + ] + }, + "createdBy": { + "title": "Created By", + "description": "The person, organisation or agent that created this Annotation.", + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "The name of the person, organisation or agent that created this Annotation.", + "type": "string" + }, + "uri": { + "title": "URI", + "description": "An optional URI to identify the person, organisation or agent that created this Annotation.", + "type": "string", + "format": "uri" + } + } + }, + "motivation": { + "title": "Motivation", + "description": "The reason for this Annotation, using the annotationMotivation codelist.", + "type": "string", + "enum": [ + "commenting", + "correcting", + "identifying", + "linking", + "transformation" + ], + "codelist": "annotationMotivation.csv", + "openCodelist": false + }, + "description": { + "title": "Description", + "description": "A free text description providing extra information about part of this Statement.", + "type": "string" + }, + "transformedContent": { + "type": "string", + "title": "Transformed Content", + "description": "A representation of the Annotation target after the transformation in the `description` field has been applied. This field MUST only be used when the `motivation` is 'transformation'." + }, + "url": { + "title": "URL", + "description": "A linked resource that annotates, provides context for or enhances this Statement. The content of the resource, or the relationship to the statement, MAY be described in the `description` field. 
This field is REQUIRED if the value of `motivation` is 'linking'.", + "type": "string", + "format": "uri" + } + }, + "allOf": [ + { + "if": { + "properties": { + "motivation": { + "const": "linking" + } + } + }, + "then": { + "required": [ + "statementPointerTarget", + "motivation", + "url" + ] + }, + "else": { + "required": [ + "statementPointerTarget", + "motivation" + ] + } + }, + { + "if": { + "not": { + "properties": { + "motivation": { + "const": "transformation" + } + } + } + }, + "then": { + "properties": { + "transformedContent":{ + "const": "" + } + } + } + } + ] + } + } +} diff --git a/libcovebods/jsonschemavalidate.py b/libcovebods/jsonschemavalidate.py index 8d1cbf8..c956a41 100644 --- a/libcovebods/jsonschemavalidate.py +++ b/libcovebods/jsonschemavalidate.py @@ -4,6 +4,7 @@ from jsonschema import FormatChecker from jsonschema.exceptions import ValidationError from jsonschema.validators import Draft4Validator +from jsonschema.validators import Draft202012Validator import libcovebods.data_reader from libcovebods.schema import SchemaBODS @@ -101,10 +102,20 @@ def __init__(self, schema: SchemaBODS): def validate(self, data_reader: libcovebods.data_reader.DataReader) -> list: """Call with data. 
Results are returned.""" - validator = Draft4Validator( - schema=self._schema._pkg_schema_obj, format_checker=FormatChecker() - ) - validator.VALIDATORS["oneOf"] = oneOf_draft4 + if self._schema.is_schema_version_equal_to_or_greater_than("0.4"): + # Get the registry + registry = self._schema._pkg_schema_obj + + # Make the validator + statement_schema = registry.contents("urn:statement") + validator = Draft202012Validator( + schema=statement_schema, registry=registry, format_checker=FormatChecker() + ) + else: + validator = Draft4Validator( + schema=self._schema._pkg_schema_obj, format_checker=FormatChecker() + ) + validator.VALIDATORS["oneOf"] = oneOf_draft4 output = [] all_data = data_reader.get_all_data() for e in validator.iter_errors(all_data): @@ -141,12 +152,16 @@ def __init__( def json(self): """Return representation of this error in JSON.""" - path_ending = self._path[-1] - if isinstance(self._path[-1], int) and len(self._path) >= 2: - # We're dealing with elements in an array of items at this point - path_ending = "{}/[number]".format(self._path[-2]) - elif isinstance(self._path[0], int) and len(self._path) == 1: - path_ending = "[number]" + #print(self._path, self._message) + if self._path: + path_ending = self._path[-1] + if isinstance(self._path[-1], int) and len(self._path) >= 2: + # We're dealing with elements in an array of items at this point + path_ending = "{}/[number]".format(self._path[-2]) + elif isinstance(self._path[0], int) and len(self._path) == 1: + path_ending = "[number]" + else: + path_ending = '$' return { "message": self._message, diff --git a/libcovebods/schema.py b/libcovebods/schema.py index 1a2945e..96aad4c 100644 --- a/libcovebods/schema.py +++ b/libcovebods/schema.py @@ -7,12 +7,19 @@ import libcovebods.data_reader from libcovebods.config import LibCoveBODSConfig +from libcovebods.schema_dir import schema_registry, get_scheme_file_data try: from functools import cached_property except ImportError: from cached_property import 
cached_property # type: ignore +def record_based_statement(statement): + if ("recordDetails" in statement or "recordId" in statement or + "recordType" in statement): + return True + else: + return False class SchemaBODS: def __init__( @@ -49,6 +56,16 @@ def __work_out_schema_version( # If bad data passed, then we assume it's the default version all_data = data_reader.get_all_data() if not isinstance(all_data, list) or len(all_data) == 0: + if not isinstance(all_data, list): + if record_based_statement(all_data): + self.schema_version = self.config.config["schema_latest_version"] + self.pkg_schema_url = self.config.config["schema_versions"][ + self.schema_version + ]["schema_url"] + self.schema_host = self.config.config["schema_versions"][ + self.schema_version + ]["schema_url_host"] + return self.pkg_schema_url = self.config.config["schema_url"] self.schema_host = self.config.config["schema_url_host"] self.schema_version_attempted = self.config.config["schema_version"] @@ -66,7 +83,7 @@ def __work_out_schema_version( or "bodsVersion" not in statement["publicationDetails"] ): # Use default version if not record based else latest version (revisit) - if not "recordDetails" in statement: + if not record_based_statement(statement): self.pkg_schema_url = self.config.config["schema_url"] self.schema_host = self.config.config["schema_url_host"] self.schema_version_attempted = self.config.config["schema_version"] @@ -92,7 +109,7 @@ def __work_out_schema_version( "schema_version": str(self.schema_version_attempted), } # Use latest non-record version if not record based else latest version (revisit) - if not "recordDetails" in statement: + if not record_based_statement(statement): self.schema_version = self.config.config["schema_latest_nonrecord_version"] self.pkg_schema_url = self.config.config["schema_versions"][ self.schema_version @@ -118,7 +135,7 @@ def __work_out_schema_version( "schema_version": self.schema_version_attempted, } # Use latest non-record version if not 
record based else latest version (revisit) - if not "recordDetails" in statement: + if not record_based_statement(statement): self.schema_version = self.config.config["schema_latest_nonrecord_version"] self.pkg_schema_url = self.config.config["schema_versions"][ self.schema_version @@ -154,13 +171,8 @@ def get_entity_statement_types_list(self): ): return statement_schema["properties"]["entityType"]["enum"] else: - for statement_schema in self._pkg_schema_obj['items']['allOf']: - if ( - statement_schema['if']['properties']['recordType']['const'] - == 'entity' - ): - entity_type = statement_schema['then']['properties']['recordDetails']['properties']['entityType'] - return entity_type['properties']['type']['enum'] + entity_schema = get_scheme_file_data(self.pkg_schema_url, 'entity') + return entity_schema['properties']['entityType']['properties']['type']['enum'] def get_person_statement_types_list(self): if self.is_schema_version_equal_to_or_less_than("0.3"): @@ -171,15 +183,8 @@ def get_person_statement_types_list(self): ): return statement_schema["properties"]["personType"]["enum"] else: - for statement_schema in self._pkg_schema_obj['items']['allOf']: - if ( - statement_schema['if']['properties']['recordType']['const'] - == 'person' - ): - person_type = statement_schema['then']['properties']['recordDetails']['properties']['personType'] - # Inconsistancy with schema for entity record (investigate???) 
- #return person_type['properties']['type']['enum'] - return person_type['enum'] + person_schema = get_scheme_file_data(self.pkg_schema_url, 'person') + return person_schema['properties']['personType']['enum'] def get_ownership_or_control_statement_interest_statement_types_list(self): if self.is_schema_version_equal_to_or_less_than("0.3"): @@ -190,14 +195,8 @@ def get_ownership_or_control_statement_interest_statement_types_list(self): ): return statement_schema["properties"]["interests"]["items"]["properties"]["type"]["enum"] else: - for statement_schema in self._pkg_schema_obj['items']['allOf']: - if ( - statement_schema['if']['properties']['recordType']['const'] - == 'relationship' - ): - interests = statement_schema['then']['properties']['recordDetails']['properties']["interests"] - print(interests) - return interests["items"]["properties"]["type"]["enum"] + relationship_schema = get_scheme_file_data(self.pkg_schema_url, 'relationship') + return relationship_schema["$defs"]["Interest"]["properties"]["type"]["enum"] def get_ownership_or_control_statement_interest_direct_or_indirect_list(self): if self.is_schema_version_equal_to_or_less_than("0.3"): @@ -215,13 +214,8 @@ def get_ownership_or_control_statement_interest_direct_or_indirect_list(self): else: return [] else: - for statement_schema in self._pkg_schema_obj['items']['allOf']: - if ( - statement_schema['if']['properties']['recordType']['const'] - == 'relationship' - ): - interests = statement_schema['then']['properties']['recordDetails']['properties']["interests"] - return interests["items"]["properties"]["directOrIndirect"] + relationship_schema = get_scheme_file_data(self.pkg_schema_url, 'relationship') + return relationship_schema["$defs"]["Interest"]["properties"]["directOrIndirect"]["enum"] def get_person_statement_political_exposure_status_list(self): if self.is_schema_version_equal_to_or_less_than("0.3"): @@ -239,13 +233,8 @@ def get_person_statement_political_exposure_status_list(self): else: return 
[] else: - for statement_schema in self._pkg_schema_obj['items']['allOf']: - if ( - statement_schema['if']['properties']['recordType']['const'] - == 'person' - ): - exposure = statement_schema['then']['properties']['recordDetails']['properties']["politicalExposure"] - return exposure["properties"]["status"]["enum"] + person_schema = get_scheme_file_data(self.pkg_schema_url, 'person') + return person_schema['properties']["politicalExposure"]["properties"]["status"]["enum"] def get_inconsistent_schema_version_used_for_statement(self, statement): # If version is not set at all, then we assume it's the default version @@ -282,20 +271,36 @@ def is_schema_version_equal_to_or_less_than(self, version): version ) + def is_schema_version_less_than(self, version): + """Return True if the schema version in use is strictly lower than version.""" + return packaging_version.parse(self.schema_version) < packaging_version.parse( + version + ) + def get_package_schema_fields(self) -> set: - return set(schema_dict_fields_generator(self._pkg_schema_obj)) + if self.is_schema_version_equal_to_or_greater_than("0.4"): + return set(schema_dict_fields_generator(self._pkg_schema_obj.contents("urn:statement"), + registry=self._pkg_schema_obj)) + else: + return set(schema_dict_fields_generator(self._pkg_schema_obj)) @cached_property def pkg_schema_str(self): - uri_scheme = urlparse(self.pkg_schema_url).scheme - if uri_scheme == "http" or uri_scheme == "https": - raise NotImplementedError( - "Downloading schema files over HTTP/HTTPS is not supported" - ) + if self.is_schema_version_equal_to_or_greater_than("0.4"): + return "" else: - with open(self.pkg_schema_url) as fp: - return fp.read() + uri_scheme = urlparse(self.pkg_schema_url).scheme + if uri_scheme == "http" or uri_scheme == "https": + raise NotImplementedError( + "Downloading schema files over HTTP/HTTPS is not supported" + ) + else: + with open(self.pkg_schema_url) as fp: + return fp.read() @property def _pkg_schema_obj(self): - return
json.loads(self.pkg_schema_str) + if self.is_schema_version_equal_to_or_greater_than("0.4"): + return schema_registry(self.pkg_schema_url) + else: + return json.loads(self.pkg_schema_str) diff --git a/libcovebods/schema_dir.py b/libcovebods/schema_dir.py new file mode 100644 index 0000000..42646f0 --- /dev/null +++ b/libcovebods/schema_dir.py @@ -0,0 +1,41 @@ +import json +from pathlib import Path +from jscc.schema import is_json_schema, is_codelist, is_missing_property +from jscc.testing.filesystem import walk_json_data, walk_csv_data +from referencing import Registry, Resource +from referencing.jsonschema import DRAFT202012 + + +def get_schema_paths(schema_dir): + """ + Returns an array of paths, filenames, and contents (parsed JSON) for each of the schema files. + """ + schema_paths = [ + (path, name, data) for path, name, _, data in walk_json_data(top=schema_dir) if is_json_schema(data) + ] + return schema_paths + +def schema_registry(schema_dir): + """ + This loads the BODS schema files into a jsonschema registry, so the + validator can resolve $refs across all of the schema files. + """ + schemas = [] + for _, _, schema in get_schema_paths(schema_dir): + schemas.append((schema.get("$id"), Resource(contents=schema, specification=DRAFT202012))) + + registry = Registry().with_resources(schemas) + return registry + +def get_scheme_file_data(schema_dir, component): + """ + Returns the parsed JSON of the first schema file (in filename order) in schema_dir whose name starts with component. + Raises FileNotFoundError if no schema file matches. + """ + for file_path in sorted(Path(schema_dir).glob("*.json")): + if file_path.name.startswith(component): + with open(file_path) as json_file: + return json.load(json_file) + raise FileNotFoundError( + "No schema file starting with {!r} found in {}".format(component, schema_dir) + ) diff --git a/setup.py b/setup.py index 27ce7e6..5319be3 100644 --- a/setup.py +++ b/setup.py @@ -18,6 +18,7 @@ # 'missingPersonType' is a dependency of 'missingPersonReason' # in tests/fixtures/0.1/badfile_all_validation_errors.json "jsonschema==4.23.0", + "jscc==0.2.4", "pytz", "ijson", # Required for jsonschema to validate URIs