From 8a554078a9285133d30d3039eb865724ef644683 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Thu, 9 Jan 2025 18:18:10 +0100 Subject: [PATCH 01/22] Add and use case-insensitive .source.get_source() --- sdmx/__init__.py | 3 ++- sdmx/client.py | 4 ++-- sdmx/source/__init__.py | 22 ++++++++++++++++++++++ sdmx/testing/__init__.py | 6 ++++-- sdmx/tests/test_source.py | 12 +++++++++++- 5 files changed, 41 insertions(+), 6 deletions(-) diff --git a/sdmx/__init__.py b/sdmx/__init__.py index 26e4c85d3..942e2957e 100644 --- a/sdmx/__init__.py +++ b/sdmx/__init__.py @@ -5,7 +5,7 @@ from sdmx.format.xml.common import install_schemas, validate_xml from sdmx.reader import read_sdmx, to_sdmx from sdmx.rest import Resource -from sdmx.source import add_source, list_sources +from sdmx.source import add_source, get_source, list_sources from sdmx.writer import to_csv, to_pandas, to_xml __all__ = [ @@ -13,6 +13,7 @@ "Request", "Resource", "add_source", + "get_source", "install_schemas", "list_sources", "log", diff --git a/sdmx/client.py b/sdmx/client.py index 00cf2f5d7..8426cd201 100644 --- a/sdmx/client.py +++ b/sdmx/client.py @@ -10,7 +10,7 @@ from sdmx.reader import get_reader from sdmx.rest import Resource from sdmx.session import ResponseIO, Session -from sdmx.source import NoSource, list_sources, sources +from sdmx.source import NoSource, get_source, list_sources if TYPE_CHECKING: import io @@ -79,7 +79,7 @@ def __init__( **session_opts, ): try: - self.source = sources[source.upper()] if source else NoSource + self.source = get_source(source) if source else NoSource except KeyError: raise ValueError( f"source must be None or one of: {' '.join(list_sources())}" diff --git a/sdmx/source/__init__.py b/sdmx/source/__init__.py index 6cbbfa9fd..fd2a0e8eb 100644 --- a/sdmx/source/__init__.py +++ b/sdmx/source/__init__.py @@ -1,5 +1,7 @@ import importlib.resources import json +import logging +import re from dataclasses import dataclass, field from enum import Enum from 
importlib import import_module @@ -15,6 +17,8 @@ if TYPE_CHECKING: import sdmx.rest.common +log = logging.getLogger(__name__) + #: Data sources registered with :mod:`sdmx`. sources: dict[str, "Source"] = {} @@ -250,6 +254,24 @@ def add_source( sources[id] = SourceClass(**_info) +def get_source(id: str) -> Source: + """Return the Source with the given `id`. + + `id` is matched case-insensitively. + """ + try: + return sources[id] + except KeyError: + # Try to find a case-insensitive match + for k, v in sources.items(): + if re.match(k, id, flags=re.IGNORECASE): + log.debug( + f"Return source {v.id!r} as a case-insensitive match for id {id!r}" + ) + return v + raise + + def list_sources(): """Return a sorted list of valid source IDs. diff --git a/sdmx/testing/__init__.py b/sdmx/testing/__init__.py index 58daf3426..231b84328 100644 --- a/sdmx/testing/__init__.py +++ b/sdmx/testing/__init__.py @@ -12,7 +12,7 @@ from sdmx.exceptions import HTTPError from sdmx.rest import Resource from sdmx.session import Session -from sdmx.source import DataContentType, Source, sources +from sdmx.source import DataContentType, Source, get_source from sdmx.testing.report import ServiceReporter from sdmx.util.requests import offline @@ -151,7 +151,7 @@ class (e.g. :class:`.DataSourceTest` subclass). 
# Use the test class' source_id attr to look up the Source class cls = metafunc.cls source = ( - sources[cls.source_id] + get_source(cls.source_id) if cls.source_id != "TEST" else metafunc.config.stash[KEY_SOURCE] ) @@ -272,6 +272,8 @@ def test_data_path(pytestconfig): @pytest.fixture(scope="class") def testsource(pytestconfig): """Fixture: the :attr:`.Source.id` of a temporary data source.""" + from sdmx.source import sources + s = pytestconfig.stash[KEY_SOURCE] sources[s.id] = s diff --git a/sdmx/tests/test_source.py b/sdmx/tests/test_source.py index e3c0143cd..db393c99f 100644 --- a/sdmx/tests/test_source.py +++ b/sdmx/tests/test_source.py @@ -1,7 +1,17 @@ import pytest from sdmx.model import v21 as model -from sdmx.source import Source, add_source, list_sources, sources +from sdmx.source import Source, add_source, get_source, list_sources, sources + + +def test_get_source(caplog): + s1 = get_source("WB") + assert 0 == len(caplog.messages) + + s2 = get_source("wb") + assert "'WB' as a case-insensitive match for id 'wb'" in caplog.messages[-1] + + assert s1 == s2 def test_list_sources(): From c201adafbc64ef42fa8990538379786c9b52fbe7 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Thu, 9 Jan 2025 18:19:19 +0100 Subject: [PATCH 02/22] Drop Python 3.8 importlib.resources compat --- sdmx/source/__init__.py | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/sdmx/source/__init__.py b/sdmx/source/__init__.py index fd2a0e8eb..59549bb23 100644 --- a/sdmx/source/__init__.py +++ b/sdmx/source/__init__.py @@ -282,15 +282,7 @@ def list_sources(): def load_package_sources(): """Discover all sources listed in :file:`sources.json`.""" - try: - ref = importlib.resources.files("sdmx").joinpath("sources.json") - except AttributeError: # Python <3.9 - from copy import copy - - with importlib.resources.path("sdmx", "sources.json") as path: - ref = copy(path) - - with ref.open("rb") as f: + with 
importlib.resources.files("sdmx").joinpath("sources.json").open("rb") as f: for info in json.load(f): add_source(info) From 027e1e5e94235778c5ae083f818997e6c480f1a8 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Thu, 9 Jan 2025 18:20:53 +0100 Subject: [PATCH 03/22] Add AR1 data provider, docs, tests Closes #188. --- doc/sources.rst | 30 ++++++++++++++++++++++++++++++ sdmx/sources.json | 26 ++++++++++++++++++++++++++ sdmx/tests/test_sources.py | 13 +++++++++++++ 3 files changed, 69 insertions(+) diff --git a/doc/sources.rst b/doc/sources.rst index 91eb6a810..b33f82446 100644 --- a/doc/sources.rst +++ b/doc/sources.rst @@ -115,6 +115,36 @@ SDMX-JSON — .. autoclass:: sdmx.source.abs_json.Source() :members: +.. _AR1: + +``AR1``: National Institute of Statistics and Censuses (Argentina) +------------------------------------------------------------------ + +SDMX-ML — `Website `__ + +- Spanish name: Instituto Nacional de Estadística y Censos + +This source does not provide an actual SDMX-REST web service. +Instead, a set of SDMX-ML 2.1 files with data messages only (no structure or metadata) are available at URLs with the form: ``https://sdds.indec.gob.ar/files/data/IND.XML``. +These can be used with :class:`Client` by: + +- Using ``https://sdds.indec.gob.ar/files/`` as the base URL. +- Accessing only the :attr:`.Resource.data` endpoint, which gives the ``…/data/…`` URL component. +- Treating ``IND.XML`` (in reality, a file name with suffix) as the resource ID. +- Using no query key or parameters. + +.. code-block:: python + + c = sdmx.Client("AR1") + # The URL https://sdds.indec.gob.ar/files/data/IND.XML + dm = c.data("IND.XML") + +This is the same as using a non-source-specific Client to query the URL directly: + +.. code-block:: python + + c = sdmx.Client() + dm = c.get(url="https://sdds.indec.gob.ar/files/data/IND.XML") .. 
_BBK: diff --git a/sdmx/sources.json b/sdmx/sources.json index 8b1854545..9bdd80957 100644 --- a/sdmx/sources.json +++ b/sdmx/sources.json @@ -26,6 +26,32 @@ "url": "https://api.data.abs.gov.au", "name": "Australian Bureau of Statistics" }, + { + "id": "AR1", + "name": "Argentina", + "url": "https://sdds.indec.gob.ar/files/", + "supports": { + "actualconstraint": false, + "allowedconstraint": false, + "agencyscheme": false, + "categorisation": false, + "categoryscheme": false, + "codelist": false, + "conceptscheme": false, + "contentconstraint": false, + "dataconsumerscheme": false, + "dataproviderscheme": false, + "dataflow": false, + "datastructure": false, + "hierarchicalcodelist": false, + "metadataflow": false, + "metadatastructure": false, + "organisationscheme": false, + "provisionagreement": false, + "structure": false, + "structureset": false + } + }, { "id": "BBK", "url": "https://api.statistiken.bundesbank.de/rest", diff --git a/sdmx/tests/test_sources.py b/sdmx/tests/test_sources.py index 29e39696d..8d2cc5275 100644 --- a/sdmx/tests/test_sources.py +++ b/sdmx/tests/test_sources.py @@ -120,6 +120,19 @@ class TestABS_JSON(DataSourceTest): } +class TestAR1(DataSourceTest): + source_id = "AR1" + + endpoint_args = dict( + data=dict(resource_id="WOE.XML"), + ) + + xfail = { + "metadata": NotImplementedError, # Internal to sdmx1 + "registration": ValueError, # Internal to sdmx1 + } + + class TestBBK(DataSourceTest): source_id = "BBK" From 12974c708050b328d564e51107654cc9f5709e4f Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Thu, 9 Jan 2025 18:21:56 +0100 Subject: [PATCH 04/22] Add StatCan REST data source, docs, tests Closes #186. 
--- doc/sources.rst | 20 ++++++++++++++++++++ sdmx/sources.json | 26 ++++++++++++++++++++++++++ sdmx/tests/test_sources.py | 18 ++++++++++++++++++ 3 files changed, 64 insertions(+) diff --git a/doc/sources.rst b/doc/sources.rst index b33f82446..5db8defb0 100644 --- a/doc/sources.rst +++ b/doc/sources.rst @@ -455,6 +455,26 @@ API documentation `(en) `__, +`(fr) `__. + +- The source only provides an SDMX-REST API for the ``/data/`` endpoint. +- Some structural artefacts are available, but not through an SDMX-REST API. + Instead, a set of SDMX-ML 2.1 files with structure messages are available at URLs with the form: ``https://www150.statcan.gc.ca/t1/wds/sdmx/statcan/rest/structure/Data_Structure_17100005``. + (Note that this lacks the URL path components for the agency ID and version, which would resemble ``…/structure/StatCan/Data_Structure_17100005/latest``.) + + These can be queried directly using any Client: + + .. code-block:: python + + c = sdmx.Client("StatCan") # or sdmx.Client() + dm = c.get(url="https://www150.statcan.gc.ca/t1/wds/sdmx/statcan/rest/structure/Data_Structure_17100005") .. 
_UNESCO: diff --git a/sdmx/sources.json b/sdmx/sources.json index 9bdd80957..490996f62 100644 --- a/sdmx/sources.json +++ b/sdmx/sources.json @@ -383,6 +383,32 @@ "url": "http://andmebaas.stat.ee/sdmx-json", "name": "Statistics Estonia" }, + { + "id": "StatCan", + "name": "Statistics Canada", + "url": "https://www150.statcan.gc.ca/t1/wds/sdmx/statcan/rest/", + "supports": { + "actualconstraint": false, + "allowedconstraint": false, + "agencyscheme": false, + "categorisation": false, + "categoryscheme": false, + "codelist": false, + "conceptscheme": false, + "contentconstraint": false, + "dataconsumerscheme": false, + "dataproviderscheme": false, + "dataflow": false, + "datastructure": false, + "hierarchicalcodelist": false, + "metadataflow": false, + "metadatastructure": false, + "organisationscheme": false, + "provisionagreement": false, + "structure": false, + "structureset": false + } + }, { "id": "UNESCO", "name": "UN Educational, Scientific and Cultural Organization", diff --git a/sdmx/tests/test_sources.py b/sdmx/tests/test_sources.py index 8d2cc5275..0914225df 100644 --- a/sdmx/tests/test_sources.py +++ b/sdmx/tests/test_sources.py @@ -594,6 +594,24 @@ class TestSTAT_EE(DataSourceTest): } +class TestStatCan(DataSourceTest): + source_id = "StatCan" + + endpoint_args = dict( + data=dict( + resource_id="DF_17100005", + key=".1.138", + params=dict(startPeriod=2015, endPeriod=2016), + ), + structure=dict(resource_id="Data_Structure_17100005"), + ) + + xfail = { + "metadata": NotImplementedError, # Internal to sdmx1 + "registration": ValueError, # Internal to sdmx1 + } + + class TestUNESCO(DataSourceTest): """UNESCO. From 99fcd13f36bd7db232954301357205a103d7c2d7 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Thu, 9 Jan 2025 18:22:57 +0100 Subject: [PATCH 05/22] Add UY110 REST data source, docs, tests Closes #187. 
--- doc/sources.rst | 11 +++++++++++ sdmx/sources.json | 15 +++++++++++++++ sdmx/tests/test_sources.py | 13 +++++++++++++ 3 files changed, 39 insertions(+) diff --git a/doc/sources.rst b/doc/sources.rst index 5db8defb0..9e419f115 100644 --- a/doc/sources.rst +++ b/doc/sources.rst @@ -553,6 +553,17 @@ SDMX-ML — - Supports preview_data and series-key based key validation. +.. _UY110: + +``UY110``: Labour Market Information System (Uruguay) +----------------------------------------------------- + +SDMX-ML — +Website `(en) `__, +`(es) `__. + +- Spanish name: Sistema de Información de Mercado Laboral +- Operated by the Ministry of Labour and Social Security (Ministerio de Trabajo y Seguridad Social, MTSS), the National Institute of Statistics (Instituto Nacional de Estadística, INE) and the Social Security Bank (Banco de Previsión Social, BPS) of Uruguay. .. _WB: diff --git a/sdmx/sources.json b/sdmx/sources.json index 490996f62..c72720d7a 100644 --- a/sdmx/sources.json +++ b/sdmx/sources.json @@ -464,6 +464,21 @@ "preview": true } }, + { + "id": "UY110", + "name": "Uruguay", + "url": "https://sdmx-mtss.simel.mtss.gub.uy/rest", + "supports": { + "agencyscheme": false, + "dataconsumerscheme": false, + "dataproviderscheme": false, + "hierarchicalcodelist": false, + "metadataflow": false, + "metadatastructure": false, + "provisionagreement": false, + "structureset": false + } + }, { "id": "WB", "name": "World Bank World Integrated Trade Solution", diff --git a/sdmx/tests/test_sources.py b/sdmx/tests/test_sources.py index 0914225df..1e0702f9f 100644 --- a/sdmx/tests/test_sources.py +++ b/sdmx/tests/test_sources.py @@ -666,6 +666,19 @@ class TestUNSD(DataSourceTest): } +class TestUY110(DataSourceTest): + source_id = "UY110" + + xfail = { + "metadata": NotImplementedError, # Internal to sdmx1 + # 400: "Can not create reference, target structure is not maintainable, and no + # identifiable reference parameters present" + "organisationscheme": HTTPError, + "registration": 
ValueError, # Internal to sdmx1 + "structure": NotImplementedError, # 501 + } + + class TestWB(DataSourceTest): source_id = "WB" xfail = { From 5a190cfe3490bfa2a0c6987e6b87c2a2dace304b Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Thu, 9 Jan 2025 20:48:33 +0100 Subject: [PATCH 06/22] Add test of reading etc. in header --- sdmx/testing/data.py | 2 ++ sdmx/tests/reader/test_reader_xml_v21.py | 31 ++++++++++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/sdmx/testing/data.py b/sdmx/testing/data.py index 2a4ed5c95..e85843e9a 100644 --- a/sdmx/testing/data.py +++ b/sdmx/testing/data.py @@ -296,8 +296,10 @@ def add_specimens(target: list[tuple[Path, str, Optional[str]]], base: Path) -> target.extend( (base.joinpath(*parts), "xml", "data") for parts in [ + ("constructed", "gh-218.xml"), ("INSEE", "CNA-2010-CONSO-SI-A17.xml"), ("INSEE", "IPI-2010-A21.xml"), + ("IMF", "PCPS.xml"), ("ESTAT", "esms.xml"), ("ESTAT", "footer.xml"), ("ESTAT", "NAMA_10_GDP-ss.xml"), diff --git a/sdmx/tests/reader/test_reader_xml_v21.py b/sdmx/tests/reader/test_reader_xml_v21.py index f8e4b922d..e6b3155b9 100644 --- a/sdmx/tests/reader/test_reader_xml_v21.py +++ b/sdmx/tests/reader/test_reader_xml_v21.py @@ -11,6 +11,7 @@ import sdmx import sdmx.message from sdmx import urn +from sdmx.format.xml import validate_xml from sdmx.format.xml.v21 import qname from sdmx.model import common, v21 from sdmx.model.v21 import ContentConstraint, Facet, FacetType, FacetValueType @@ -293,6 +294,36 @@ def test_gh_205(caplog, specimen) -> None: assert text == str(a.text) +def test_gh_218(caplog, specimen) -> None: + """Test of https://github.com/khaeru/sdmx/pull/218.""" + with specimen("constructed/gh-218.xml") as f: + # Specimen is XSD-valid + validate_xml(f) + + f.seek(0) + + # Specimen can be read + msg = sdmx.read_sdmx(f) + + # The message sender has 1 contact, with all attributes populated + assert isinstance(msg, sdmx.message.DataMessage) and msg.header.sender + assert 1 == 
len(msg.header.sender.contact) + contact = msg.header.sender.contact[0] + assert contact.telephone is not None + assert ( + 1 + # Number of localizations of localizable attributes + == len(contact.name.localizations) + == len(contact.org_unit.localizations) + == len(contact.responsibility.localizations) + # Number of values of multi-value attributes + == len(contact.email) + == len(contact.fax) + == len(contact.uri) + == len(contact.x400) + ) + + # Each entry is a tuple with 2 elements: # 1. an instance of lxml.etree.Element to be parsed. # 2. Either: From ee19a3dd70c95e1d4be048880f1d65b158b640b6 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Thu, 9 Jan 2025 20:49:40 +0100 Subject: [PATCH 07/22] Read SDMX-ML 2.1 --- sdmx/reader/xml/v21.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/sdmx/reader/xml/v21.py b/sdmx/reader/xml/v21.py index 86e9f6859..351164537 100644 --- a/sdmx/reader/xml/v21.py +++ b/sdmx/reader/xml/v21.py @@ -331,10 +331,11 @@ def _structures(reader, elem): @end( """ com:AnnotationTitle com:AnnotationType com:AnnotationURL com:None com:URN com:Value - mes:DataSetAction :ReportPeriod md:ReportPeriod mes:DataSetID mes:Email mes:ID - mes:Test mes:Timezone str:CodelistAliasRef str:DataType str:Email str:Expression - str:NullValue str:OperatorDefinition str:PersonalisedName str:Result - str:RulesetDefinition str:Telephone str:URI str:VtlDefaultName str:VtlScalarType + mes:DataSetAction :ReportPeriod md:ReportPeriod mes:DataSetID mes:Email mes:Fax + mes:ID mes:Telephone mes:Test mes:Timezone mes:URI mes:X400 str:CodelistAliasRef + str:DataType str:Email str:Expression str:NullValue str:OperatorDefinition + str:PersonalisedName str:Result str:RulesetDefinition str:Telephone str:URI + str:VtlDefaultName str:VtlScalarType """ ) def _text(reader, elem): @@ -745,10 +746,12 @@ def _contact_start(reader, elem): @end("mes:Contact str:Contact", only=False) def _contact_end(reader, elem): - contact = model.Contact( + 
contact = common.Contact( + email=reader.pop_all("Email"), + fax=reader.pop_all("Fax"), telephone=reader.pop_single("Telephone"), uri=reader.pop_all("URI"), - email=reader.pop_all("Email"), + x400=reader.pop_all("X400"), ) add_localizations(contact.name, reader.pop_all("Name")) From a27fdb8de877f1ee9b1b78854295fde29cbec0d1 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Thu, 9 Jan 2025 21:29:52 +0100 Subject: [PATCH 08/22] Add test of reference to non-standard class (#180) --- sdmx/testing/data.py | 1 + sdmx/tests/reader/test_reader_xml_v21.py | 23 +++++++++++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/sdmx/testing/data.py b/sdmx/testing/data.py index e85843e9a..bde173b35 100644 --- a/sdmx/testing/data.py +++ b/sdmx/testing/data.py @@ -312,6 +312,7 @@ def add_specimens(target: list[tuple[Path, str, Optional[str]]], base: Path) -> for parts in [ ("BIS", "actualconstraint-0.xml"), ("BIS", "hierarchicalcodelist-0.xml"), + ("BIS", "gh-180.xml"), ("ECB", "orgscheme.xml"), ("ECB", "structureset-0.xml"), ("ESTAT", "apro_mk_cola-structure.xml"), diff --git a/sdmx/tests/reader/test_reader_xml_v21.py b/sdmx/tests/reader/test_reader_xml_v21.py index e6b3155b9..4abdfa2ca 100644 --- a/sdmx/tests/reader/test_reader_xml_v21.py +++ b/sdmx/tests/reader/test_reader_xml_v21.py @@ -233,6 +233,29 @@ def test_gh_164(specimen): assert isinstance(da.related_to, v21.NoSpecifiedRelationship) +def test_gh_180(caplog, specimen) -> None: + """Test of https://github.com/khaeru/sdmx/issues/190.""" + with specimen("BIS/gh-180.xml") as f: + # Message is not valid SDMX-ML + assert False is validate_xml(f) + + # Validation logs an error message regarding the non-standard class + assert re.match( + ".*attribute 'package'.*'publicationtable' is not an element of the set", + caplog.messages[-1], + ) + + # Message can still be read + f.seek(0) + msg = sdmx.read_sdmx(f) + assert isinstance(msg, sdmx.message.StructureMessage) + + # Reader logs a warning regarding the missing 
reference + assert re.match( + "Cannot resolve reference to non-SDMX class", caplog.messages[-1] + ) + + def test_gh_199(): """Test of https://github.com/khaeru/sdmx/issues/199.""" import sdmx.format.xml.v21 From be8b8b685495f1e41173efe4b480e3a664f2a372 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Thu, 9 Jan 2025 21:31:35 +0100 Subject: [PATCH 09/22] Log a warning and ignore ref to "PublicationTable" --- sdmx/reader/xml/common.py | 5 ++--- sdmx/reader/xml/v21.py | 18 +++++++++++++++++- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/sdmx/reader/xml/common.py b/sdmx/reader/xml/common.py index 8fa15a432..20e434225 100644 --- a/sdmx/reader/xml/common.py +++ b/sdmx/reader/xml/common.py @@ -77,9 +77,8 @@ def __init__(self, reader, elem, cls_hint=None): # class above target_cls = cls_hint - # DEBUG - # if target_cls is None: - # print(f"{info = }") + if target_cls is None: + raise ValueError(f"Unable to determine target class for {info}", info) self.maintainable = issubclass(target_cls, common.MaintainableArtefact) diff --git a/sdmx/reader/xml/v21.py b/sdmx/reader/xml/v21.py index 351164537..1d05fba5e 100644 --- a/sdmx/reader/xml/v21.py +++ b/sdmx/reader/xml/v21.py @@ -404,7 +404,23 @@ def _ref(reader: Reader, elem): # In a StructureMessage cls_hint = reader.model.DataStructureDefinition - reader.push(QName(elem).localname, reader.reference(elem, cls_hint)) + try: + ref = reader.reference(elem, cls_hint) + except ValueError as e: + # Handle references to known non-standard classes; see + # https://github.com/khaeru/sdmx/issues/180 + info = e.args[1] + if info["package"] == "publicationtable": + log.warning( + "Cannot resolve reference to non-SDMX class " + f"'{info['package']}.{info['class']}'" + ) + # Push the dict of reference info, in case the user wants to make use of it + ref = info + else: # pragma: no cover + raise + + reader.push(QName(elem).localname, ref) @end("com:Annotation") From 8651ec2a4ca1b656eb3c1d78b6dae452e718e920 Mon Sep 
17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Thu, 9 Jan 2025 21:42:07 +0100 Subject: [PATCH 10/22] Move installed_schemas fixture to .testing --- sdmx/testing/__init__.py | 41 ++++++++++++++++++++++++ sdmx/tests/format/test_format_xml.py | 39 ---------------------- sdmx/tests/reader/test_reader_xml_v21.py | 8 ++--- 3 files changed, 45 insertions(+), 43 deletions(-) diff --git a/sdmx/testing/__init__.py b/sdmx/testing/__init__.py index 231b84328..48842656e 100644 --- a/sdmx/testing/__init__.py +++ b/sdmx/testing/__init__.py @@ -1,5 +1,6 @@ import logging import os +import re from collections import ChainMap from pathlib import Path from typing import TYPE_CHECKING, Union @@ -7,9 +8,11 @@ import numpy as np import pandas as pd import pytest +import responses from xdist import is_xdist_worker from sdmx.exceptions import HTTPError +from sdmx.format import Version from sdmx.rest import Resource from sdmx.session import Session from sdmx.source import DataContentType, Source, get_source @@ -225,6 +228,44 @@ def msg(self, path): return sdmx.read_sdmx(path / self.filename) +@pytest.fixture(scope="session") +def installed_schemas(mock_gh_api, tmp_path_factory): + """Fixture that ensures schemas are installed locally in a temporary directory.""" + from sdmx.format.xml.common import install_schemas + + dir = tmp_path_factory.mktemp("schemas") + + with mock_gh_api: + install_schemas(dir.joinpath("2.1"), Version["2.1"]) + install_schemas(dir.joinpath("3.0"), Version["3.0.0"]) + + yield dir + + +@pytest.fixture(scope="session") +def mock_gh_api(): + """Mock GitHub API responses to avoid hitting rate limits. + + For each API endpoint URL queried by :func:.`_gh_zipball`, return a pared-down JSON + response that contains the required "zipball_url" key. 
+ """ + base = "https://api.github.com/repos/sdmx-twg/sdmx-ml" + + # TODO Improve .util.requests to provide (roughly) the same functionality, then drop + # use of responses here + mock = responses.RequestsMock(assert_all_requests_are_fired=False) + mock.add_passthru(re.compile(rf"{base}/zipball/\w+")) + mock.add_passthru(re.compile(r"https://codeload.github.com/\w+")) + + for v in "2.1", "3.0", "3.0.0": + mock.get( + url=f"{base}/releases/tags/v{v}", + json=dict(zipball_url=f"{base}/zipball/v{v}"), + ) + + yield mock + + @pytest.fixture(scope="session") def session_with_pytest_cache(pytestconfig): """Fixture: A :class:`.Session` that caches within :file:`.pytest_cache`. diff --git a/sdmx/tests/format/test_format_xml.py b/sdmx/tests/format/test_format_xml.py index 8186eac4d..7af26447c 100644 --- a/sdmx/tests/format/test_format_xml.py +++ b/sdmx/tests/format/test_format_xml.py @@ -3,7 +3,6 @@ from pathlib import Path import pytest -import responses import sdmx from sdmx.format import Version, xml @@ -31,44 +30,6 @@ def test_class_for_tag(): assert xml.v30.class_for_tag("str:DataStructure") is not None -@pytest.fixture(scope="module") -def mock_gh_api(): - """Mock GitHub API responses to avoid hitting rate limits. - - For each API endpoint URL queried by :func:.`_gh_zipball`, return a pared-down JSON - response that contains the required "zipball_url" key. 
- """ - base = "https://api.github.com/repos/sdmx-twg/sdmx-ml" - - # TODO Improve .util.requests to provide (roughly) the same functionality, then drop - # use of responses here - mock = responses.RequestsMock(assert_all_requests_are_fired=False) - mock.add_passthru(re.compile(rf"{base}/zipball/\w+")) - mock.add_passthru(re.compile(r"https://codeload.github.com/\w+")) - - for v in "2.1", "3.0", "3.0.0": - mock.get( - url=f"{base}/releases/tags/v{v}", - json=dict(zipball_url=f"{base}/zipball/v{v}"), - ) - - mock.start() - - try: - yield - finally: - mock.stop() - - -@pytest.fixture(scope="module") -def installed_schemas(mock_gh_api, tmp_path_factory): - """Fixture that ensures schemas are installed locally in a temporary directory.""" - dir = tmp_path_factory.mktemp("schemas") - sdmx.install_schemas(dir.joinpath("2.1"), Version["2.1"]) - sdmx.install_schemas(dir.joinpath("3.0"), Version["3.0.0"]) - yield dir - - @pytest.mark.parametrize("version", ["1", 1, None]) def test_install_schemas_invalid_version(version): """Ensure invalid versions throw ``NotImplementedError``.""" diff --git a/sdmx/tests/reader/test_reader_xml_v21.py b/sdmx/tests/reader/test_reader_xml_v21.py index 4abdfa2ca..87a3e13a2 100644 --- a/sdmx/tests/reader/test_reader_xml_v21.py +++ b/sdmx/tests/reader/test_reader_xml_v21.py @@ -233,11 +233,11 @@ def test_gh_164(specimen): assert isinstance(da.related_to, v21.NoSpecifiedRelationship) -def test_gh_180(caplog, specimen) -> None: +def test_gh_180(caplog, installed_schemas, specimen) -> None: """Test of https://github.com/khaeru/sdmx/issues/190.""" with specimen("BIS/gh-180.xml") as f: # Message is not valid SDMX-ML - assert False is validate_xml(f) + assert False is validate_xml(f, installed_schemas) # Validation logs an error message regarding the non-standard class assert re.match( @@ -317,11 +317,11 @@ def test_gh_205(caplog, specimen) -> None: assert text == str(a.text) -def test_gh_218(caplog, specimen) -> None: +def test_gh_218(caplog, 
installed_schemas, specimen) -> None: """Test of https://github.com/khaeru/sdmx/pull/218.""" with specimen("constructed/gh-218.xml") as f: # Specimen is XSD-valid - validate_xml(f) + validate_xml(f, installed_schemas) f.seek(0) From a26f8f44157f7e2a8372cb8daea2679b24226c4a Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Fri, 10 Jan 2025 17:49:30 +0100 Subject: [PATCH 11/22] Improve validate_xml() - Locate schemas in any subdir of `schema_dir` arg. - Add max_errors parameter. - Only call XMLSchema.validate() once. - Don't hard-code valid root elements; interpret the error log. - Log all messages. - Adjust tests. --- sdmx/format/xml/common.py | 90 ++++++++++++++---------- sdmx/tests/format/test_format_xml.py | 87 +++++++++++------------ sdmx/tests/reader/test_reader_xml_v21.py | 2 +- 3 files changed, 95 insertions(+), 84 deletions(-) diff --git a/sdmx/format/xml/common.py b/sdmx/format/xml/common.py index 0a6a41155..9eafef4a3 100644 --- a/sdmx/format/xml/common.py +++ b/sdmx/format/xml/common.py @@ -6,7 +6,7 @@ from operator import itemgetter from pathlib import Path from shutil import copytree -from typing import IO, Iterable, Mapping, Optional, Union +from typing import IO, Iterable, Mapping, Optional, Union, cast from lxml import etree from lxml.etree import QName @@ -99,61 +99,75 @@ def validate_xml( msg: Union[Path, IO], schema_dir: Optional[Path] = None, version: Union[str, Version] = Version["2.1"], + max_errors: int = -1, ) -> bool: - """Validate and SDMX message against the XML Schema (XSD) documents. + """Validate SDMX-ML in `msg` against the XML Schema (XSD) documents. - The XML Schemas must first be installed or validation will fail. See - :func:`sdmx.install_schemas` to download the schema files. + A log message with level :data:`logging.ERROR` is emitted if validation fails. This + indicates the first (possibly not only) element in `msg` that is not valid per the + schemas. Parameters ---------- msg - A SDMX-ML Message formatted XML file. 
+ Path or io-like containing an SDMX-ML message. schema_dir - The directory to XSD schemas used to validate the message. + Directory with SDMX-ML XSD schemas used to validate the message. version The SDMX-ML schema version to validate against. One of ``2.1`` or ``3.0``. + max_errors + Maximum number of messages to log on validation failure. Returns ------- bool - True if validation passed. False otherwise. + :any:`True` if validation passed, otherwise :any:`False`. + + Raises + ------ + FileNotFoundError + if `schema_dir` (or a subdirectory) does not contain :file:`SDMXMessage.xsd`. + Use :func:`sdmx.install_schemas` to download the schema files. + NotImplementedError + if `msg` contains valid XML, but with a root element that is not part of the + SDMX-ML standard. """ schema_dir, version = _handle_validate_args(schema_dir, version) - msg_doc = etree.parse(msg) + # Find SDMXMessage.xsd in `schema_dir` or a subdirectory + for candidate in schema_dir, schema_dir.joinpath(version.name): + try: + # Turn the XSD into a schema object + xml_schema = etree.XMLSchema(file=candidate.joinpath("SDMXMessage.xsd")) + break + except Exception: + xml_schema = None - # Make sure the message is a supported type - supported_elements = [ - "CodelistQuery", - "DataStructureQuery", - "GenericData", - "GenericMetadata", - "GenericTimeSeriesData", - "MetadataStructureQuery", - "Structure", - "StructureSpecificData", - "StructureSpecificMetadata", - "StructureSpecificTimeSeriesData", - ] - root_elem_name = msg_doc.docinfo.root_name - if root_elem_name not in supported_elements: - raise NotImplementedError - - message_xsd = schema_dir.joinpath("SDMXMessage.xsd") - if not message_xsd.exists(): - raise ValueError(f"Could not find XSD files in {schema_dir}") - - # Turn the XSD into a schema object - xml_schema_doc = etree.parse(message_xsd) - xml_schema = etree.XMLSchema(xml_schema_doc) + if xml_schema is None: + raise FileNotFoundError(f"Could not find XSD files in {schema_dir}") - try: - 
xml_schema.assertValid(msg_doc) - except etree.DocumentInvalid as err: - log.error(err) - finally: - return xml_schema.validate(msg_doc) + # Parse the given document + msg_doc = etree.parse(msg) + + if not xml_schema.validate(msg_doc): + for i, entry in enumerate( + cast(Iterable["etree._LogEntry"], xml_schema.error_log) + ): + if ( + i == 0 + and "No matching global declaration available for the validation root" + in entry.message + ): + raise NotImplementedError( + f"Validate non-SDMX root element <{msg_doc.getroot().tag}>" + ) from None + elif i == max_errors: + break + log.log(getattr(logging, entry.level_name), entry.message) + + return False + else: + return True def _extracted_zipball(version: Version) -> Path: diff --git a/sdmx/tests/format/test_format_xml.py b/sdmx/tests/format/test_format_xml.py index 7af26447c..38ff6a6ba 100644 --- a/sdmx/tests/format/test_format_xml.py +++ b/sdmx/tests/format/test_format_xml.py @@ -30,13 +30,6 @@ def test_class_for_tag(): assert xml.v30.class_for_tag("str:DataStructure") is not None -@pytest.mark.parametrize("version", ["1", 1, None]) -def test_install_schemas_invalid_version(version): - """Ensure invalid versions throw ``NotImplementedError``.""" - with pytest.raises(NotImplementedError): - sdmx.install_schemas(version=version) - - @pytest.mark.network @pytest.mark.parametrize("version", ["2.1", "3.0"]) def test_install_schemas(installed_schemas, version): @@ -62,28 +55,15 @@ def test_install_schemas_in_user_cache(): @pytest.mark.parametrize("version", ["1", 1, None]) -def test_validate_xml_invalid_version(version): - """Ensure validation of invalid versions throw ``NotImplementedError``.""" +def test_install_schemas_invalid_version(version): + """Ensure invalid versions throw ``NotImplementedError``.""" with pytest.raises(NotImplementedError): - # This message doesn't exist, but the version should throw before it is used. 
- sdmx.validate_xml("samples/common/common.xml", version=version) - - -def test_validate_xml_no_schemas(tmp_path, specimen, installed_schemas): - """Check that supplying an invalid schema path will raise ``ValueError``.""" - with specimen("IPI-2010-A21-structure.xml", opened=False) as msg_path: - with pytest.raises(ValueError): - sdmx.validate_xml(msg_path, schema_dir=tmp_path) + sdmx.install_schemas(version=version) @pytest.mark.network def test_validate_xml_from_v2_1_samples(tmp_path, specimen, installed_schemas): """Use official samples to ensure validation of v2.1 messages works correctly.""" - extracted_content = _extracted_zipball(Version["2.1"]) - - # Schemas as just in a flat directory - schema_dir = extracted_content.joinpath("schemas") - # Samples are somewhat spread out, and some are known broken so we pick a bunch for parts in [ ("v21", "xml", "common", "common.xml"), @@ -98,7 +78,31 @@ def test_validate_xml_from_v2_1_samples(tmp_path, specimen, installed_schemas): ("v21", "xml", "query", "response_esms_children.xml"), ]: with specimen(str(Path(*parts))) as sample: - assert sdmx.validate_xml(sample, schema_dir, version="2.1") + assert sdmx.validate_xml( + sample, installed_schemas.joinpath("2.1"), version="2.1" + ) + + +@pytest.mark.network +def test_validate_xml_from_v3_0_samples(tmp_path, installed_schemas): + """Use official samples to ensure validation of v3.0 messages works correctly.""" + extracted_content = _extracted_zipball(Version["3.0.0"]) + + # Schemas as just in a flat directory + schema_dir = extracted_content.joinpath("schemas") + + # Samples are somewhat spread out, and some are known broken so we pick a bunch + samples_dir = extracted_content.joinpath("samples") + samples = [ + samples_dir / "Codelist" / "codelist.xml", + samples_dir / "Codelist" / "codelist - extended.xml", + samples_dir / "Concept Scheme" / "conceptscheme.xml", + samples_dir / "Data Structure Definition" / "ECB_EXR.xml", + samples_dir / "Dataflow" / "dataflow.xml", + 
samples_dir / "Geospatial" / "geospatial_geographiccodelist.xml", + ] + for sample in samples: + assert sdmx.validate_xml(sample, schema_dir, version="3.0") @pytest.mark.network @@ -135,33 +139,26 @@ def test_validate_xml_invalid_doc(tmp_path, installed_schemas): assert not sdmx.validate_xml(msg_path, schema_dir=installed_schemas.joinpath("2.1")) -def test_validate_xml_invalid_message_type(): +def test_validate_xml_invalid_message_type(installed_schemas): """Ensure that an invalid document fails validation.""" # Create a mangled structure message with its outmost tag changed to be invalid msg = StructureMessage() invalid_msg = re.sub(b"mes:Structure([ >])", rb"mes:FooBar\1", sdmx.to_xml(msg)) - with pytest.raises(NotImplementedError): - sdmx.validate_xml(io.BytesIO(invalid_msg)) + with pytest.raises(NotImplementedError, match="Validate non-SDMX root.*FooBar>"): + sdmx.validate_xml(io.BytesIO(invalid_msg), installed_schemas) -@pytest.mark.network -def test_validate_xml_from_v3_0_samples(tmp_path, installed_schemas): - """Use official samples to ensure validation of v3.0 messages works correctly.""" - extracted_content = _extracted_zipball(Version["3.0.0"]) +@pytest.mark.parametrize("version", ["1", 1, None]) +def test_validate_xml_invalid_version(version): + """Ensure validation of invalid versions throw ``NotImplementedError``.""" + with pytest.raises(NotImplementedError): + # This message doesn't exist, but the version should throw before it is used. 
+ sdmx.validate_xml("samples/common/common.xml", version=version) - # Schemas as just in a flat directory - schema_dir = extracted_content.joinpath("schemas") - # Samples are somewhat spread out, and some are known broken so we pick a bunch - samples_dir = extracted_content.joinpath("samples") - samples = [ - samples_dir / "Codelist" / "codelist.xml", - samples_dir / "Codelist" / "codelist - extended.xml", - samples_dir / "Concept Scheme" / "conceptscheme.xml", - samples_dir / "Data Structure Definition" / "ECB_EXR.xml", - samples_dir / "Dataflow" / "dataflow.xml", - samples_dir / "Geospatial" / "geospatial_geographiccodelist.xml", - ] - for sample in samples: - assert sdmx.validate_xml(sample, schema_dir, version="3.0") +def test_validate_xml_no_schemas(tmp_path, specimen): + """Check that supplying an invalid schema path will raise ``ValueError``.""" + with specimen("IPI-2010-A21-structure.xml", opened=False) as msg_path: + with pytest.raises(FileNotFoundError): + sdmx.validate_xml(msg_path, schema_dir=tmp_path) diff --git a/sdmx/tests/reader/test_reader_xml_v21.py b/sdmx/tests/reader/test_reader_xml_v21.py index 87a3e13a2..f09deb329 100644 --- a/sdmx/tests/reader/test_reader_xml_v21.py +++ b/sdmx/tests/reader/test_reader_xml_v21.py @@ -242,7 +242,7 @@ def test_gh_180(caplog, installed_schemas, specimen) -> None: # Validation logs an error message regarding the non-standard class assert re.match( ".*attribute 'package'.*'publicationtable' is not an element of the set", - caplog.messages[-1], + caplog.messages[-2], ) # Message can still be read From bcdcfe27155df34e677594feb55d066ca3e6c904 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Fri, 10 Jan 2025 22:21:36 +0100 Subject: [PATCH 12/22] Add IMF_beta{,3} data sources, tests, docs - Address multiple IMF service in docs (#38). 
--- doc/sources.rst | 84 ++++++++++++++++++++++++++++++++++++-- sdmx/sources.json | 13 ++++++ sdmx/tests/test_sources.py | 69 +++++++++++++++++++++++++++++++ 3 files changed, 163 insertions(+), 3 deletions(-) diff --git a/doc/sources.rst b/doc/sources.rst index 9e419f115..eaa1110ee 100644 --- a/doc/sources.rst +++ b/doc/sources.rst @@ -270,15 +270,93 @@ SDMX-ML — .. _IMF: -``IMF``: International Monetary Fund's “SDMX Central” source ------------------------------------------------------------- +International Monetary Fund +--------------------------- + +As of 2025-01-10, there appear to be at least *three* systems operated by the IMF from which SDMX responses are available. +These are listed here from oldest to newest, and identified by the domain used in the base URL for requests. + +(no ID): dataservices.imf.org +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +SDMX-ML and SDMX-JSON — +API documentation `1 `__, +`2 `__ + +- This appears to be an SDMX 2.0 REST web service, that can be induced to return SDMX-ML 2.1 or SDMX-JSON 1.0.0 messages through a ``?format=sdmx-2.1`` query parameter. +- :mod:`sdmx` does not provide a :file:`sources.json` entry/ID or tests for this service. +- However, the package code can still be used to access the responses. + For example: + +.. code-block:: python + + import sdmx + + client = sdmx.Client() + url = ( + # Base URL + "http://dataservices.imf.org/REST/SDMX_XML.svc/CompactData/" + # Data flow ID and key + "PCPS/M.W00.PZINC." + # Query parameters, including format + "?startPeriod=2021&endPeriod=2022&format=sdmx-2.1" + ) + + # Retrieve an SDMX-ML 2.1 data message + message = client.get(url=url) + + # Convert the single data set to pandas.Series with multi-index + df = sdmx.to_pandas(message.data[0]) + +``IMF``: sdmxcentral.imf.org +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SDMX-ML — `Website `__ -- Subset of the data available on http://data.imf.org. +- This appears to be an instance of the “Fusion Metadata Registry” software.
+ Such instances also expose SDMX 2.1 and 3.0 APIs. +- No API documentation appears to be available. +- The :mod:`sdmx` source with ID ``IMF`` corresponds to the SDMX 2.1 (SDMX-REST 1.x) API with base URL https://sdmxcentral.imf.org/ws/public/sdmxapi/rest. + The web interface suggests URLs for the SDMX 3.0.0 (SDMX-REST 2.x) API with base URL https://sdmxcentral.imf.org/sdmx/v2. + This API can be accessed by modifying the :attr:`.Source.url` and :attr:`~.Source.versions` attributes, or by constructing a new Source. + For example: + + .. code-block:: python + + import sdmx + from sdmx.format import Version + + client = sdmx.Client("IMF") + client.source.url = "https://sdmxcentral.imf.org/sdmx/v2" + client.source.versions = {Version["3.0.0"]} + + # Retrieve an SDMX-ML 3.0.0 structure message + message = client.dataflow("01R") + +- The source appears to provide a subset of the data available on https://data.imf.org. - Supports series-key-only and hence dataset-based key validation and construction. +``IMF_beta``, ``IMF_beta3``: api.imf.org +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +SDMX-ML — +`Website `__ — +`API documentation `__ + +.. warning:: As of 2025-01-10, this source carries a banner: + + We're in Beta! + Help us improve by `testing `__ and sharing `feedback `__. + This is a beta version; the data is not final and should not be used for actual work. + + Users should heed this message. + The source IDs used in :mod:`sdmx` may change if and when this source exits beta and enters production, or is designated as the recommended, primary, or sole IMF source. + +- The API documentation indicates "Our data are available through SDMX 2.1 and SDMX 3.0 APIs," but the documentation pages mention only the SDMX 2.1 (SDMX-REST 1.x) base URL, https://api.imf.org/external/sdmx/2.1. + The base URL used by :mod:`sdmx` for the SDMX 3.0 (SDMX-REST 2.x) API is inferred. +- :mod:`sdmx` provides access to both versions of the API with IDs ``IMF_beta`` and ``IMF_beta3``. 
+ As of 2025-01-10, both return HTTP **403 Forbidden** to every request except the SDMX 2.1 data query illustrated in the API documentation. .. _INEGI: diff --git a/sdmx/sources.json b/sdmx/sources.json index c72720d7a..f6d6e9397 100644 --- a/sdmx/sources.json +++ b/sdmx/sources.json @@ -228,6 +228,19 @@ "provisionagreement": false } }, + { + "id": "IMF_beta", + "url": "https://api.imf.org/external/sdmx/2.1", + "name": "International Monetary Fund" + }, + { + "id": "IMF_beta3", + "url": "https://api.imf.org/external/sdmx/3.0", + "name": "International Monetary Fund", + "versions": [ + "3.0.0" + ] + }, { "id": "INEGI", "url": "http://sdmx.snieg.mx/service/rest", diff --git a/sdmx/tests/test_sources.py b/sdmx/tests/test_sources.py index 1e0702f9f..7a725e2cd 100644 --- a/sdmx/tests/test_sources.py +++ b/sdmx/tests/test_sources.py @@ -368,6 +368,75 @@ class TestIMF(DataSourceTest): source_id = "IMF" +# As of 2025-01-10, all endpoints aside from SDMX 2.1 /data/ return 403 +IMF_BETA_XFAIL: dict[str, Union[type[Exception], tuple[type[Exception], str]]] = { + k: HTTPError + for k in """ + actualconstraint + agencyscheme + allowedconstraint + categorisation + categoryscheme + codelist + conceptscheme + contentconstraint + dataconsumerscheme + dataflow + dataproviderscheme + datastructure + hierarchicalcodelist + metadataflow + metadatastructure + organisationscheme + provisionagreement + registration + structure + structureset + """.split() +} + + +class TestIMF_beta(DataSourceTest): + source_id = "IMF_beta" + + endpoint_args = dict( + # As indicated in the API documentation + data=dict( + resource_id="CPI", + key="111.CPI.CP01.IX.M", + params=dict(startPeriod=2018), + # Does not appear to affect 403 + # headers={"User-Agent": "idata-script-client"}, + ) + ) + + xfail = IMF_BETA_XFAIL | dict( + metadata=NotImplementedError, + registration=ValueError, + ) + + +class TestIMF_beta3(DataSourceTest): + source_id = "IMF_beta3" + + endpoint_args = dict( + data=dict( + 
context="dataflow", + agency_id="IMF", + resource_id="CPI", + key="111.CPI.CP01.IX.M", + # Not yet supported + # params={"c[TIME_PERIOD]": "ge:2018"}, + ), + metadata=dict(provider_id="IMF"), + ) + + xfail = IMF_BETA_XFAIL | dict( + data=HTTPError, # 403 + metadata=HTTPError, # 403 + ) + + class TestINEGI(DataSourceTest): source_id = "INEGI" From 3415a462289af44f3f9028ba046cb44ff16796ae Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Fri, 10 Jan 2025 22:36:26 +0100 Subject: [PATCH 13/22] Handle reading SDMX-ML 3.0 - Ensure URN.groupdict always exists. - Handle KeyError in .reader.xml.v30.Reference. - Test using IMF specimen. --- sdmx/reader/xml/v30.py | 6 +++--- sdmx/testing/data.py | 1 + sdmx/urn.py | 1 + 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/sdmx/reader/xml/v30.py b/sdmx/reader/xml/v30.py index b7fb69d09..636330733 100644 --- a/sdmx/reader/xml/v30.py +++ b/sdmx/reader/xml/v30.py @@ -8,7 +8,7 @@ from sdmx.model import v30 as model from . import v21 -from .common import BaseReference, XMLEventReader +from .common import BaseReference, NotReference, XMLEventReader class Reference(BaseReference): @@ -21,12 +21,12 @@ def info_from_element(cls, elem): # If the URN doesn't specify an item ID, it is probably a reference to a # MaintainableArtefact, so target_id and id are the same result.update(target_id=result["item_id"] or result["id"]) - except ValueError: + except (KeyError, ValueError): # Bare string that is the ID of e.g. 
a component (dimension) if id := (elem.text or "").strip(): result = {"id": id, "target_id": id, "class": None, "package": None} else: - raise v21.NotReference + raise NotReference() return result diff --git a/sdmx/testing/data.py b/sdmx/testing/data.py index bde173b35..e1311d4d6 100644 --- a/sdmx/testing/data.py +++ b/sdmx/testing/data.py @@ -321,6 +321,7 @@ def add_specimens(target: list[tuple[Path, str, Optional[str]]], base: Path) -> ("ESTAT", "HCL_WSTATUS_SCL_BNSPART.xml"), ("ESTAT", "HCL_WSTATUS_SCL_WSTATUSPR.xml"), ("IAEG-SDGs", "metadatastructure-0.xml"), + ("IMF", "01R.xml"), ("IMF", "1PI-structure.xml"), ("IMF", "CL_AREA-structure.xml"), # Manually reduced subset of the response for this DSD. Test for diff --git a/sdmx/urn.py b/sdmx/urn.py index b1395fa5a..a6f1e660d 100644 --- a/sdmx/urn.py +++ b/sdmx/urn.py @@ -48,6 +48,7 @@ def __init__(self, value: Optional[str], **kwargs) -> None: self.__dict__.update(kwargs) if value is None: + self.groupdict = {} # Needed by match() return try: From 6cc272b99af942e07d35f5af19ff2e055d2b5de9 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Fri, 10 Jan 2025 22:39:45 +0100 Subject: [PATCH 14/22] Handle "," as decimal separator in write_dataset() OBS_VALUE resembling "100,00" are returned by the AR1 source, at least. 
--- sdmx/writer/pandas.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/sdmx/writer/pandas.py b/sdmx/writer/pandas.py index dc59b05d9..081ffeeb7 100644 --- a/sdmx/writer/pandas.py +++ b/sdmx/writer/pandas.py @@ -324,7 +324,12 @@ def write_dataset( # noqa: C901 TODO reduce complexity 12 → ≤10 if len(result): result.index.names = observation.key.order().values.keys() if dtype: - result["value"] = result["value"].astype(dtype) + try: + result["value"] = result["value"].astype(dtype) + except ValueError: + # Attempt to handle locales in which LC_NUMERIC.decimal_point is "," + # TODO Make this more robust by inferring and changing locale settings + result["value"] = result["value"].str.replace(",", ".").astype(dtype) if not attributes: result = result["value"] From 4125aaf2eac10269bc309cf3f21bd8e45802b490 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Mon, 13 Jan 2025 16:47:32 +0100 Subject: [PATCH 15/22] Ensure XSD-valid SDMX-ML from _header() --- sdmx/writer/xml.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/sdmx/writer/xml.py b/sdmx/writer/xml.py index 36d4af211..d98f57660 100644 --- a/sdmx/writer/xml.py +++ b/sdmx/writer/xml.py @@ -1,11 +1,12 @@ """SDMX-ML v2.1 writer.""" + # Contents of this file are organized in the order: # # - Utility methods and global variables. 
# - writer functions for sdmx.message classes, in the same order as message.py # - writer functions for sdmx.model classes, in the same order as model.py - import logging +from datetime import datetime from typing import Iterable, Literal, MutableMapping, Optional from lxml import etree @@ -225,14 +226,15 @@ def _em(obj: message.ErrorMessage): @writer def _header(obj: message.Header): - elem = Element("mes:Header") - if obj.id: - elem.append(Element("mes:ID", obj.id)) - elem.append(Element("mes:Test", str(obj.test).lower())) - if obj.prepared: - elem.append(Element("mes:Prepared", obj.prepared.isoformat())) - if obj.sender: - elem.append(writer.recurse(obj.sender, _tag="mes:Sender")) + elem = Element( + "mes:Header", + # Mandatory child elements of mes:Header + Element("mes:ID", obj.id or "none"), + Element("mes:Test", str(obj.test).lower()), + Element("mes:Prepared", (obj.prepared or datetime.now()).isoformat()), + writer.recurse(obj.sender or common.Agency(id="none"), _tag="mes:Sender"), + ) + # Optional child elements if obj.receiver: elem.append(writer.recurse(obj.receiver, _tag="mes:Receiver")) if obj.source: From 0595cc322f7070483e2532a8a4ce52e6721dd040 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Mon, 13 Jan 2025 16:54:11 +0100 Subject: [PATCH 16/22] Tidy .writer.test_pandas - Sort alpha. - Shorten names. - Type hint all test functions. 
--- sdmx/tests/writer/test_pandas.py | 177 ++++++++++++++++--------------- 1 file changed, 93 insertions(+), 84 deletions(-) diff --git a/sdmx/tests/writer/test_pandas.py b/sdmx/tests/writer/test_pandas.py index 14193f728..7ca95b2a8 100644 --- a/sdmx/tests/writer/test_pandas.py +++ b/sdmx/tests/writer/test_pandas.py @@ -1,10 +1,13 @@ """Tests for pandasdmx/writer.py.""" +from typing import cast + import pandas as pd import pytest from pytest import raises import sdmx +from sdmx.message import DataMessage, StructureMessage from sdmx.model.v21 import TimeDimension from sdmx.testing import assert_pd_equal @@ -13,51 +16,7 @@ } -def test_write_data_arguments(specimen): - # The identity here is not important; any non-empty DataMessage will work - with specimen("INSEE/CNA-2010-CONSO-SI-A17.xml") as f: - msg = sdmx.read_sdmx(f) - - # Attributes must be a string - with raises(TypeError): - sdmx.to_pandas(msg, attributes=2) - - # Attributes must contain only 'dgso' - with raises(ValueError): - sdmx.to_pandas(msg, attributes="foobarbaz") - - -@pytest.mark.parametrize_specimens("path", kind="data", marks=MARKS) -def test_write_data(specimen, path): - if ("v3", "csv") == path.parts[-3:-1]: - pytest.skip("SDMX-CSV 3.0.0 examples cannot be read without DSD") - - msg = sdmx.read_sdmx(path) - - result = sdmx.to_pandas(msg) - - expected = specimen.expected_data(path) - if expected is not None: - print(expected, result, sep="\n") - assert_pd_equal(expected, result) - - # TODO incomplete - assert isinstance(result, (pd.Series, pd.DataFrame, list)), type(result) - - -@pytest.mark.parametrize_specimens("path", kind="data", marks=MARKS) -def test_write_data_attributes(path): - if ("v3", "csv") == path.parts[-3:-1]: - pytest.skip("SDMX-CSV 3.0.0 examples cannot be read without DSD") - - msg = sdmx.read_sdmx(path) - - result = sdmx.to_pandas(msg, attributes="osgd") - # TODO incomplete - assert isinstance(result, (pd.Series, pd.DataFrame, list)), type(result) - - -def 
test_write_agencyscheme(specimen): +def test_agencyscheme(specimen) -> None: # Convert an agency scheme with specimen("ECB/orgscheme.xml") as f: msg = sdmx.read_sdmx(f) @@ -80,7 +39,7 @@ def test_write_agencyscheme(specimen): data.structure -def test_write_categoryscheme(specimen): +def test_categoryscheme(specimen) -> None: with specimen("IPI-2010-A21-structure.xml") as f: msg = sdmx.read_sdmx(f) data = sdmx.to_pandas(msg) @@ -93,7 +52,7 @@ def test_write_categoryscheme(specimen): assert cs.loc["CNA-PIB-2005", "parent"] == "CNA-PIB" -def test_write_codelist(specimen): +def test_codelist(specimen) -> None: # Retrieve codelists from a test specimen and convert to pandas with specimen("common-structure.xml") as f: dsd_common = sdmx.read_sdmx(f) @@ -115,6 +74,7 @@ def test_write_codelist(specimen): # Hierarchical code list with specimen("codelist_partial.xml") as f: msg = sdmx.read_sdmx(f) + assert isinstance(msg, StructureMessage) # Convert single codelist CL_AREA = sdmx.to_pandas(msg.codelist["CL_AREA"]) @@ -134,7 +94,7 @@ def test_write_codelist(specimen): assert area_hierarchy.loc["002", "name_parent"] == "World" -def test_write_conceptscheme(specimen): +def test_conceptscheme(specimen) -> None: with specimen("common-structure.xml") as f: msg = sdmx.read_sdmx(f) data = sdmx.to_pandas(msg) @@ -143,7 +103,51 @@ def test_write_conceptscheme(specimen): assert cdc.loc["UNIT_MEASURE", "name"] == "Unit of Measure" -def test_write_dataflow(specimen): +@pytest.mark.parametrize_specimens("path", kind="data", marks=MARKS) +def test_data(specimen, path) -> None: + if ("v3", "csv") == path.parts[-3:-1]: + pytest.skip("SDMX-CSV 3.0.0 examples cannot be read without DSD") + + msg = sdmx.read_sdmx(path) + + result = sdmx.to_pandas(msg) + + expected = specimen.expected_data(path) + if expected is not None: + print(expected, result, sep="\n") + assert_pd_equal(expected, result) + + # TODO incomplete + assert isinstance(result, (pd.Series, pd.DataFrame, list)), type(result) + + +def 
test_data_arguments(specimen) -> None: + # The identity here is not important; any non-empty DataMessage will work + with specimen("INSEE/CNA-2010-CONSO-SI-A17.xml") as f: + msg = sdmx.read_sdmx(f) + + # Attributes must be a string + with raises(TypeError): + sdmx.to_pandas(msg, attributes=2) + + # Attributes must contain only 'dgso' + with raises(ValueError): + sdmx.to_pandas(msg, attributes="foobarbaz") + + +@pytest.mark.parametrize_specimens("path", kind="data", marks=MARKS) +def test_data_attributes(path) -> None: + if ("v3", "csv") == path.parts[-3:-1]: + pytest.skip("SDMX-CSV 3.0.0 examples cannot be read without DSD") + + msg = sdmx.read_sdmx(path) + + result = sdmx.to_pandas(msg, attributes="osgd") + # TODO incomplete + assert isinstance(result, (pd.Series, pd.DataFrame, list)), type(result) + + +def test_dataflow(specimen) -> None: # Read the INSEE dataflow definition with specimen("INSEE/dataflow") as f: msg = sdmx.read_sdmx(f) @@ -168,11 +172,44 @@ def test_write_dataflow(specimen): assert_pd_equal(result["dataflow"].head(), expected) -def test_write_dataset_datetime(specimen): +@pytest.mark.network +def test_dataset_constraint(specimen) -> None: + """'constraint' argument to writer.write_dataset.""" + with specimen("ng-ts.xml") as f: + msg = sdmx.read_sdmx(f) + assert isinstance(msg, DataMessage) + + # Fetch the message's DSD + assert msg.structure.is_external_reference + # NB the specimen included in tests/data has 'ECB_EXR_NG' as the data structure ID; + # but a query against the web service gives 'ECB_EXR1' for the same data + # structure. 
+ id = "ECB_EXR1" + dsd = cast( + StructureMessage, + sdmx.Client(msg.structure.maintainer.id).get("datastructure", id), + ).structure[id] + + # Create a ContentConstraint + cc = dsd.make_constraint({"CURRENCY": "JPY+USD"}) + + # Write the message without constraint + s1 = sdmx.to_pandas(msg) + assert len(s1) == 12 + assert set(s1.index.to_frame()["CURRENCY"]) == {"CHF", "GBP", "JPY", "USD"} + + # Writing using constraint produces fewer items; only those matching the + # constraint + s2 = sdmx.to_pandas(msg, constraint=cc) + assert len(s2) == 6 + assert set(s2.index.to_frame()["CURRENCY"]) == {"JPY", "USD"} + + +def test_dataset_datetime(specimen) -> None: """Test datetime arguments to write_dataset().""" # Load structure with specimen("IPI-2010-A21-structure.xml") as f: - dsd = sdmx.read_sdmx(f).structure["IPI-2010-A21"] + dsd = cast(StructureMessage, sdmx.read_sdmx(f)).structure["IPI-2010-A21"] TIME_PERIOD = dsd.dimensions.get("TIME_PERIOD") FREQ = dsd.dimensions.get("FREQ") @@ -181,9 +218,11 @@ def test_write_dataset_datetime(specimen): # Load data, two ways with specimen("IPI-2010-A21.xml") as f: msg = sdmx.read_sdmx(f, structure=dsd) + assert isinstance(msg, DataMessage) ds = msg.data[0] with specimen("IPI-2010-A21.xml") as f: msg_no_structure = sdmx.read_sdmx(f) + assert isinstance(msg_no_structure, DataMessage) other_dims = list( filter(lambda n: n != "TIME_PERIOD", [d.id for d in dsd.dimensions.components]) @@ -273,49 +312,19 @@ def expected(df, axis=0, cls=pd.DatetimeIndex): sdmx.to_pandas(ds, datetime=43) -def test_write_list_of_obs(specimen): +def test_list_of_obs(specimen) -> None: """Bare list of observations can be written.""" with specimen("ng-ts.xml") as f: msg = sdmx.read_sdmx(f) + assert isinstance(msg, DataMessage) sdmx.to_pandas(msg.data[0].obs) @pytest.mark.parametrize_specimens("path", kind="structure") -def test_writer_structure(path): +def test_structure(path) -> None: msg = sdmx.read_sdmx(path) sdmx.to_pandas(msg) # TODO test contents - - 
-@pytest.mark.network -def test_write_constraint(specimen): - """'constraint' argument to writer.write_dataset.""" - with specimen("ng-ts.xml") as f: - msg = sdmx.read_sdmx(f) - - # Fetch the message's DSD - assert msg.structure.is_external_reference - # NB the speciment included in tests/data has 'ECB_EXR_NG' as the - # data structure ID; but a query against the web service gives - # 'ECB_EXR1' for the same data structure. - id = "ECB_EXR1" - dsd = ( - sdmx.Client(msg.structure.maintainer.id).get("datastructure", id).structure[id] - ) - - # Create a ContentConstraint - cc = dsd.make_constraint({"CURRENCY": "JPY+USD"}) - - # Write the message without constraint - s1 = sdmx.to_pandas(msg) - assert len(s1) == 12 - assert set(s1.index.to_frame()["CURRENCY"]) == {"CHF", "GBP", "JPY", "USD"} - - # Writing using constraint produces a fewer items; only those matching the - # constraint - s2 = sdmx.to_pandas(msg, constraint=cc) - assert len(s2) == 6 - assert set(s2.index.to_frame()["CURRENCY"]) == {"JPY", "USD"} From 7bcbc7a87102704684d2e16c0d5952b822757822 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Mon, 13 Jan 2025 17:13:19 +0100 Subject: [PATCH 17/22] Test conversion of e.g. 
"100,1" to pandas --- sdmx/tests/writer/test_pandas.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/sdmx/tests/writer/test_pandas.py b/sdmx/tests/writer/test_pandas.py index 7ca95b2a8..23351d57c 100644 --- a/sdmx/tests/writer/test_pandas.py +++ b/sdmx/tests/writer/test_pandas.py @@ -3,11 +3,13 @@ from typing import cast import pandas as pd +import pandas.testing as pdt import pytest from pytest import raises import sdmx from sdmx.message import DataMessage, StructureMessage +from sdmx.model import common, v21 from sdmx.model.v21 import TimeDimension from sdmx.testing import assert_pd_equal @@ -121,6 +123,30 @@ def test_data(specimen, path) -> None: assert isinstance(result, (pd.Series, pd.DataFrame, list)), type(result) +def test_data_decimal() -> None: + """Test handling of "," as a decimal separator.""" + # Data set with string values containing "," as a decimal separator + ds = v21.DataSet( + obs=[ + v21.Observation(dimension=common.Key(FOO="A"), value="1"), + v21.Observation(dimension=common.Key(FOO="B"), value="1,0"), + v21.Observation(dimension=common.Key(FOO="C"), value="100,1"), + ] + ) + + # Expected result + exp = pd.Series( + [1.0, 1.0, 100.1], + index=pd.MultiIndex.from_product([list("ABC")], names=["FOO"]), + name="value", + ) + + # Conversion occurs without error + result = sdmx.to_pandas(ds) + # Result is as expected + pdt.assert_series_equal(exp, result) + + def test_data_arguments(specimen) -> None: # The identity here is not important; any non-empty DataMessage will work with specimen("INSEE/CNA-2010-CONSO-SI-A17.xml") as f: From 655f457b32d7cb570994605f9131d00ff44541d6 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Mon, 13 Jan 2025 17:20:28 +0100 Subject: [PATCH 18/22] Run mypy via pre-commit on all files --- .pre-commit-config.yaml | 2 +- pyproject.toml | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 4a6335937..362d06364 
100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -3,6 +3,7 @@ repos: rev: v1.13.0 hooks: - id: mypy + pass_filenames: false additional_dependencies: - GitPython - lxml-stubs @@ -14,7 +15,6 @@ repos: - types-python-dateutil - types-PyYAML - types-requests - args: [] - repo: https://github.com/astral-sh/ruff-pre-commit rev: v0.8.3 hooks: diff --git a/pyproject.toml b/pyproject.toml index 3a64a9e97..c5d3afcf6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -70,7 +70,11 @@ exclude_also = [ ] [tool.mypy] -exclude = ["^build/"] +files = [ + "conftest.py", + "doc", + "sdmx", +] [[tool.mypy.overrides]] # Packages/modules for which no type hints are available. From 51912fd57ac44c52a0972c837c4db093d6622af4 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Mon, 13 Jan 2025 17:49:49 +0100 Subject: [PATCH 19/22] Add required RTD config settings --- .readthedocs.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.readthedocs.yml b/.readthedocs.yml index b64469589..a882cf8a4 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -10,3 +10,6 @@ python: - method: pip path: . extra_requirements: [cache,docs,tests] + +sphinx: + configuration: doc/conf.py From abf83cfb5aa8e26a40fb6d60feb6d7a4ef5207d9 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Mon, 13 Jan 2025 18:07:16 +0100 Subject: [PATCH 20/22] Update docstring of session_with_stored_responses() --- sdmx/testing/__init__.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sdmx/testing/__init__.py b/sdmx/testing/__init__.py index 48842656e..f456ac4e2 100644 --- a/sdmx/testing/__init__.py +++ b/sdmx/testing/__init__.py @@ -281,8 +281,12 @@ def session_with_pytest_cache(pytestconfig): def session_with_stored_responses(pytestconfig): """Fixture: A :class:`.Session` returns only stored responses from sdmx-test-data. 
- This session (a) uses the 'filesystem' :mod:`requests_cache` backend and (b) is - treated with :func:`.offline`, so that *only* stored responses can be returned. + This session… + + 1. uses the 'memory' :mod:`requests_cache` backend; + 2. contains the responses from :func:`.testing.data.add_responses`; and + 3. is treated with :func:`.offline`, so that *only* stored responses can be + returned. """ session = Session(backend="memory") From eb5449ccaeb5df1a706ed8617c4cc42995c349e9 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Thu, 9 Jan 2025 18:33:00 +0100 Subject: [PATCH 21/22] Update tests for #218 --- sdmx/tests/format/test_format_xml.py | 26 ++++++++++++++++++++++---- sdmx/tests/test_source.py | 2 +- 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/sdmx/tests/format/test_format_xml.py b/sdmx/tests/format/test_format_xml.py index 38ff6a6ba..9ec8d77af 100644 --- a/sdmx/tests/format/test_format_xml.py +++ b/sdmx/tests/format/test_format_xml.py @@ -135,18 +135,19 @@ def test_validate_xml_invalid_doc(tmp_path, installed_schemas): msg_path.write_bytes(sdmx.to_xml(msg)) - # Expect validation to fail - assert not sdmx.validate_xml(msg_path, schema_dir=installed_schemas.joinpath("2.1")) + assert sdmx.validate_xml(msg_path, schema_dir=installed_schemas.joinpath("2.1")) def test_validate_xml_invalid_message_type(installed_schemas): """Ensure that an invalid document fails validation.""" # Create a mangled structure message with its outmost tag changed to be invalid msg = StructureMessage() - invalid_msg = re.sub(b"mes:Structure([ >])", rb"mes:FooBar\1", sdmx.to_xml(msg)) + invalid_msg = io.BytesIO( + re.sub(b"mes:Structure([ >])", rb"mes:FooBar\1", sdmx.to_xml(msg)) + ) with pytest.raises(NotImplementedError, match="Validate non-SDMX root.*FooBar>"): - sdmx.validate_xml(io.BytesIO(invalid_msg), installed_schemas) + sdmx.validate_xml(invalid_msg, installed_schemas) @pytest.mark.parametrize("version", ["1", 1, None]) @@ -157,6 +158,23 @@ def 
test_validate_xml_invalid_version(version): sdmx.validate_xml("samples/common/common.xml", version=version) +def test_validate_xml_max_errors(caplog, installed_schemas): + """Test :py:`validate_xml(..., max_errors=...)`.""" + msg = StructureMessage() + invalid_msg = io.BytesIO( + re.sub(b"<(mes:Structures)/>", rb"<\1>", sdmx.to_xml(msg)) + ) + + # Without max_errors, 2 messages are logged + sdmx.validate_xml(invalid_msg, installed_schemas) + assert 2 == len(caplog.messages) + caplog.clear() + + # With the argument, only 1 message is logged + sdmx.validate_xml(invalid_msg, installed_schemas, max_errors=1) + assert 1 == len(caplog.messages) + + def test_validate_xml_no_schemas(tmp_path, specimen): """Check that supplying an invalid schema path will raise ``ValueError``.""" with specimen("IPI-2010-A21-structure.xml", opened=False) as msg_path: diff --git a/sdmx/tests/test_source.py b/sdmx/tests/test_source.py index db393c99f..4a724ecf4 100644 --- a/sdmx/tests/test_source.py +++ b/sdmx/tests/test_source.py @@ -17,7 +17,7 @@ def test_get_source(caplog): def test_list_sources(): source_ids = list_sources() # Correct number of sources, excluding those created for testing - assert 29 == len(set(source_ids) - {"MOCK", "TEST"}) + assert 34 == len(set(source_ids) - {"MOCK", "TEST"}) # Listed alphabetically assert "ABS" == source_ids[0] From 2fadd96ce22404f67ae7fece04aa82f0f1f6bf42 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Thu, 9 Jan 2025 18:38:40 +0100 Subject: [PATCH 22/22] Add #218 to doc/whatsnew --- doc/api.rst | 4 ++++ doc/whatsnew.rst | 10 +++++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/doc/api.rst b/doc/api.rst index d7c9699bc..0de7c8301 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -36,6 +36,8 @@ Top-level methods and classes Client Resource add_source + get_source + install_schemas list_sources log read_sdmx @@ -43,6 +45,8 @@ Top-level methods and classes to_csv to_pandas to_xml + to_sdmx + validate_xml ``format``: SDMX file 
formats ============================= diff --git a/doc/whatsnew.rst b/doc/whatsnew.rst index 2b2d49a55..253c6c87a 100644 --- a/doc/whatsnew.rst +++ b/doc/whatsnew.rst @@ -3,16 +3,24 @@ What's new? *********** -.. _2.20.1: +.. _2.21.0: Next release ============ +- Add :ref:`AR1 `, :ref:`StatCan `, and :ref:`UY110 ` data sources (:pull:`218`, :issue:`186`, :issue:`187`, :issue:`188`). +- Add :ref:`IMF_beta, IMF_beta3 ` data sources and expand documentation on 3 distinct IMF-run web services (:pull:`218`, :issue:`38`). +- New function :func:`.get_source` for case-insensitive lookup of sources (:pull:`218`). + :class:`.Client` will handle, for instance, :py:`Client("wb")` the same as :py:`Client("WB")` and log a message about the difference. - Simplify :class:`.Session` via direct inheritance from :class:`.requests_cache.session.CacheMixin`, where installed (:pull:`217`). - Add an optional :py:`session=...` keyword argument to :class:`.Client` (:pull:`217`). +- Add an optional :py:`max_errors=...` keyword argument to :func:`.validate_xml` (:pull:`218`). - Improve :ref:`network and offline tests ` via new and improved test utilities (:pull:`217`). New test fixtures :func:`.session_with_pytest_cache` and :func:`.session_with_stored_responses`. +- Tolerate invalid SDMX returned by :ref:`BIS ` (and possibly other sources) that contains references to the non-existent :py:`PublicationTable` class (:pull:`218`, :issue:`38`). - Bug fix for reading :xml:`` from SDMX-ML 2.1: the :attr:`.Categorisation.category` attribute was read as an instance of Categorisation, rather than Category (:pull:`215`). +- Bug fix for reading :xml:`` and :xml:`` from SDMX-ML 2.1 :xml:`` (:pull:`218`). + Up to v2.20.0, these caused :class:`NotImplementedError`. .. _2.20.0: