diff --git a/openml/_api_calls.py b/openml/_api_calls.py
index 4d1d17674..3509f18e7 100644
--- a/openml/_api_calls.py
+++ b/openml/_api_calls.py
@@ -24,6 +24,7 @@
 from .__version__ import __version__
 from .exceptions import (
     OpenMLHashException,
+    OpenMLNotAuthorizedError,
     OpenMLServerError,
     OpenMLServerException,
     OpenMLServerNoResult,
@@ -36,6 +37,8 @@
 FILE_ELEMENTS_TYPE = Dict[str, Union[str, Tuple[str, str]]]
 
 DATABASE_CONNECTION_ERRCODE = 107
 
+API_TOKEN_HELP_LINK = "https://openml.github.io/openml-python/main/examples/20_basic/introduction_tutorial.html#authentication"  # noqa: S105
+
 
 def _robot_delay(n: int) -> float:
     wait = (1 / (1 + math.exp(-(n * 0.5 - 4)))) * 60
@@ -456,21 +459,28 @@ def __parse_server_exception(
     url: str,
     file_elements: FILE_ELEMENTS_TYPE | None,
 ) -> OpenMLServerError:
-    if response.status_code == 414:
+    if response.status_code == requests.codes.URI_TOO_LONG:
         raise OpenMLServerError(f"URI too long! ({url})")
+    # OpenML has a sophisticated error system where information about failures is provided
+    # in the response body itself.
+    # First, we need to parse it out.
     try:
         server_exception = xmltodict.parse(response.text)
     except xml.parsers.expat.ExpatError as e:
         raise e
     except Exception as e:
-        # OpenML has a sophisticated error system
-        # where information about failures is provided. try to parse this
+        # If we failed to parse it out, then something has gone wrong in the body the server
+        # sent back, and there is little extra information we can capture.
         raise OpenMLServerError(
             f"Unexpected server error when calling {url}. Please contact the developers!\n"
             f"Status code: {response.status_code}\n{response.text}",
         ) from e
 
+    # Now we can parse out the specific error codes that the server returns. These
+    # are in addition to the typical HTTP error codes, but encode more
+    # specific information. You can find these codes here:
+    # https://github.com/openml/OpenML/blob/develop/openml_OS/views/pages/api_new/v1/xml/pre.php
     server_error = server_exception["oml:error"]
     code = int(server_error["oml:code"])
     message = server_error["oml:message"]
@@ -496,4 +506,21 @@ def __parse_server_exception(
         )
     else:
         full_message = f"{message} - {additional_information}"
+
+    if code in [
+        102,  # flow/exists post
+        137,  # dataset post
+        350,  # dataset/42 delete
+        310,  # flow/ post
+        320,  # flow/42 delete
+        400,  # run/42 delete
+        460,  # task/42 delete
+    ]:
+        msg = (
+            f"The API call {url} requires authentication via an API key.\nPlease configure "
+            "OpenML-Python to use your API key as described in this example:"
+            "\nhttps://openml.github.io/openml-python/main/examples/20_basic/introduction_tutorial.html#authentication"
+        )
+        return OpenMLNotAuthorizedError(message=msg)
+
     return OpenMLServerException(code=code, message=full_message, url=url)
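A minimal sketch of the behaviour the hunks above introduce, assuming this patch is applied; the dataset id 42 and the interactive handling are illustrative only. An unauthenticated request to one of the listed endpoints now surfaces OpenMLNotAuthorizedError carrying the tutorial link, rather than a bare OpenMLServerException:

    import openml
    from openml.exceptions import OpenMLNotAuthorizedError

    # Temporarily drop the API key so the server treats the call as unauthenticated.
    with openml.config.overwrite_config_context({"apikey": None}):
        try:
            # Server error code 350: deleting a dataset requires an authenticated owner.
            openml._api_calls._perform_api_call(call="dataset/42", request_method="delete", data=None)
        except OpenMLNotAuthorizedError as err:
            print(err)  # The message now ends with the authentication tutorial URL.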
diff --git a/openml/config.py b/openml/config.py
index a412c0cca..a244a317e 100644
--- a/openml/config.py
+++ b/openml/config.py
@@ -10,9 +10,10 @@
 import platform
 import shutil
 import warnings
+from contextlib import contextmanager
 from io import StringIO
 from pathlib import Path
-from typing import Any, cast
+from typing import Any, Iterator, cast
 
 from typing_extensions import Literal, TypedDict
 from urllib.parse import urlparse
@@ -174,11 +175,11 @@ def get_server_base_url() -> str:
 apikey: str = _defaults["apikey"]
 show_progress: bool = _defaults["show_progress"]
 # The current cache directory (without the server name)
-_root_cache_directory = Path(_defaults["cachedir"])
+_root_cache_directory: Path = Path(_defaults["cachedir"])
 avoid_duplicate_runs = _defaults["avoid_duplicate_runs"]
 
-retry_policy = _defaults["retry_policy"]
-connection_n_retries = _defaults["connection_n_retries"]
+retry_policy: Literal["human", "robot"] = _defaults["retry_policy"]
+connection_n_retries: int = _defaults["connection_n_retries"]
 
 
 def set_retry_policy(value: Literal["human", "robot"], n_retries: int | None = None) -> None:
@@ -497,6 +498,19 @@ def set_root_cache_directory(root_cache_directory: str | Path) -> None:
 stop_using_configuration_for_example = ConfigurationForExamples.stop_using_configuration_for_example
 
 
+@contextmanager
+def overwrite_config_context(config: dict[str, Any]) -> Iterator[_Config]:
+    """A context manager to temporarily override variables in the configuration."""
+    existing_config = get_config_as_dict()
+    merged_config = {**existing_config, **config}
+
+    _setup(merged_config)  # type: ignore
+    try:
+        yield merged_config  # type: ignore
+    finally:
+        _setup(existing_config)
+
+
 __all__ = [
     "get_cache_directory",
     "set_root_cache_directory",
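A usage sketch of the context manager added above. The production server URL mirrors the one used in the evaluations test further down, and the override keys are assumed to match the names returned by get_config_as_dict(). Values are swapped in for the duration of the with block and the previous configuration is restored afterwards, even if the block raises:

    import openml
    from openml.config import overwrite_config_context

    with overwrite_config_context({"server": "https://www.openml.org/api/v1/xml", "apikey": None}):
        # Module-level settings reflect the overrides inside the block.
        assert openml.config.server == "https://www.openml.org/api/v1/xml"

    # On exit, the prior server, API key, cache directory, etc. are restored.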
"setups"]: res_paths.append(_c_root_dir / _d) - for _id in ["-1","2"]: + for _id in ["-1", "2"]: tmp_p = _c_root_dir / "datasets" / _id - res_paths.extend([ - tmp_p / "dataset.arff", - tmp_p / "features.xml", - tmp_p / "qualities.xml", - tmp_p / "description.xml", - ]) + res_paths.extend( + [ + tmp_p / "dataset.arff", + tmp_p / "features.xml", + tmp_p / "qualities.xml", + tmp_p / "description.xml", + ] + ) res_paths.append(_c_root_dir / "datasets" / "30" / "dataset_30.pq") res_paths.append(_c_root_dir / "runs" / "1" / "description.xml") res_paths.append(_c_root_dir / "setups" / "1" / "description.xml") - + for _id in ["1", "3", "1882"]: tmp_p = _c_root_dir / "tasks" / _id - res_paths.extend([ - tmp_p / "datasplits.arff", - tmp_p / "task.xml", - ]) - + res_paths.extend( + [ + tmp_p / "datasplits.arff", + tmp_p / "task.xml", + ] + ) + return res_paths def assert_static_test_cache_correct(root_dir: Path) -> None: for p in _expected_static_cache_state(root_dir): - assert p.exists(), f"Expected path {p} does not exist" - + assert p.exists(), f"Expected path {p} exists" + @pytest.fixture(scope="class") def long_version(request): request.cls.long_version = request.config.getoption("--long") -@pytest.fixture() +@pytest.fixture(scope="session") def test_files_directory() -> Path: return Path(__file__).parent / "files" -@pytest.fixture() +@pytest.fixture(scope="session") def test_api_key() -> str: return "c0c42819af31e706efe1f4b88c23c6c1" -@pytest.fixture(autouse=True) -def verify_cache_state(test_files_directory) -> None: +@pytest.fixture(autouse=True, scope="function") +def verify_cache_state(test_files_directory) -> Iterator[None]: assert_static_test_cache_correct(test_files_directory) yield assert_static_test_cache_correct(test_files_directory) + + +@pytest.fixture(autouse=True, scope="session") +def as_robot() -> Iterator[None]: + policy = openml.config.retry_policy + n_retries = openml.config.connection_n_retries + openml.config.set_retry_policy("robot", n_retries=20) + yield + openml.config.set_retry_policy(policy, n_retries) + + +@pytest.fixture(autouse=True, scope="session") +def with_test_server(): + openml.config.start_using_configuration_for_example() + yield + openml.config.stop_using_configuration_for_example() + + +@pytest.fixture(autouse=True) +def with_test_cache(test_files_directory, request): + if not test_files_directory.exists(): + raise ValueError( + f"Cannot find test cache dir, expected it to be {test_files_directory!s}!", + ) + _root_cache_directory = openml.config._root_cache_directory + tmp_cache = test_files_directory / request.node.name + openml.config.set_root_cache_directory(tmp_cache) + yield + openml.config.set_root_cache_directory(_root_cache_directory) + if tmp_cache.exists(): + shutil.rmtree(tmp_cache) diff --git a/tests/test_evaluations/test_evaluations_example.py b/tests/test_evaluations/test_evaluations_example.py index bf5b03f3f..a0980f5f9 100644 --- a/tests/test_evaluations/test_evaluations_example.py +++ b/tests/test_evaluations/test_evaluations_example.py @@ -3,35 +3,47 @@ import unittest +from openml.config import overwrite_config_context + class TestEvaluationsExample(unittest.TestCase): def test_example_python_paper(self): # Example script which will appear in the upcoming OpenML-Python paper # This test ensures that the example will keep running! 
diff --git a/tests/test_evaluations/test_evaluations_example.py b/tests/test_evaluations/test_evaluations_example.py
index bf5b03f3f..a0980f5f9 100644
--- a/tests/test_evaluations/test_evaluations_example.py
+++ b/tests/test_evaluations/test_evaluations_example.py
@@ -3,35 +3,47 @@
 
 import unittest
 
+from openml.config import overwrite_config_context
+
 
 class TestEvaluationsExample(unittest.TestCase):
     def test_example_python_paper(self):
         # Example script which will appear in the upcoming OpenML-Python paper
         # This test ensures that the example will keep running!
-
-        import matplotlib.pyplot as plt
-        import numpy as np
-
-        import openml
-
-        df = openml.evaluations.list_evaluations_setups(
-            "predictive_accuracy",
-            flows=[8353],
-            tasks=[6],
-            output_format="dataframe",
-            parameters_in_separate_columns=True,
-        )  # Choose an SVM flow, for example 8353, and a task.
-
-        hp_names = ["sklearn.svm.classes.SVC(16)_C", "sklearn.svm.classes.SVC(16)_gamma"]
-        df[hp_names] = df[hp_names].astype(float).apply(np.log)
-        C, gamma, score = df[hp_names[0]], df[hp_names[1]], df["value"]
-
-        cntr = plt.tricontourf(C, gamma, score, levels=12, cmap="RdBu_r")
-        plt.colorbar(cntr, label="accuracy")
-        plt.xlim((min(C), max(C)))
-        plt.ylim((min(gamma), max(gamma)))
-        plt.xlabel("C (log10)", size=16)
-        plt.ylabel("gamma (log10)", size=16)
-        plt.title("SVM performance landscape", size=20)
-
-        plt.tight_layout()
+        with overwrite_config_context(
+            {
+                "server": "https://www.openml.org/api/v1/xml",
+                "apikey": None,
+            }
+        ):
+            import matplotlib.pyplot as plt
+            import numpy as np
+
+            import openml
+
+            df = openml.evaluations.list_evaluations_setups(
+                "predictive_accuracy",
+                flows=[8353],
+                tasks=[6],
+                output_format="dataframe",
+                parameters_in_separate_columns=True,
+            )  # Choose an SVM flow, for example 8353, and a task.
+
+            assert len(df) > 0, (
+                "No evaluation found for flow 8353 on task 6, could "
+                "be that this task is not available on the test server."
+            )
+
+            hp_names = ["sklearn.svm.classes.SVC(16)_C", "sklearn.svm.classes.SVC(16)_gamma"]
+            df[hp_names] = df[hp_names].astype(float).apply(np.log)
+            C, gamma, score = df[hp_names[0]], df[hp_names[1]], df["value"]
+
+            cntr = plt.tricontourf(C, gamma, score, levels=12, cmap="RdBu_r")
+            plt.colorbar(cntr, label="accuracy")
+            plt.xlim((min(C), max(C)))
+            plt.ylim((min(gamma), max(gamma)))
+            plt.xlabel("C (log10)", size=16)
+            plt.ylabel("gamma (log10)", size=16)
+            plt.title("SVM performance landscape", size=20)
+
+            plt.tight_layout()
diff --git a/tests/test_openml/test_api_calls.py b/tests/test_openml/test_api_calls.py
index 37cf6591d..51123b0d8 100644
--- a/tests/test_openml/test_api_calls.py
+++ b/tests/test_openml/test_api_calls.py
@@ -9,8 +9,9 @@
 import pytest
 
 import openml
+from openml.config import ConfigurationForExamples
 import openml.testing
-from openml._api_calls import _download_minio_bucket
+from openml._api_calls import _download_minio_bucket, API_TOKEN_HELP_LINK
 
 
 class TestConfig(openml.testing.TestBase):
@@ -99,3 +100,26 @@ def test_download_minio_failure(mock_minio, tmp_path: Path) -> None:
 
     with pytest.raises(ValueError):
         _download_minio_bucket(source=some_url, destination=tmp_path)
+
+
+@pytest.mark.parametrize(
+    "endpoint, method",
+    [
+        # https://github.com/openml/OpenML/blob/develop/openml_OS/views/pages/api_new/v1/xml/pre.php
+        ("flow/exists", "post"),  # 102
+        ("dataset", "post"),  # 137
+        ("dataset/42", "delete"),  # 350
+        # ("flow/owned", "post"),  # 310 - Couldn't find what would trigger this
+        ("flow/42", "delete"),  # 320
+        ("run/42", "delete"),  # 400
+        ("task/42", "delete"),  # 460
+    ],
+)
+def test_authentication_endpoints_requiring_api_key_show_relevant_help_link(
+    endpoint: str,
+    method: str,
+) -> None:
+    # We need to temporarily disable the API key to test the error message
+    with openml.config.overwrite_config_context({"apikey": None}):
+        with pytest.raises(openml.exceptions.OpenMLNotAuthorizedError, match=API_TOKEN_HELP_LINK):
+            openml._api_calls._perform_api_call(call=endpoint, request_method=method, data=None)
diff --git a/tests/test_utils/test_utils.py b/tests/test_utils/test_utils.py
index cae947917..d900671b7 100644
--- a/tests/test_utils/test_utils.py
+++ b/tests/test_utils/test_utils.py
@@ -8,37 +8,6 @@
 from openml.testing import _check_dataset
 
 
-@pytest.fixture(autouse=True)
-def as_robot():
-    policy = openml.config.retry_policy
-    n_retries = openml.config.connection_n_retries
-    openml.config.set_retry_policy("robot", n_retries=20)
-    yield
-    openml.config.set_retry_policy(policy, n_retries)
-
-
-@pytest.fixture(autouse=True)
-def with_test_server():
-    openml.config.start_using_configuration_for_example()
-    yield
-    openml.config.stop_using_configuration_for_example()
-
-
-@pytest.fixture(autouse=True)
-def with_test_cache(test_files_directory, request):
-    if not test_files_directory.exists():
-        raise ValueError(
-            f"Cannot find test cache dir, expected it to be {test_files_directory!s}!",
-        )
-    _root_cache_directory = openml.config._root_cache_directory
-    tmp_cache = test_files_directory / request.node.name
-    openml.config.set_root_cache_directory(tmp_cache)
-    yield
-    openml.config.set_root_cache_directory(_root_cache_directory)
-    if tmp_cache.exists():
-        shutil.rmtree(tmp_cache)
-
-
 @pytest.fixture()
 def min_number_tasks_on_test_server() -> int:
     """After a reset at least 1068 tasks are on the test server"""