From edc4c900a24960a2f859e52945b8ab171716f53d Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Sat, 25 Jan 2025 11:36:59 +0100
Subject: [PATCH] Declare the environment variable in config.py
---
openml/config.py | 1 +
openml/datasets/dataset.py | 3 ++-
openml/datasets/functions.py | 3 ++-
3 files changed, 5 insertions(+), 2 deletions(-)
diff --git a/openml/config.py b/openml/config.py
index a244a317e..d838b070a 100644
--- a/openml/config.py
+++ b/openml/config.py
@@ -23,6 +23,7 @@
file_handler: logging.handlers.RotatingFileHandler | None = None
OPENML_CACHE_DIR_ENV_VAR = "OPENML_CACHE_DIR"
+OPENML_SKIP_PARQUET_ENV_VAR = "OPENML_SKIP_PARQUET"
class _Config(TypedDict):
diff --git a/openml/datasets/dataset.py b/openml/datasets/dataset.py
index 2b021c8ab..5190ac522 100644
--- a/openml/datasets/dataset.py
+++ b/openml/datasets/dataset.py
@@ -18,6 +18,7 @@
import xmltodict
from openml.base import OpenMLBase
+from openml.config import OPENML_SKIP_PARQUET_ENV_VAR
from openml.exceptions import PyOpenMLError
from .data_feature import OpenMLDataFeature
@@ -359,7 +360,7 @@ def _download_data(self) -> None:
# import required here to avoid circular import.
from .functions import _get_dataset_arff, _get_dataset_parquet
- skip_parquet = os.environ.get("OPENML_SKIP_PQ", "false").casefold() == "true"
+ skip_parquet = os.environ.get(OPENML_SKIP_PARQUET_ENV_VAR, "false").casefold() == "true"
if self._parquet_url is not None and not skip_parquet:
parquet_file = _get_dataset_parquet(self)
self.parquet_file = None if parquet_file is None else str(parquet_file)
diff --git a/openml/datasets/functions.py b/openml/datasets/functions.py
index 8e67cd55f..3f3c709f9 100644
--- a/openml/datasets/functions.py
+++ b/openml/datasets/functions.py
@@ -21,6 +21,7 @@
import openml._api_calls
import openml.utils
+from openml.config import OPENML_SKIP_PARQUET_ENV_VAR
from openml.exceptions import (
OpenMLHashException,
OpenMLPrivateDatasetError,
@@ -562,7 +563,7 @@ def get_dataset( # noqa: C901, PLR0912
qualities_file = _get_dataset_qualities_file(did_cache_dir, dataset_id)
parquet_file = None
- skip_parquet = os.environ.get("OPENML_SKIP_PQ", "false").casefold() == "true"
+ skip_parquet = os.environ.get(OPENML_SKIP_PARQUET_ENV_VAR, "false").casefold() == "true"
download_parquet = "oml:parquet_url" in description and not skip_parquet
if download_parquet and (download_data or download_all_files):
try: