From bf6e0001e00bff1a0933dd53588bbc45b312cc76 Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Tue, 30 Dec 2025 02:07:17 +0530 Subject: [PATCH 001/156] changes made --- openml/_api_calls.py | 4 +- openml/config.py | 252 +++++++++++++++++++++---------------------- 2 files changed, 127 insertions(+), 129 deletions(-) diff --git a/openml/_api_calls.py b/openml/_api_calls.py index 81296b3da..12567ac7a 100644 --- a/openml/_api_calls.py +++ b/openml/_api_calls.py @@ -71,7 +71,7 @@ def resolve_env_proxies(url: str) -> str | None: def _create_url_from_endpoint(endpoint: str) -> str: - url = config.server + url = config._config.server if not url.endswith("/"): url += "/" url += endpoint @@ -301,7 +301,7 @@ def _file_id_to_url(file_id: int, filename: str | None = None) -> str: Presents the URL how to download a given file id filename is optional """ - openml_url = config.server.split("/api/") + openml_url = config._config.server.split("/api/") url = openml_url[0] + f"/data/download/{file_id!s}" if filename is not None: url += "/" + filename diff --git a/openml/config.py b/openml/config.py index cf66a6346..98a48a1c6 100644 --- a/openml/config.py +++ b/openml/config.py @@ -1,6 +1,7 @@ """Store module level information like the API key, cache directory and the server""" # License: BSD 3-Clause +# ruff: noqa: PLW0603 from __future__ import annotations import configparser @@ -11,10 +12,11 @@ import shutil import warnings from contextlib import contextmanager +from dataclasses import dataclass, replace from io import StringIO from pathlib import Path -from typing import Any, Iterator, cast -from typing_extensions import Literal, TypedDict +from typing import Any, Iterator +from typing_extensions import Literal from urllib.parse import urlparse logger = logging.getLogger(__name__) @@ -27,19 +29,62 @@ _TEST_SERVER_NORMAL_USER_KEY = "normaluser" -class _Config(TypedDict): - apikey: str - server: str - cachedir: Path - 
avoid_duplicate_runs: bool - retry_policy: Literal["human", "robot"] - connection_n_retries: int - show_progress: bool +# Default values (see also https://github.com/openml/OpenML/wiki/Client-API-Standards) +_user_path = Path("~").expanduser().absolute() + + +def _resolve_default_cache_dir() -> Path: + user_defined_cache_dir = os.environ.get(OPENML_CACHE_DIR_ENV_VAR) + if user_defined_cache_dir is not None: + return Path(user_defined_cache_dir) + + if platform.system().lower() != "linux": + return _user_path / ".openml" + + xdg_cache_home = os.environ.get("XDG_CACHE_HOME") + if xdg_cache_home is None: + return Path("~", ".cache", "openml") + + # This is the proper XDG_CACHE_HOME directory, but + # we unfortunately had a problem where we used XDG_CACHE_HOME/org, + # we check heuristically if this old directory still exists and issue + # a warning if it does. There's too much data to move to do this for the user. + + # The new cache directory exists + cache_dir = Path(xdg_cache_home) / "openml" + if cache_dir.exists(): + return cache_dir + + # The old cache directory *does not* exist + heuristic_dir_for_backwards_compat = Path(xdg_cache_home) / "org" / "openml" + if not heuristic_dir_for_backwards_compat.exists(): + return cache_dir + + root_dir_to_delete = Path(xdg_cache_home) / "org" + openml_logger.warning( + "An old cache directory was found at '%s'. This directory is no longer used by " + "OpenML-Python. To silence this warning you would need to delete the old cache " + "directory. 
The cached files will then be located in '%s'.", + root_dir_to_delete, + cache_dir, + ) + return Path(xdg_cache_home) + + +@dataclass(frozen=True) +class OpenMLConfig: + apikey: str = "" + server: str = "https://www.openml.org/api/v1/xml" + cachedir: Path = _resolve_default_cache_dir() # noqa: RUF009 + avoid_duplicate_runs: bool = False + retry_policy: Literal["human", "robot"] = "human" + connection_n_retries: int = 5 + show_progress: bool = False def _create_log_handlers(create_file_handler: bool = True) -> None: # noqa: FBT001, FBT002 """Creates but does not attach the log handlers.""" - global console_handler, file_handler # noqa: PLW0603 + global console_handler, file_handler, _root_cache_directory # noqa: PLW0602 if console_handler is not None or file_handler is not None: logger.debug("Requested to create log handlers, but they are already created.") return @@ -105,61 +150,22 @@ def set_file_log_level(file_output_level: int) -> None: _set_level_register_and_store(file_handler, file_output_level) -# Default values (see also https://github.com/openml/OpenML/wiki/Client-API-Standards) -_user_path = Path("~").expanduser().absolute() - - -def _resolve_default_cache_dir() -> Path: - user_defined_cache_dir = os.environ.get(OPENML_CACHE_DIR_ENV_VAR) - if user_defined_cache_dir is not None: - return Path(user_defined_cache_dir) - - if platform.system().lower() != "linux": - return _user_path / ".openml" - - xdg_cache_home = os.environ.get("XDG_CACHE_HOME") - if xdg_cache_home is None: - return Path("~", ".cache", "openml") +_config: OpenMLConfig = OpenMLConfig() +_root_cache_directory: Path = _config.cachedir - # This is the proper XDG_CACHE_HOME directory, but - # we unfortunately had a problem where we used XDG_CACHE_HOME/org, - # we check heuristically if this old directory still exists and issue - # a warning if it does. There's too much data to move to do this for the user. 
- # The new cache directory exists - cache_dir = Path(xdg_cache_home) / "openml" - if cache_dir.exists(): - return cache_dir +def __getattr__(name: str) -> Any: + if hasattr(_config, name): + return getattr(_config, name) + raise AttributeError(f"module 'openml.config' has no attribute '{name}'") - # The old cache directory *does not* exist - heuristic_dir_for_backwards_compat = Path(xdg_cache_home) / "org" / "openml" - if not heuristic_dir_for_backwards_compat.exists(): - return cache_dir - root_dir_to_delete = Path(xdg_cache_home) / "org" - openml_logger.warning( - "An old cache directory was found at '%s'. This directory is no longer used by " - "OpenML-Python. To silence this warning you would need to delete the old cache " - "directory. The cached files will then be located in '%s'.", - root_dir_to_delete, - cache_dir, - ) - return Path(xdg_cache_home) - - -_defaults: _Config = { - "apikey": "", - "server": "https://www.openml.org/api/v1/xml", - "cachedir": _resolve_default_cache_dir(), - "avoid_duplicate_runs": False, - "retry_policy": "human", - "connection_n_retries": 5, - "show_progress": False, -} - -# Default values are actually added here in the _setup() function which is -# called at the end of this module -server = _defaults["server"] +def __setattr__(name: str, value: Any) -> None: # noqa: N807 + global _config + if hasattr(_config, name): + _config = replace(_config, **{name: value}) + else: + raise AttributeError(f"module 'openml.config' has no attribute '{name}'") def get_server_base_url() -> str: @@ -172,23 +178,12 @@ def get_server_base_url() -> str: ------- str """ - domain, path = server.split("/api", maxsplit=1) + domain, _ = _config.server.split("/api", maxsplit=1) return domain.replace("api", "www") -apikey: str = _defaults["apikey"] -show_progress: bool = _defaults["show_progress"] -# The current cache directory (without the server name) -_root_cache_directory: Path = Path(_defaults["cachedir"]) -avoid_duplicate_runs = 
_defaults["avoid_duplicate_runs"] - -retry_policy: Literal["human", "robot"] = _defaults["retry_policy"] -connection_n_retries: int = _defaults["connection_n_retries"] - - def set_retry_policy(value: Literal["human", "robot"], n_retries: int | None = None) -> None: - global retry_policy # noqa: PLW0603 - global connection_n_retries # noqa: PLW0603 + global _config default_retries_by_policy = {"human": 5, "robot": 50} if value not in default_retries_by_policy: @@ -202,8 +197,11 @@ def set_retry_policy(value: Literal["human", "robot"], n_retries: int | None = N if isinstance(n_retries, int) and n_retries < 1: raise ValueError(f"`n_retries` is '{n_retries}' but must be positive.") - retry_policy = value - connection_n_retries = default_retries_by_policy[value] if n_retries is None else n_retries + _config = replace( + _config, + retry_policy=value, + connection_n_retries=(default_retries_by_policy[value] if n_retries is None else n_retries), + ) class ConfigurationForExamples: @@ -222,24 +220,30 @@ def start_using_configuration_for_example(cls) -> None: To configuration as was before this call is stored, and can be recovered by using the `stop_use_example_configuration` method. """ - global server # noqa: PLW0603 - global apikey # noqa: PLW0603 + global _config - if cls._start_last_called and server == cls._test_server and apikey == cls._test_apikey: + if ( + cls._start_last_called + and _config.server == cls._test_server + and _config.apikey == cls._test_apikey + ): # Method is called more than once in a row without modifying the server or apikey. # We don't want to save the current test configuration as a last used configuration. 
return - cls._last_used_server = server - cls._last_used_key = apikey + cls._last_used_server = _config.server + cls._last_used_key = _config.apikey cls._start_last_called = True # Test server key for examples - server = cls._test_server - apikey = cls._test_apikey + _config = replace( + _config, + server=cls._test_server, + apikey=cls._test_apikey, + ) warnings.warn( - f"Switching to the test server {server} to not upload results to the live server. " - "Using the test server may result in reduced performance of the API!", + f"Switching to the test server {_config.server} to not upload results to " + "the live server. Using the test server may result in reduced performance of the API!", stacklevel=2, ) @@ -254,11 +258,9 @@ def stop_using_configuration_for_example(cls) -> None: "`start_use_example_configuration` must be called first.", ) - global server # noqa: PLW0603 - global apikey # noqa: PLW0603 + global _config + _config = replace(_config, server=cls._test_server, apikey=cls._test_apikey) - server = cast(str, cls._last_used_server) - apikey = cast(str, cls._last_used_key) cls._start_last_called = False @@ -327,7 +329,7 @@ def determine_config_file_path() -> Path: return config_dir / "config" -def _setup(config: _Config | None = None) -> None: +def _setup(config: dict[str, Any] | None = None) -> None: """Setup openml package. Called on first import. Reads the config file and sets up apikey, server, cache appropriately. @@ -336,11 +338,8 @@ def _setup(config: _Config | None = None) -> None: openml.config.server = SOMESERVER We could also make it a property but that's less clear. 
""" - global apikey # noqa: PLW0603 - global server # noqa: PLW0603 - global _root_cache_directory # noqa: PLW0603 - global avoid_duplicate_runs # noqa: PLW0603 - global show_progress # noqa: PLW0603 + global _config + global _root_cache_directory config_file = determine_config_file_path() config_dir = config_file.parent @@ -358,19 +357,24 @@ def _setup(config: _Config | None = None) -> None: if config is None: config = _parse_config(config_file) - avoid_duplicate_runs = config["avoid_duplicate_runs"] - apikey = config["apikey"] - server = config["server"] - show_progress = config["show_progress"] - n_retries = int(config["connection_n_retries"]) + _config = replace( + _config, + apikey=config["apikey"], + server=config["server"], + show_progress=config["show_progress"], + avoid_duplicate_runs=config["avoid_duplicate_runs"], + retry_policy=config["retry_policy"], + connection_n_retries=int(config["connection_n_retries"]), + ) - set_retry_policy(config["retry_policy"], n_retries) + set_retry_policy(config["retry_policy"], _config.connection_n_retries) user_defined_cache_dir = os.environ.get(OPENML_CACHE_DIR_ENV_VAR) if user_defined_cache_dir is not None: short_cache_dir = Path(user_defined_cache_dir) else: short_cache_dir = Path(config["cachedir"]) + _root_cache_directory = short_cache_dir.expanduser().resolve() try: @@ -389,29 +393,31 @@ def _setup(config: _Config | None = None) -> None: def set_field_in_config_file(field: str, value: Any) -> None: """Overwrites the `field` in the configuration file with the new `value`.""" - if field not in _defaults: - raise ValueError(f"Field '{field}' is not valid and must be one of '{_defaults.keys()}'.") + global _config + if not hasattr(_config, field): + raise ValueError( + f"Field '{field}' is not valid and must be one of '{_config.__dict__.keys()}'." 
+ ) - # TODO(eddiebergman): This use of globals has gone too far - globals()[field] = value + _config = replace(_config, **{field: value}) config_file = determine_config_file_path() - config = _parse_config(config_file) + existing = _parse_config(config_file) with config_file.open("w") as fh: - for f in _defaults: + for f in _config.__dict__: # We can't blindly set all values based on globals() because when the user # sets it through config.FIELD it should not be stored to file. # There doesn't seem to be a way to avoid writing defaults to file with configparser, # because it is impossible to distinguish from an explicitly set value that matches # the default value, to one that was set to its default because it was omitted. - value = globals()[f] if f == field else config.get(f) # type: ignore - if value is not None: - fh.write(f"{f} = {value}\n") + v = value if f == field else existing.get(f) + if v is not None: + fh.write(f"{f} = {v}\n") -def _parse_config(config_file: str | Path) -> _Config: +def _parse_config(config_file: str | Path) -> dict[str, Any]: """Parse the config file, set up defaults.""" config_file = Path(config_file) - config = configparser.RawConfigParser(defaults=_defaults) # type: ignore + config = configparser.RawConfigParser(defaults=_config.__dict__) # type: ignore # The ConfigParser requires a [SECTION_HEADER], which we do not expect in our config file. 
# Cheat the ConfigParser module by adding a fake section header @@ -434,16 +440,8 @@ def _parse_config(config_file: str | Path) -> _Config: return configuration # type: ignore -def get_config_as_dict() -> _Config: - return { - "apikey": apikey, - "server": server, - "cachedir": _root_cache_directory, - "avoid_duplicate_runs": avoid_duplicate_runs, - "connection_n_retries": connection_n_retries, - "retry_policy": retry_policy, - "show_progress": show_progress, - } +def get_config_as_dict() -> dict[str, Any]: + return _config.__dict__.copy() # NOTE: For backwards compatibility, we keep the `str` @@ -467,7 +465,7 @@ def get_cache_directory() -> str: The current cache directory. """ - url_suffix = urlparse(server).netloc + url_suffix = urlparse(_config.server).netloc reversed_url_suffix = os.sep.join(url_suffix.split(".")[::-1]) # noqa: PTH118 return os.path.join(_root_cache_directory, reversed_url_suffix) # noqa: PTH118 @@ -491,7 +489,7 @@ def set_root_cache_directory(root_cache_directory: str | Path) -> None: -------- get_cache_directory """ - global _root_cache_directory # noqa: PLW0603 + global _root_cache_directory _root_cache_directory = Path(root_cache_directory) @@ -502,7 +500,7 @@ def set_root_cache_directory(root_cache_directory: str | Path) -> None: @contextmanager -def overwrite_config_context(config: dict[str, Any]) -> Iterator[_Config]: +def overwrite_config_context(config: dict[str, Any]) -> Iterator[dict[str, Any]]: """A context manager to temporarily override variables in the configuration.""" existing_config = get_config_as_dict() merged_config = {**existing_config, **config} @@ -515,10 +513,10 @@ def overwrite_config_context(config: dict[str, Any]) -> Iterator[_Config]: __all__ = [ "get_cache_directory", + "get_config_as_dict", "set_root_cache_directory", "start_using_configuration_for_example", "stop_using_configuration_for_example", - "get_config_as_dict", ] _setup() From 0159f474c6bbc15f20d52bc946bd252bd852b196 Mon Sep 17 00:00:00 2001 From: 
geetu040 Date: Tue, 30 Dec 2025 09:11:27 +0500 Subject: [PATCH 002/156] set up folder structure and base code --- openml/_api/__init__.py | 8 +++ openml/_api/config.py | 5 ++ openml/_api/http/__init__.py | 1 + openml/_api/http/client.py | 23 ++++++ openml/_api/http/utils.py | 0 openml/_api/resources/__init__.py | 2 + openml/_api/resources/base.py | 22 ++++++ openml/_api/resources/datasets.py | 13 ++++ openml/_api/resources/tasks.py | 113 ++++++++++++++++++++++++++++++ openml/_api/runtime/core.py | 58 +++++++++++++++ openml/_api/runtime/fallback.py | 5 ++ openml/tasks/functions.py | 8 ++- 12 files changed, 255 insertions(+), 3 deletions(-) create mode 100644 openml/_api/__init__.py create mode 100644 openml/_api/config.py create mode 100644 openml/_api/http/__init__.py create mode 100644 openml/_api/http/client.py create mode 100644 openml/_api/http/utils.py create mode 100644 openml/_api/resources/__init__.py create mode 100644 openml/_api/resources/base.py create mode 100644 openml/_api/resources/datasets.py create mode 100644 openml/_api/resources/tasks.py create mode 100644 openml/_api/runtime/core.py create mode 100644 openml/_api/runtime/fallback.py diff --git a/openml/_api/__init__.py b/openml/_api/__init__.py new file mode 100644 index 000000000..5089f94dd --- /dev/null +++ b/openml/_api/__init__.py @@ -0,0 +1,8 @@ +from openml._api.runtime.core import APIContext + + +def set_api_version(version: str, strict=False): + api_context.set_version(version=version, strict=strict) + + +api_context = APIContext() diff --git a/openml/_api/config.py b/openml/_api/config.py new file mode 100644 index 000000000..bd93c3cad --- /dev/null +++ b/openml/_api/config.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +API_V1_SERVER = "https://www.openml.org/api/v1/xml" +API_V2_SERVER = "http://127.0.0.1:8001" +API_KEY = "..." 
diff --git a/openml/_api/http/__init__.py b/openml/_api/http/__init__.py new file mode 100644 index 000000000..fde2a5b0a --- /dev/null +++ b/openml/_api/http/__init__.py @@ -0,0 +1 @@ +from openml._api.http.client import HTTPClient diff --git a/openml/_api/http/client.py b/openml/_api/http/client.py new file mode 100644 index 000000000..81a9213e3 --- /dev/null +++ b/openml/_api/http/client.py @@ -0,0 +1,23 @@ +from __future__ import annotations + +import requests + +from openml.__version__ import __version__ + + +class HTTPClient: + def __init__(self, base_url: str): + self.base_url = base_url + self.headers = {"user-agent": f"openml-python/{__version__}"} + + def get(self, path, params=None): + url = f"{self.base_url}/{path}" + return requests.get(url, params=params, headers=self.headers) + + def post(self, path, data=None, files=None): + url = f"{self.base_url}/{path}" + return requests.post(url, data=data, files=files, headers=self.headers) + + def delete(self, path, params=None): + url = f"{self.base_url}/{path}" + return requests.delete(url, params=params, headers=self.headers) diff --git a/openml/_api/http/utils.py b/openml/_api/http/utils.py new file mode 100644 index 000000000..e69de29bb diff --git a/openml/_api/resources/__init__.py b/openml/_api/resources/__init__.py new file mode 100644 index 000000000..078fc5998 --- /dev/null +++ b/openml/_api/resources/__init__.py @@ -0,0 +1,2 @@ +from openml._api.resources.datasets import DatasetsV1, DatasetsV2 +from openml._api.resources.tasks import TasksV1, TasksV2 diff --git a/openml/_api/resources/base.py b/openml/_api/resources/base.py new file mode 100644 index 000000000..1fae27665 --- /dev/null +++ b/openml/_api/resources/base.py @@ -0,0 +1,22 @@ +from __future__ import annotations + +from abc import ABC, abstractmethod +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from openml._api.http import HTTPClient + + +class ResourceAPI: + def __init__(self, http: HTTPClient): + self._http = http + + +class 
DatasetsAPI(ResourceAPI, ABC): + @abstractmethod + def get(self, id: int) -> dict: ... + + +class TasksAPI(ResourceAPI, ABC): + @abstractmethod + def get(self, id: int) -> dict: ... diff --git a/openml/_api/resources/datasets.py b/openml/_api/resources/datasets.py new file mode 100644 index 000000000..cd1bb595a --- /dev/null +++ b/openml/_api/resources/datasets.py @@ -0,0 +1,13 @@ +from __future__ import annotations + +from openml._api.resources.base import DatasetsAPI + + +class DatasetsV1(DatasetsAPI): + def get(self, id): + pass + + +class DatasetsV2(DatasetsAPI): + def get(self, id): + pass diff --git a/openml/_api/resources/tasks.py b/openml/_api/resources/tasks.py new file mode 100644 index 000000000..b0e9afbf8 --- /dev/null +++ b/openml/_api/resources/tasks.py @@ -0,0 +1,113 @@ +from __future__ import annotations + +import xmltodict + +from openml._api.resources.base import TasksAPI +from openml.tasks.task import ( + OpenMLClassificationTask, + OpenMLClusteringTask, + OpenMLLearningCurveTask, + OpenMLRegressionTask, + OpenMLTask, + TaskType, +) + + +class TasksV1(TasksAPI): + def get(self, id, return_response=False): + path = f"task/{id}" + response = self._http.get(path) + xml_content = response.content + task = self._create_task_from_xml(xml_content) + + if return_response: + return task, response + + return task + + def _create_task_from_xml(self, xml: str) -> OpenMLTask: + """Create a task given a xml string. + + Parameters + ---------- + xml : string + Task xml representation. 
+ + Returns + ------- + OpenMLTask + """ + dic = xmltodict.parse(xml)["oml:task"] + estimation_parameters = {} + inputs = {} + # Due to the unordered structure we obtain, we first have to extract + # the possible keys of oml:input; dic["oml:input"] is a list of + # OrderedDicts + + # Check if there is a list of inputs + if isinstance(dic["oml:input"], list): + for input_ in dic["oml:input"]: + name = input_["@name"] + inputs[name] = input_ + # Single input case + elif isinstance(dic["oml:input"], dict): + name = dic["oml:input"]["@name"] + inputs[name] = dic["oml:input"] + + evaluation_measures = None + if "evaluation_measures" in inputs: + evaluation_measures = inputs["evaluation_measures"]["oml:evaluation_measures"][ + "oml:evaluation_measure" + ] + + task_type = TaskType(int(dic["oml:task_type_id"])) + common_kwargs = { + "task_id": dic["oml:task_id"], + "task_type": dic["oml:task_type"], + "task_type_id": task_type, + "data_set_id": inputs["source_data"]["oml:data_set"]["oml:data_set_id"], + "evaluation_measure": evaluation_measures, + } + # TODO: add OpenMLClusteringTask? 
+ if task_type in ( + TaskType.SUPERVISED_CLASSIFICATION, + TaskType.SUPERVISED_REGRESSION, + TaskType.LEARNING_CURVE, + ): + # Convert some more parameters + for parameter in inputs["estimation_procedure"]["oml:estimation_procedure"][ + "oml:parameter" + ]: + name = parameter["@name"] + text = parameter.get("#text", "") + estimation_parameters[name] = text + + common_kwargs["estimation_procedure_type"] = inputs["estimation_procedure"][ + "oml:estimation_procedure" + ]["oml:type"] + common_kwargs["estimation_procedure_id"] = int( + inputs["estimation_procedure"]["oml:estimation_procedure"]["oml:id"] + ) + + common_kwargs["estimation_parameters"] = estimation_parameters + common_kwargs["target_name"] = inputs["source_data"]["oml:data_set"][ + "oml:target_feature" + ] + common_kwargs["data_splits_url"] = inputs["estimation_procedure"][ + "oml:estimation_procedure" + ]["oml:data_splits_url"] + + cls = { + TaskType.SUPERVISED_CLASSIFICATION: OpenMLClassificationTask, + TaskType.SUPERVISED_REGRESSION: OpenMLRegressionTask, + TaskType.CLUSTERING: OpenMLClusteringTask, + TaskType.LEARNING_CURVE: OpenMLLearningCurveTask, + }.get(task_type) + if cls is None: + raise NotImplementedError(f"Task type {common_kwargs['task_type']} not supported.") + return cls(**common_kwargs) # type: ignore + + +class TasksV2(TasksAPI): + def get(self, id): + pass diff --git a/openml/_api/runtime/core.py b/openml/_api/runtime/core.py new file mode 100644 index 000000000..80f35587c --- /dev/null +++ b/openml/_api/runtime/core.py @@ -0,0 +1,58 @@ +from __future__ import annotations + +from openml._api.config import ( + API_V1_SERVER, + API_V2_SERVER, +) +from openml._api.http.client import HTTPClient +from openml._api.resources import ( + DatasetsV1, + DatasetsV2, + TasksV1, + TasksV2, +) +from openml._api.runtime.fallback import FallbackProxy + + +class APIBackend: + def __init__(self, *, datasets, tasks): + self.datasets = datasets + self.tasks = tasks + + +def build_backend(version: str, 
strict: bool) -> APIBackend: + v1_http = HTTPClient(API_V1_SERVER) + v2_http = HTTPClient(API_V2_SERVER) + + v1 = APIBackend( + datasets=DatasetsV1(v1_http), + tasks=TasksV1(v1_http), + ) + + if version == "v1": + return v1 + + v2 = APIBackend( + datasets=DatasetsV2(v2_http), + tasks=TasksV2(v2_http), + ) + + if strict: + return v2 + + return APIBackend( + datasets=FallbackProxy(v2.datasets, v1.datasets), + tasks=FallbackProxy(v2.tasks, v1.tasks), + ) + + +class APIContext: + def __init__(self): + self._backend = build_backend("v1", strict=False) + + def set_version(self, version: str, strict: bool = False): + self._backend = build_backend(version, strict) + + @property + def backend(self): + return self._backend diff --git a/openml/_api/runtime/fallback.py b/openml/_api/runtime/fallback.py new file mode 100644 index 000000000..56e96a966 --- /dev/null +++ b/openml/_api/runtime/fallback.py @@ -0,0 +1,5 @@ +from __future__ import annotations + + +class FallbackProxy: + pass diff --git a/openml/tasks/functions.py b/openml/tasks/functions.py index d2bf5e946..91be65965 100644 --- a/openml/tasks/functions.py +++ b/openml/tasks/functions.py @@ -12,6 +12,7 @@ import openml._api_calls import openml.utils +from openml._api import api_context from openml.datasets import get_dataset from openml.exceptions import OpenMLCacheException @@ -442,11 +443,12 @@ def _get_task_description(task_id: int) -> OpenMLTask: except OpenMLCacheException: _cache_dir = openml.utils._create_cache_directory_for_id(TASKS_CACHE_DIR_NAME, task_id) xml_file = _cache_dir / "task.xml" - task_xml = openml._api_calls._perform_api_call("task/%d" % task_id, "get") + task, response = api_context.backend.tasks.get(task_id, return_response=True) with xml_file.open("w", encoding="utf8") as fh: - fh.write(task_xml) - return _create_task_from_xml(task_xml) + fh.write(response.text) + + return task def _create_task_from_xml(xml: str) -> OpenMLTask: From 834782c105b5244095e20f17059c081b88634640 Mon Sep 17 00:00:00 
2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Tue, 30 Dec 2025 12:31:52 +0530 Subject: [PATCH 003/156] bug fixing --- examples/Advanced/datasets_tutorial.py | 2 +- .../benchmark_with_optunahub.py | 4 +-- .../flow_id_tutorial.py | 2 +- openml/_api_calls.py | 12 ++++----- openml/cli.py | 2 +- openml/config.py | 16 +++++++----- openml/runs/functions.py | 2 +- openml/testing.py | 14 +++++----- tests/conftest.py | 16 ++++++------ tests/test_datasets/test_dataset_functions.py | 14 +++++----- tests/test_openml/test_config.py | 26 +++++++++---------- tests/test_utils/test_utils.py | 2 +- 12 files changed, 58 insertions(+), 54 deletions(-) diff --git a/examples/Advanced/datasets_tutorial.py b/examples/Advanced/datasets_tutorial.py index cc57686d0..3a4833206 100644 --- a/examples/Advanced/datasets_tutorial.py +++ b/examples/Advanced/datasets_tutorial.py @@ -139,7 +139,7 @@ # only for the dataset owner. Further, critical fields cannot be edited if the dataset has any # tasks associated with it. To edit critical fields of a dataset (without tasks) owned by you, # configure the API key: -# openml.config.apikey = 'FILL_IN_OPENML_API_KEY' +# openml.config._config.apikey = 'FILL_IN_OPENML_API_KEY' # This example here only shows a failure when trying to work on a dataset not owned by you: # %% diff --git a/examples/_external_or_deprecated/benchmark_with_optunahub.py b/examples/_external_or_deprecated/benchmark_with_optunahub.py index ece3e7c40..c8f5f7b0c 100644 --- a/examples/_external_or_deprecated/benchmark_with_optunahub.py +++ b/examples/_external_or_deprecated/benchmark_with_optunahub.py @@ -44,7 +44,7 @@ # account (you don't need one for anything else, just to upload your results), # go to your profile and select the API-KEY. 
# Or log in, and navigate to https://www.openml.org/auth/api-key -openml.config.apikey = "" +openml.config._config.apikey = "" ############################################################################ # Prepare for preprocessors and an OpenML task # ============================================ @@ -95,7 +95,7 @@ def objective(trial: optuna.Trial) -> Pipeline: run = openml.runs.run_model_on_task(pipe, task=task_id, avoid_duplicate_runs=False) logger.log(1, f"Model has been trained - {run}") - if openml.config.apikey != "": + if openml.config._config.apikey != "": try: run.publish() diff --git a/examples/_external_or_deprecated/flow_id_tutorial.py b/examples/_external_or_deprecated/flow_id_tutorial.py index e813655fc..c533cfd9f 100644 --- a/examples/_external_or_deprecated/flow_id_tutorial.py +++ b/examples/_external_or_deprecated/flow_id_tutorial.py @@ -16,7 +16,7 @@ # %% openml.config.start_using_configuration_for_example() -openml.config.server = "https://api.openml.org/api/v1/xml" +openml.config._config.server = "https://api.openml.org/api/v1/xml" # %% # Defining a classifier diff --git a/openml/_api_calls.py b/openml/_api_calls.py index 12567ac7a..c3f6d285f 100644 --- a/openml/_api_calls.py +++ b/openml/_api_calls.py @@ -172,7 +172,7 @@ def _download_minio_file( bucket_name=bucket, object_name=object_name, file_path=str(destination), - progress=ProgressBar() if config.show_progress else None, + progress=ProgressBar() if config._config.show_progress else None, request_headers=_HEADERS, ) if destination.is_file() and destination.suffix == ".zip": @@ -317,7 +317,7 @@ def _read_url_files( and sending file_elements as files """ data = {} if data is None else data - data["api_key"] = config.apikey + data["api_key"] = config._config.apikey if file_elements is None: file_elements = {} # Using requests.post sets header 'Accept-encoding' automatically to @@ -337,8 +337,8 @@ def __read_url( md5_checksum: str | None = None, ) -> requests.Response: data = {} if data is None
else data - if config.apikey: - data["api_key"] = config.apikey + if config._config.apikey: + data["api_key"] = config._config.apikey return _send_request( request_method=request_method, url=url, @@ -363,10 +363,10 @@ def _send_request( # noqa: C901, PLR0912 files: FILE_ELEMENTS_TYPE | None = None, md5_checksum: str | None = None, ) -> requests.Response: - n_retries = max(1, config.connection_n_retries) + n_retries = max(1, config._config.connection_n_retries) response: requests.Response | None = None - delay_method = _human_delay if config.retry_policy == "human" else _robot_delay + delay_method = _human_delay if config._config.retry_policy == "human" else _robot_delay # Error to raise in case of retrying too often. Will be set to the last observed exception. retry_raise_e: Exception | None = None diff --git a/openml/cli.py b/openml/cli.py index d0a46e498..fb39afe97 100644 --- a/openml/cli.py +++ b/openml/cli.py @@ -339,7 +339,7 @@ def main() -> None: "'https://openml.github.io/openml-python/main/usage.html#configuration'.", ) - configurable_fields = [f for f in config._defaults if f not in ["max_retries"]] + configurable_fields = [f for f in config.get_config_as_dict() if f not in ["max_retries"]] parser_configure.add_argument( "field", diff --git a/openml/config.py b/openml/config.py index 98a48a1c6..20825463e 100644 --- a/openml/config.py +++ b/openml/config.py @@ -15,7 +15,7 @@ from dataclasses import dataclass, replace from io import StringIO from pathlib import Path -from typing import Any, Iterator +from typing import Any, Iterator, cast from typing_extensions import Literal from urllib.parse import urlparse @@ -71,7 +71,7 @@ def _resolve_default_cache_dir() -> Path: return Path(xdg_cache_home) -@dataclass(frozen=True) +@dataclass class OpenMLConfig: apikey: str = "" server: str = "https://www.openml.org/api/v1/xml" @@ -259,8 +259,11 @@ def stop_using_configuration_for_example(cls) -> None: ) global _config - _config = replace(_config, 
server=cls._test_server, apikey=cls._test_apikey) - + _config = replace( + _config, + server=cast(str, cls._last_used_server), + apikey=cast(str, cls._last_used_key), + ) cls._start_last_called = False @@ -334,8 +337,8 @@ def _setup(config: dict[str, Any] | None = None) -> None: Reads the config file and sets up apikey, server, cache appropriately. key and server can be set by the user simply using - openml.config.apikey = THEIRKEY - openml.config.server = SOMESERVER + openml.config._config.apikey = THEIRKEY + openml.config._config.server = SOMESERVER We could also make it a property but that's less clear. """ global _config @@ -376,6 +379,7 @@ def _setup(config: dict[str, Any] | None = None) -> None: short_cache_dir = Path(config["cachedir"]) _root_cache_directory = short_cache_dir.expanduser().resolve() + _config = replace(_config, cachedir=_root_cache_directory) try: cache_exists = _root_cache_directory.exists() diff --git a/openml/runs/functions.py b/openml/runs/functions.py index 666b75c37..7fa560833 100644 --- a/openml/runs/functions.py +++ b/openml/runs/functions.py @@ -226,7 +226,7 @@ def run_flow_on_task( # noqa: C901, PLR0912, PLR0915, PLR0913 raise ValueError("flow_tags should be a list") if avoid_duplicate_runs is None: - avoid_duplicate_runs = openml.config.avoid_duplicate_runs + avoid_duplicate_runs = openml.config._config.avoid_duplicate_runs # TODO: At some point in the future do not allow for arguments in old order (changed 6-2018). # Flexibility currently still allowed due to code-snippet in OpenML100 paper (3-2019). 
diff --git a/openml/testing.py b/openml/testing.py index d1da16876..fbf7edf44 100644 --- a/openml/testing.py +++ b/openml/testing.py @@ -99,13 +99,13 @@ def setUp(self, n_levels: int = 1, tmpdir_suffix: str = "") -> None: os.chdir(self.workdir) self.cached = True - openml.config.apikey = TestBase.user_key + openml.config._config.apikey = TestBase.user_key self.production_server = "https://www.openml.org/api/v1/xml" openml.config.set_root_cache_directory(str(self.workdir)) # Increase the number of retries to avoid spurious server failures - self.retry_policy = openml.config.retry_policy - self.connection_n_retries = openml.config.connection_n_retries + self.retry_policy = openml.config._config.retry_policy + self.connection_n_retries = openml.config._config.connection_n_retries openml.config.set_retry_policy("robot", n_retries=20) def use_production_server(self) -> None: @@ -114,8 +114,8 @@ def use_production_server(self) -> None: Please use this sparingly - it is better to use the test server. 
""" - openml.config.server = self.production_server - openml.config.apikey = "" + openml.config._config.server = self.production_server + openml.config._config.apikey = "" def tearDown(self) -> None: """Tear down the test""" @@ -127,8 +127,8 @@ def tearDown(self) -> None: # one of the files may still be used by another process raise e - openml.config.connection_n_retries = self.connection_n_retries - openml.config.retry_policy = self.retry_policy + openml.config._config.connection_n_retries = self.connection_n_retries + openml.config._config.retry_policy = self.retry_policy @classmethod def _mark_entity_for_removal( diff --git a/tests/conftest.py b/tests/conftest.py index bd974f3f3..ba7c65813 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -97,8 +97,8 @@ def delete_remote_files(tracker, flow_names) -> None: :param tracker: Dict :return: None """ - openml.config.server = TestBase.test_server - openml.config.apikey = TestBase.user_key + openml.config._config.server = TestBase.test_server + openml.config._config.apikey = TestBase.user_key # reordering to delete sub flows at the end of flows # sub-flows have shorter names, hence, sorting by descending order of flow name length @@ -263,8 +263,8 @@ def verify_cache_state(test_files_directory) -> Iterator[None]: @pytest.fixture(autouse=True, scope="session") def as_robot() -> Iterator[None]: - policy = openml.config.retry_policy - n_retries = openml.config.connection_n_retries + policy = openml.config._config.retry_policy + n_retries = openml.config._config.connection_n_retries openml.config.set_retry_policy("robot", n_retries=20) yield openml.config.set_retry_policy(policy, n_retries) @@ -273,12 +273,12 @@ def as_robot() -> Iterator[None]: @pytest.fixture(autouse=True) def with_server(request): if "production" in request.keywords: - openml.config.server = "https://www.openml.org/api/v1/xml" - openml.config.apikey = None + openml.config._config.server = "https://www.openml.org/api/v1/xml" + 
openml.config._config.apikey = None yield return - openml.config.server = "https://test.openml.org/api/v1/xml" - openml.config.apikey = TestBase.user_key + openml.config._config.server = "https://test.openml.org/api/v1/xml" + openml.config._config.apikey = TestBase.user_key yield diff --git a/tests/test_datasets/test_dataset_functions.py b/tests/test_datasets/test_dataset_functions.py index 266a6f6f7..ab5a4d8b8 100644 --- a/tests/test_datasets/test_dataset_functions.py +++ b/tests/test_datasets/test_dataset_functions.py @@ -153,7 +153,7 @@ def test_check_datasets_active(self): openml.datasets.check_datasets_active, [79], ) - openml.config.server = self.test_server + openml.config._config.server = self.test_server def test_illegal_character_tag(self): dataset = openml.datasets.get_dataset(1) @@ -179,7 +179,7 @@ def test__name_to_id_with_deactivated(self): self.use_production_server() # /d/1 was deactivated assert openml.datasets.functions._name_to_id("anneal") == 2 - openml.config.server = self.test_server + openml.config._config.server = self.test_server @pytest.mark.production() def test__name_to_id_with_multiple_active(self): @@ -417,8 +417,8 @@ def test__getarff_md5_issue(self): "oml:md5_checksum": "abc", "oml:url": "https://www.openml.org/data/download/61", } - n = openml.config.connection_n_retries - openml.config.connection_n_retries = 1 + n = openml.config._config.connection_n_retries + openml.config._config.connection_n_retries = 1 self.assertRaisesRegex( OpenMLHashException, @@ -428,7 +428,7 @@ def test__getarff_md5_issue(self): description, ) - openml.config.connection_n_retries = n + openml.config._config.connection_n_retries = n def test__get_dataset_features(self): features_file = _get_dataset_features_file(self.workdir, 2) @@ -588,7 +588,7 @@ def test_data_status(self): # admin key for test server (only admins can activate datasets. 
# all users can deactivate their own datasets) - openml.config.apikey = TestBase.admin_key + openml.config._config.apikey = TestBase.admin_key openml.datasets.status_update(did, "active") self._assert_status_of_dataset(did=did, status="active") @@ -1507,7 +1507,7 @@ def test_list_datasets_with_high_size_parameter(self): datasets_b = openml.datasets.list_datasets(size=np.inf) # Reverting to test server - openml.config.server = self.test_server + openml.config._config.server = self.test_server assert len(datasets_a) == len(datasets_b) diff --git a/tests/test_openml/test_config.py b/tests/test_openml/test_config.py index 7ef223504..3ff4bcb00 100644 --- a/tests/test_openml/test_config.py +++ b/tests/test_openml/test_config.py @@ -46,7 +46,7 @@ class TestConfig(openml.testing.TestBase): def test_non_writable_home(self, log_handler_mock, warnings_mock): with tempfile.TemporaryDirectory(dir=self.workdir) as td: os.chmod(td, 0o444) - _dd = copy(openml.config._defaults) + _dd = copy(openml.config.get_config_as_dict()) _dd["cachedir"] = Path(td) / "something-else" openml.config._setup(_dd) @@ -110,26 +110,26 @@ class TestConfigurationForExamples(openml.testing.TestBase): def test_switch_to_example_configuration(self): """Verifies the test configuration is loaded properly.""" # Below is the default test key which would be used anyway, but just for clarity: - openml.config.apikey = TestBase.admin_key - openml.config.server = self.production_server + openml.config._config.apikey = TestBase.admin_key + openml.config._config.server = self.production_server openml.config.start_using_configuration_for_example() - assert openml.config.apikey == TestBase.user_key - assert openml.config.server == self.test_server + assert openml.config._config.apikey == TestBase.user_key + assert openml.config._config.server == self.test_server @pytest.mark.production() def test_switch_from_example_configuration(self): """Verifies the previous configuration is loaded after stopping.""" # Below is the 
default test key which would be used anyway, but just for clarity: - openml.config.apikey = TestBase.user_key - openml.config.server = self.production_server + openml.config._config.apikey = TestBase.user_key + openml.config._config.server = self.production_server openml.config.start_using_configuration_for_example() openml.config.stop_using_configuration_for_example() - assert openml.config.apikey == TestBase.user_key - assert openml.config.server == self.production_server + assert openml.config._config.apikey == TestBase.user_key + assert openml.config._config.server == self.production_server def test_example_configuration_stop_before_start(self): """Verifies an error is raised if `stop_...` is called before `start_...`.""" @@ -146,15 +146,15 @@ def test_example_configuration_stop_before_start(self): @pytest.mark.production() def test_example_configuration_start_twice(self): """Checks that the original config can be returned to if `start..` is called twice.""" - openml.config.apikey = TestBase.user_key - openml.config.server = self.production_server + openml.config._config.apikey = TestBase.user_key + openml.config._config.server = self.production_server openml.config.start_using_configuration_for_example() openml.config.start_using_configuration_for_example() openml.config.stop_using_configuration_for_example() - assert openml.config.apikey == TestBase.user_key - assert openml.config.server == self.production_server + assert openml.config._config.apikey == TestBase.user_key + assert openml.config._config.server == self.production_server def test_configuration_file_not_overwritten_on_load(): diff --git a/tests/test_utils/test_utils.py b/tests/test_utils/test_utils.py index 35be84903..1c0b50fe5 100644 --- a/tests/test_utils/test_utils.py +++ b/tests/test_utils/test_utils.py @@ -44,7 +44,7 @@ def min_number_evaluations_on_test_server() -> int: def _mocked_perform_api_call(call, request_method): - url = openml.config.server + "/" + call + url = 
openml.config._config.server + "/" + call return openml._api_calls._download_text_file(url) From 38ae9beb47122c54df2122e113ac8a4727bb2eb7 Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Wed, 31 Dec 2025 00:07:30 +0530 Subject: [PATCH 004/156] test failures fix --- examples/Basics/introduction_tutorial.py | 2 +- openml/config.py | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/examples/Basics/introduction_tutorial.py b/examples/Basics/introduction_tutorial.py index c864772f5..648bc90ed 100644 --- a/examples/Basics/introduction_tutorial.py +++ b/examples/Basics/introduction_tutorial.py @@ -35,7 +35,7 @@ # %% import openml -openml.config.apikey = "YOURKEY" +openml.config._config.apikey = "YOURKEY" # %% [markdown] # ## Caching diff --git a/openml/config.py b/openml/config.py index 20825463e..f2020b8c6 100644 --- a/openml/config.py +++ b/openml/config.py @@ -261,8 +261,8 @@ def stop_using_configuration_for_example(cls) -> None: global _config _config = replace( _config, - server=cast(str, cls._last_used_server), - apikey=cast(str, cls._last_used_key), + server=cast("str", cls._last_used_server), + apikey=cast("str", cls._last_used_key), ) cls._start_last_called = False @@ -421,7 +421,7 @@ def set_field_in_config_file(field: str, value: Any) -> None: def _parse_config(config_file: str | Path) -> dict[str, Any]: """Parse the config file, set up defaults.""" config_file = Path(config_file) - config = configparser.RawConfigParser(defaults=_config.__dict__) # type: ignore + config = configparser.RawConfigParser(defaults=OpenMLConfig().__dict__) # type: ignore # The ConfigParser requires a [SECTION_HEADER], which we do not expect in our config file. 
# Cheat the ConfigParser module by adding a fake section header @@ -493,8 +493,9 @@ def set_root_cache_directory(root_cache_directory: str | Path) -> None: -------- get_cache_directory """ - global _root_cache_directory + global _root_cache_directory, _config _root_cache_directory = Path(root_cache_directory) + _config = replace(_config, cachedir=_root_cache_directory) start_using_configuration_for_example = ( From 93ab9c21ce0dcd307666f98766b924e5bc1c09ba Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Wed, 31 Dec 2025 00:13:37 +0530 Subject: [PATCH 005/156] Update flow_id_tutorial.py --- examples/_external_or_deprecated/flow_id_tutorial.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/_external_or_deprecated/flow_id_tutorial.py b/examples/_external_or_deprecated/flow_id_tutorial.py index c533cfd9f..496102085 100644 --- a/examples/_external_or_deprecated/flow_id_tutorial.py +++ b/examples/_external_or_deprecated/flow_id_tutorial.py @@ -16,7 +16,7 @@ # %% openml.config.start_using_configuration_for_example() -openml.config._configserver = "https://api.openml.org/api/v1/xml" +openml.config._config.server = "https://api.openml.org/api/v1/xml" # %% # Defining a classifier From aa25dd69aa2a8b08f17a3bd2d411a1829fd6eccf Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Wed, 31 Dec 2025 00:24:36 +0530 Subject: [PATCH 006/156] _defaults bug fixing --- openml/cli.py | 6 +++++- tests/test_openml/test_config.py | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/openml/cli.py b/openml/cli.py index fb39afe97..c1363ea74 100644 --- a/openml/cli.py +++ b/openml/cli.py @@ -9,6 +9,8 @@ from typing import Callable from urllib.parse import urlparse +from attr import fields + from openml import config @@ -339,7 +341,9 @@ def main() -> None: "'https://openml.github.io/openml-python/main/usage.html#configuration'.", ) - configurable_fields = [f for f 
in config.get_config_as_dict() if f not in ["max_retries"]] + configurable_fields = [ + f.name for f in fields(config.OpenMLConfig) if f.name not in ["max_retries"] + ] parser_configure.add_argument( "field", diff --git a/tests/test_openml/test_config.py b/tests/test_openml/test_config.py index 3ff4bcb00..104639460 100644 --- a/tests/test_openml/test_config.py +++ b/tests/test_openml/test_config.py @@ -46,7 +46,7 @@ class TestConfig(openml.testing.TestBase): def test_non_writable_home(self, log_handler_mock, warnings_mock): with tempfile.TemporaryDirectory(dir=self.workdir) as td: os.chmod(td, 0o444) - _dd = copy(openml.config.get_config_as_dict()) + _dd = copy(openml.config.OpenMLConfig().__dict__) _dd["cachedir"] = Path(td) / "something-else" openml.config._setup(_dd) From a98b6b1c7753dbf02d8d6a2dc552abff8e8c60bb Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Wed, 31 Dec 2025 19:10:58 +0530 Subject: [PATCH 007/156] removed __setattr__ given it is not supported --- openml/config.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/openml/config.py b/openml/config.py index f2020b8c6..ad8060e7d 100644 --- a/openml/config.py +++ b/openml/config.py @@ -160,14 +160,6 @@ def __getattr__(name: str) -> Any: raise AttributeError(f"module 'openml.config' has no attribute '{name}'") -def __setattr__(name: str, value: Any) -> None: # noqa: N807 - global _config - if hasattr(_config, name): - _config = replace(_config, **{name: value}) - else: - raise AttributeError(f"module 'openml.config' has no attribute '{name}'") - - def get_server_base_url() -> str: """Return the base URL of the currently configured server. 
From 52ef37999fad8509e5e85b8512e442bd9dc69e04 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Mon, 5 Jan 2026 12:48:58 +0500 Subject: [PATCH 008/156] fix pre-commit --- openml/_api/__init__.py | 2 +- openml/_api/http/__init__.py | 2 ++ openml/_api/http/client.py | 32 +++++++++++++++++++++++-------- openml/_api/resources/__init__.py | 2 ++ openml/_api/resources/base.py | 13 +++++++++++-- openml/_api/resources/datasets.py | 15 +++++++++++---- openml/_api/resources/tasks.py | 25 +++++++++++++++++++----- openml/_api/runtime/__init__.py | 0 openml/_api/runtime/core.py | 23 +++++++++++----------- openml/_api/runtime/fallback.py | 9 ++++++++- openml/tasks/functions.py | 12 ++++++++---- 11 files changed, 99 insertions(+), 36 deletions(-) create mode 100644 openml/_api/runtime/__init__.py diff --git a/openml/_api/__init__.py b/openml/_api/__init__.py index 5089f94dd..881f40671 100644 --- a/openml/_api/__init__.py +++ b/openml/_api/__init__.py @@ -1,7 +1,7 @@ from openml._api.runtime.core import APIContext -def set_api_version(version: str, strict=False): +def set_api_version(version: str, *, strict: bool = False) -> None: api_context.set_version(version=version, strict=strict) diff --git a/openml/_api/http/__init__.py b/openml/_api/http/__init__.py index fde2a5b0a..8e6d1e4ce 100644 --- a/openml/_api/http/__init__.py +++ b/openml/_api/http/__init__.py @@ -1 +1,3 @@ from openml._api.http.client import HTTPClient + +__all__ = ["HTTPClient"] diff --git a/openml/_api/http/client.py b/openml/_api/http/client.py index 81a9213e3..dea5de809 100644 --- a/openml/_api/http/client.py +++ b/openml/_api/http/client.py @@ -1,23 +1,39 @@ from __future__ import annotations +from typing import Any, Mapping + import requests +from requests import Response from openml.__version__ import __version__ class HTTPClient: - def __init__(self, base_url: str): + def __init__(self, base_url: str) -> None: self.base_url = base_url - self.headers = {"user-agent": f"openml-python/{__version__}"} + 
self.headers: dict[str, str] = {"user-agent": f"openml-python/{__version__}"} - def get(self, path, params=None): + def get( + self, + path: str, + params: Mapping[str, Any] | None = None, + ) -> Response: url = f"{self.base_url}/{path}" - return requests.get(url, params=params, headers=self.headers) + return requests.get(url, params=params, headers=self.headers, timeout=10) - def post(self, path, data=None, files=None): + def post( + self, + path: str, + data: Mapping[str, Any] | None = None, + files: Any = None, + ) -> Response: url = f"{self.base_url}/{path}" - return requests.post(url, data=data, files=files, headers=self.headers) + return requests.post(url, data=data, files=files, headers=self.headers, timeout=10) - def delete(self, path, params=None): + def delete( + self, + path: str, + params: Mapping[str, Any] | None = None, + ) -> Response: url = f"{self.base_url}/{path}" - return requests.delete(url, params=params, headers=self.headers) + return requests.delete(url, params=params, headers=self.headers, timeout=10) diff --git a/openml/_api/resources/__init__.py b/openml/_api/resources/__init__.py index 078fc5998..b1af3c1a8 100644 --- a/openml/_api/resources/__init__.py +++ b/openml/_api/resources/__init__.py @@ -1,2 +1,4 @@ from openml._api.resources.datasets import DatasetsV1, DatasetsV2 from openml._api.resources.tasks import TasksV1, TasksV2 + +__all__ = ["DatasetsV1", "DatasetsV2", "TasksV1", "TasksV2"] diff --git a/openml/_api/resources/base.py b/openml/_api/resources/base.py index 1fae27665..6fbf8977d 100644 --- a/openml/_api/resources/base.py +++ b/openml/_api/resources/base.py @@ -4,7 +4,11 @@ from typing import TYPE_CHECKING if TYPE_CHECKING: + from requests import Response + from openml._api.http import HTTPClient + from openml.datasets.dataset import OpenMLDataset + from openml.tasks.task import OpenMLTask class ResourceAPI: @@ -14,9 +18,14 @@ def __init__(self, http: HTTPClient): class DatasetsAPI(ResourceAPI, ABC): @abstractmethod - def 
get(self, id: int) -> dict: ... + def get(self, dataset_id: int) -> OpenMLDataset | tuple[OpenMLDataset, Response]: ... class TasksAPI(ResourceAPI, ABC): @abstractmethod - def get(self, id: int) -> dict: ... + def get( + self, + task_id: int, + *, + return_response: bool = False, + ) -> OpenMLTask | tuple[OpenMLTask, Response]: ... diff --git a/openml/_api/resources/datasets.py b/openml/_api/resources/datasets.py index cd1bb595a..9ff1ec278 100644 --- a/openml/_api/resources/datasets.py +++ b/openml/_api/resources/datasets.py @@ -1,13 +1,20 @@ from __future__ import annotations +from typing import TYPE_CHECKING + from openml._api.resources.base import DatasetsAPI +if TYPE_CHECKING: + from responses import Response + + from openml.datasets.dataset import OpenMLDataset + class DatasetsV1(DatasetsAPI): - def get(self, id): - pass + def get(self, dataset_id: int) -> OpenMLDataset | tuple[OpenMLDataset, Response]: + raise NotImplementedError class DatasetsV2(DatasetsAPI): - def get(self, id): - pass + def get(self, dataset_id: int) -> OpenMLDataset | tuple[OpenMLDataset, Response]: + raise NotImplementedError diff --git a/openml/_api/resources/tasks.py b/openml/_api/resources/tasks.py index b0e9afbf8..f494fb9a3 100644 --- a/openml/_api/resources/tasks.py +++ b/openml/_api/resources/tasks.py @@ -1,5 +1,7 @@ from __future__ import annotations +from typing import TYPE_CHECKING + import xmltodict from openml._api.resources.base import TasksAPI @@ -12,12 +14,20 @@ TaskType, ) +if TYPE_CHECKING: + from requests import Response + class TasksV1(TasksAPI): - def get(self, id, return_response=False): - path = f"task/{id}" + def get( + self, + task_id: int, + *, + return_response: bool = False, + ) -> OpenMLTask | tuple[OpenMLTask, Response]: + path = f"task/{task_id}" response = self._http.get(path) - xml_content = response.content + xml_content = response.text task = self._create_task_from_xml(xml_content) if return_response: @@ -109,5 +119,10 @@ def _create_task_from_xml(self, 
xml: str) -> OpenMLTask: class TasksV2(TasksAPI): - def get(self, id): - pass + def get( + self, + task_id: int, + *, + return_response: bool = False, + ) -> OpenMLTask | tuple[OpenMLTask, Response]: + raise NotImplementedError diff --git a/openml/_api/runtime/__init__.py b/openml/_api/runtime/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/openml/_api/runtime/core.py b/openml/_api/runtime/core.py index 80f35587c..aa09a69db 100644 --- a/openml/_api/runtime/core.py +++ b/openml/_api/runtime/core.py @@ -1,5 +1,7 @@ from __future__ import annotations +from typing import TYPE_CHECKING + from openml._api.config import ( API_V1_SERVER, API_V2_SERVER, @@ -11,16 +13,18 @@ TasksV1, TasksV2, ) -from openml._api.runtime.fallback import FallbackProxy + +if TYPE_CHECKING: + from openml._api.resources.base import DatasetsAPI, TasksAPI class APIBackend: - def __init__(self, *, datasets, tasks): + def __init__(self, *, datasets: DatasetsAPI, tasks: TasksAPI): self.datasets = datasets self.tasks = tasks -def build_backend(version: str, strict: bool) -> APIBackend: +def build_backend(version: str, *, strict: bool) -> APIBackend: v1_http = HTTPClient(API_V1_SERVER) v2_http = HTTPClient(API_V2_SERVER) @@ -40,19 +44,16 @@ def build_backend(version: str, strict: bool) -> APIBackend: if strict: return v2 - return APIBackend( - datasets=FallbackProxy(v2.datasets, v1.datasets), - tasks=FallbackProxy(v2.tasks, v1.tasks), - ) + return v1 class APIContext: - def __init__(self): + def __init__(self) -> None: self._backend = build_backend("v1", strict=False) - def set_version(self, version: str, strict: bool = False): - self._backend = build_backend(version, strict) + def set_version(self, version: str, *, strict: bool = False) -> None: + self._backend = build_backend(version=version, strict=strict) @property - def backend(self): + def backend(self) -> APIBackend: return self._backend diff --git a/openml/_api/runtime/fallback.py b/openml/_api/runtime/fallback.py index 
56e96a966..1bc99d270 100644 --- a/openml/_api/runtime/fallback.py +++ b/openml/_api/runtime/fallback.py @@ -1,5 +1,12 @@ from __future__ import annotations +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from openml._api.resources.base import ResourceAPI + class FallbackProxy: - pass + def __init__(self, primary: ResourceAPI, fallback: ResourceAPI): + self._primary = primary + self._fallback = fallback diff --git a/openml/tasks/functions.py b/openml/tasks/functions.py index ef67f75bf..a794ad56d 100644 --- a/openml/tasks/functions.py +++ b/openml/tasks/functions.py @@ -445,10 +445,14 @@ def _get_task_description(task_id: int) -> OpenMLTask: except OpenMLCacheException: _cache_dir = openml.utils._create_cache_directory_for_id(TASKS_CACHE_DIR_NAME, task_id) xml_file = _cache_dir / "task.xml" - task, response = api_context.backend.tasks.get(task_id, return_response=True) - - with xml_file.open("w", encoding="utf8") as fh: - fh.write(response.text) + result = api_context.backend.tasks.get(task_id, return_response=True) + + if isinstance(result, tuple): + task, response = result + with xml_file.open("w", encoding="utf8") as fh: + fh.write(response.text) + else: + task = result return task From 146dd2160f668149d2bd39ed691f703817df8cc6 Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Mon, 5 Jan 2026 17:12:29 +0530 Subject: [PATCH 009/156] Update all files --- examples/Advanced/datasets_tutorial.py | 2 +- examples/Basics/introduction_tutorial.py | 4 +- .../benchmark_with_optunahub.py | 4 +- .../flow_id_tutorial.py | 2 +- openml/__init__.py | 9 +- openml/_api_calls.py | 19 +- openml/config.py | 798 ++++++++---------- openml/runs/functions.py | 13 +- openml/setups/functions.py | 5 +- openml/tasks/task.py | 2 +- openml/testing.py | 14 +- openml/utils.py | 6 +- tests/conftest.py | 16 +- tests/test_datasets/test_dataset_functions.py | 14 +- tests/test_openml/test_config.py | 25 +- tests/test_utils/test_utils.py | 2 +- 16 
files changed, 443 insertions(+), 492 deletions(-) diff --git a/examples/Advanced/datasets_tutorial.py b/examples/Advanced/datasets_tutorial.py index 3a4833206..cc57686d0 100644 --- a/examples/Advanced/datasets_tutorial.py +++ b/examples/Advanced/datasets_tutorial.py @@ -139,7 +139,7 @@ # only for the dataset owner. Further, critical fields cannot be edited if the dataset has any # tasks associated with it. To edit critical fields of a dataset (without tasks) owned by you, # configure the API key: -# openml.config._config.apikey = 'FILL_IN_OPENML_API_KEY' +# openml.config.apikey = 'FILL_IN_OPENML_API_KEY' # This example here only shows a failure when trying to work on a dataset not owned by you: # %% diff --git a/examples/Basics/introduction_tutorial.py b/examples/Basics/introduction_tutorial.py index 648bc90ed..4b972b95b 100644 --- a/examples/Basics/introduction_tutorial.py +++ b/examples/Basics/introduction_tutorial.py @@ -35,7 +35,7 @@ # %% import openml -openml.config._config.apikey = "YOURKEY" +openml.config.apikey = "YOURKEY" # %% [markdown] # ## Caching @@ -52,4 +52,4 @@ # %% import openml -openml.config.set_root_cache_directory("YOURDIR") \ No newline at end of file +openml.config.set_root_cache_directory("YOURDIR") diff --git a/examples/_external_or_deprecated/benchmark_with_optunahub.py b/examples/_external_or_deprecated/benchmark_with_optunahub.py index c8f5f7b0c..ece3e7c40 100644 --- a/examples/_external_or_deprecated/benchmark_with_optunahub.py +++ b/examples/_external_or_deprecated/benchmark_with_optunahub.py @@ -44,7 +44,7 @@ # account (you don't need one for anything else, just to upload your results), # go to your profile and select the API-KEY. 
# Or log in, and navigate to https://www.openml.org/auth/api-key -openml.config._config.apikey = "" +openml.config.apikey = "" ############################################################################ # Prepare for preprocessors and an OpenML task # ============================================ @@ -95,7 +95,7 @@ def objective(trial: optuna.Trial) -> Pipeline: run = openml.runs.run_model_on_task(pipe, task=task_id, avoid_duplicate_runs=False) logger.log(1, f"Model has been trained - {run}") - if openml.config._config.apikey != "": + if openml.config.apikey != "": try: run.publish() diff --git a/examples/_external_or_deprecated/flow_id_tutorial.py b/examples/_external_or_deprecated/flow_id_tutorial.py index 496102085..e813655fc 100644 --- a/examples/_external_or_deprecated/flow_id_tutorial.py +++ b/examples/_external_or_deprecated/flow_id_tutorial.py @@ -16,7 +16,7 @@ # %% openml.config.start_using_configuration_for_example() -openml.config._config.server = "https://api.openml.org/api/v1/xml" +openml.config.server = "https://api.openml.org/api/v1/xml" # %% # Defining a classifier diff --git a/openml/__init__.py b/openml/__init__.py index c49505eb9..e23316d4d 100644 --- a/openml/__init__.py +++ b/openml/__init__.py @@ -18,9 +18,11 @@ # License: BSD 3-Clause from __future__ import annotations +from typing import TYPE_CHECKING + from . 
import ( _api_calls, - config, + config as _config_module, datasets, evaluations, exceptions, @@ -49,6 +51,11 @@ OpenMLTask, ) +if TYPE_CHECKING: + from .config import OpenMLConfigManager + +config: OpenMLConfigManager = _config_module._config + def populate_cache( task_ids: list[int] | None = None, diff --git a/openml/_api_calls.py b/openml/_api_calls.py index c3f6d285f..a72da1b8c 100644 --- a/openml/_api_calls.py +++ b/openml/_api_calls.py @@ -12,7 +12,7 @@ import xml import zipfile from pathlib import Path -from typing import Dict, Tuple, Union +from typing import Dict, Tuple, Union, cast import minio import requests @@ -71,7 +71,7 @@ def resolve_env_proxies(url: str) -> str | None: def _create_url_from_endpoint(endpoint: str) -> str: - url = config._config.server + url = cast(str, config.server) if not url.endswith("/"): url += "/" url += endpoint @@ -172,7 +172,7 @@ def _download_minio_file( bucket_name=bucket, object_name=object_name, file_path=str(destination), - progress=ProgressBar() if config._config.show_progress else None, + progress=ProgressBar() if config.show_progress else None, request_headers=_HEADERS, ) if destination.is_file() and destination.suffix == ".zip": @@ -301,7 +301,8 @@ def _file_id_to_url(file_id: int, filename: str | None = None) -> str: Presents the URL how to download a given file id filename is optional """ - openml_url = config._config.server.split("/api/") + openml_server = cast(str, config.server) + openml_url = openml_server.split("/api/") url = openml_url[0] + f"/data/download/{file_id!s}" if filename is not None: url += "/" + filename @@ -317,7 +318,7 @@ def _read_url_files( and sending file_elements as files """ data = {} if data is None else data - data["api_key"] = config._config.apikey + data["api_key"] = config.apikey if file_elements is None: file_elements = {} # Using requests.post sets header 'Accept-encoding' automatically to @@ -337,8 +338,8 @@ def __read_url( md5_checksum: str | None = None, ) -> requests.Response: 
data = {} if data is None else data - if config._config.apikey: - data["api_key"] = config._config.apikey + if config.apikey: + data["api_key"] = config.apikey return _send_request( request_method=request_method, url=url, @@ -363,10 +364,10 @@ def _send_request( # noqa: C901, PLR0912 files: FILE_ELEMENTS_TYPE | None = None, md5_checksum: str | None = None, ) -> requests.Response: - n_retries = max(1, config._config.connection_n_retries) + n_retries = max(1, config.connection_n_retries) response: requests.Response | None = None - delay_method = _human_delay if config._config.retry_policy == "human" else _robot_delay + delay_method = _human_delay if config.retry_policy == "human" else _robot_delay # Error to raise in case of retrying too often. Will be set to the last observed exception. retry_raise_e: Exception | None = None diff --git a/openml/config.py b/openml/config.py index ad8060e7d..2ecb3c64f 100644 --- a/openml/config.py +++ b/openml/config.py @@ -12,7 +12,7 @@ import shutil import warnings from contextlib import contextmanager -from dataclasses import dataclass, replace +from dataclasses import dataclass, field, replace from io import StringIO from pathlib import Path from typing import Any, Iterator, cast @@ -21,41 +21,24 @@ logger = logging.getLogger(__name__) openml_logger = logging.getLogger("openml") -console_handler: logging.StreamHandler | None = None -file_handler: logging.handlers.RotatingFileHandler | None = None - -OPENML_CACHE_DIR_ENV_VAR = "OPENML_CACHE_DIR" -OPENML_SKIP_PARQUET_ENV_VAR = "OPENML_SKIP_PARQUET" -_TEST_SERVER_NORMAL_USER_KEY = "normaluser" - - -# Default values (see also https://github.com/openml/OpenML/wiki/Client-API-Standards) -_user_path = Path("~").expanduser().absolute() def _resolve_default_cache_dir() -> Path: - user_defined_cache_dir = os.environ.get(OPENML_CACHE_DIR_ENV_VAR) + user_defined_cache_dir = os.environ.get("OPENML_CACHE_DIR") if user_defined_cache_dir is not None: return Path(user_defined_cache_dir) if 
platform.system().lower() != "linux": - return _user_path / ".openml" + return Path("~", ".openml") xdg_cache_home = os.environ.get("XDG_CACHE_HOME") if xdg_cache_home is None: return Path("~", ".cache", "openml") - # This is the proper XDG_CACHE_HOME directory, but - # we unfortunately had a problem where we used XDG_CACHE_HOME/org, - # we check heuristically if this old directory still exists and issue - # a warning if it does. There's too much data to move to do this for the user. - - # The new cache directory exists cache_dir = Path(xdg_cache_home) / "openml" if cache_dir.exists(): return cache_dir - # The old cache directory *does not* exist heuristic_dir_for_backwards_compat = Path(xdg_cache_home) / "org" / "openml" if not heuristic_dir_for_backwards_compat.exists(): return cache_dir @@ -73,447 +56,412 @@ def _resolve_default_cache_dir() -> Path: @dataclass class OpenMLConfig: + """Dataclass storing the OpenML configuration.""" + apikey: str = "" server: str = "https://www.openml.org/api/v1/xml" - cachedir: Path = _resolve_default_cache_dir() # noqa: RUF009 + cachedir: Path = field(default_factory=_resolve_default_cache_dir) avoid_duplicate_runs: bool = False retry_policy: Literal["human", "robot"] = "human" connection_n_retries: int = 5 show_progress: bool = False + def __setattr__(self, name: str, value: Any) -> None: + if name == "apikey" and value is not None and not isinstance(value, str): + raise ValueError("apikey must be a string or None") -def _create_log_handlers(create_file_handler: bool = True) -> None: # noqa: FBT001, FBT002 - """Creates but does not attach the log handlers.""" - global console_handler, file_handler, _root_cache_directory # noqa: PLW0602 - if console_handler is not None or file_handler is not None: - logger.debug("Requested to create log handlers, but they are already created.") - return - - message_format = "[%(levelname)s] [%(asctime)s:%(name)s] %(message)s" - output_formatter = logging.Formatter(message_format, 
datefmt="%H:%M:%S") - - console_handler = logging.StreamHandler() - console_handler.setFormatter(output_formatter) - - if create_file_handler: - one_mb = 2**20 - log_path = _root_cache_directory / "openml_python.log" - file_handler = logging.handlers.RotatingFileHandler( - log_path, - maxBytes=one_mb, - backupCount=1, - delay=True, - ) - file_handler.setFormatter(output_formatter) - - -def _convert_log_levels(log_level: int) -> tuple[int, int]: - """Converts a log level that's either defined by OpenML/Python to both specifications.""" - # OpenML verbosity level don't match Python values directly: - openml_to_python = {0: logging.WARNING, 1: logging.INFO, 2: logging.DEBUG} - python_to_openml = { - logging.DEBUG: 2, - logging.INFO: 1, - logging.WARNING: 0, - logging.CRITICAL: 0, - logging.ERROR: 0, - } - # Because the dictionaries share no keys, we use `get` to convert as necessary: - openml_level = python_to_openml.get(log_level, log_level) - python_level = openml_to_python.get(log_level, log_level) - return openml_level, python_level - - -def _set_level_register_and_store(handler: logging.Handler, log_level: int) -> None: - """Set handler log level, register it if needed, save setting to config file if specified.""" - _oml_level, py_level = _convert_log_levels(log_level) - handler.setLevel(py_level) - - if openml_logger.level > py_level or openml_logger.level == logging.NOTSET: - openml_logger.setLevel(py_level) + super().__setattr__(name, value) - if handler not in openml_logger.handlers: - openml_logger.addHandler(handler) +class OpenMLConfigManager: + """The OpenMLConfigManager manages the configuration of the openml-python package.""" -def set_console_log_level(console_output_level: int) -> None: - """Set console output to the desired level and register it with openml logger if needed.""" - global console_handler # noqa: PLW0602 - assert console_handler is not None - _set_level_register_and_store(console_handler, console_output_level) + def __init__(self) -> 
None: + self.console_handler: logging.StreamHandler | None = None + self.file_handler: logging.handlers.RotatingFileHandler | None = None + self.OPENML_CACHE_DIR_ENV_VAR = "OPENML_CACHE_DIR" + self.OPENML_SKIP_PARQUET_ENV_VAR = "OPENML_SKIP_PARQUET" + self._TEST_SERVER_NORMAL_USER_KEY = "normaluser" -def set_file_log_level(file_output_level: int) -> None: - """Set file output to the desired level and register it with openml logger if needed.""" - global file_handler # noqa: PLW0602 - assert file_handler is not None - _set_level_register_and_store(file_handler, file_output_level) + self._user_path = Path("~").expanduser().absolute() + self._config: OpenMLConfig = OpenMLConfig() + self._root_cache_directory: Path = self._config.cachedir -_config: OpenMLConfig = OpenMLConfig() -_root_cache_directory: Path = _config.cachedir + self.logger = logger + self.openml_logger = openml_logger + self._examples = self.ConfigurationForExamples(self) -def __getattr__(name: str) -> Any: - if hasattr(_config, name): - return getattr(_config, name) - raise AttributeError(f"module 'openml.config' has no attribute '{name}'") - - -def get_server_base_url() -> str: - """Return the base URL of the currently configured server. 
- - Turns ``"https://api.openml.org/api/v1/xml"`` in ``"https://www.openml.org/"`` - and ``"https://test.openml.org/api/v1/xml"`` in ``"https://test.openml.org/"`` - - Returns - ------- - str - """ - domain, _ = _config.server.split("/api", maxsplit=1) - return domain.replace("api", "www") - - -def set_retry_policy(value: Literal["human", "robot"], n_retries: int | None = None) -> None: - global _config - default_retries_by_policy = {"human": 5, "robot": 50} - - if value not in default_retries_by_policy: - raise ValueError( - f"Detected retry_policy '{value}' but must be one of " - f"{list(default_retries_by_policy.keys())}", - ) - if n_retries is not None and not isinstance(n_retries, int): - raise TypeError(f"`n_retries` must be of type `int` or `None` but is `{type(n_retries)}`.") - - if isinstance(n_retries, int) and n_retries < 1: - raise ValueError(f"`n_retries` is '{n_retries}' but must be positive.") - - _config = replace( - _config, - retry_policy=value, - connection_n_retries=(default_retries_by_policy[value] if n_retries is None else n_retries), - ) + self._setup() + def __getattr__(self, name: str) -> Any: + if hasattr(self._config, name): + return getattr(self._config, name) + raise AttributeError(f"{type(self).__name__!r} object has no attribute {name!r}") -class ConfigurationForExamples: - """Allows easy switching to and from a test configuration, used for examples.""" + _FIELDS = { # noqa: RUF012 + "apikey", + "server", + "cachedir", + "avoid_duplicate_runs", + "retry_policy", + "connection_n_retries", + "show_progress", + } - _last_used_server = None - _last_used_key = None - _start_last_called = False - _test_server = "https://test.openml.org/api/v1/xml" - _test_apikey = _TEST_SERVER_NORMAL_USER_KEY + def __setattr__(self, name: str, value: Any) -> None: + # during __init__ before _config exists + if name in { + "_config", + "_root_cache_directory", + "console_handler", + "file_handler", + "logger", + "openml_logger", + "_examples", + 
"OPENML_CACHE_DIR_ENV_VAR", + "OPENML_SKIP_PARQUET_ENV_VAR", + "_TEST_SERVER_NORMAL_USER_KEY", + "_user_path", + }: + return object.__setattr__(self, name, value) + + if name in self._FIELDS: + # write into dataclass, not manager (prevents shadowing) + if name == "cachedir": + object.__setattr__(self, "_root_cache_directory", Path(value)) + object.__setattr__(self, "_config", replace(self._config, **{name: value})) + return None + + object.__setattr__(self, name, value) + return None + + def _create_log_handlers(self, create_file_handler: bool = True) -> None: # noqa: FBT001, FBT002 + if self.console_handler is not None or self.file_handler is not None: + self.logger.debug("Requested to create log handlers, but they are already created.") + return - @classmethod - def start_using_configuration_for_example(cls) -> None: - """Sets the configuration to connect to the test server with valid apikey. + message_format = "[%(levelname)s] [%(asctime)s:%(name)s] %(message)s" + output_formatter = logging.Formatter(message_format, datefmt="%H:%M:%S") - To configuration as was before this call is stored, and can be recovered - by using the `stop_use_example_configuration` method. - """ - global _config + self.console_handler = logging.StreamHandler() + self.console_handler.setFormatter(output_formatter) - if ( - cls._start_last_called - and _config.server == cls._test_server - and _config.apikey == cls._test_apikey - ): - # Method is called more than once in a row without modifying the server or apikey. - # We don't want to save the current test configuration as a last used configuration. 
- return + if create_file_handler: + one_mb = 2**20 + log_path = self._root_cache_directory / "openml_python.log" + self.file_handler = logging.handlers.RotatingFileHandler( + log_path, + maxBytes=one_mb, + backupCount=1, + delay=True, + ) + self.file_handler.setFormatter(output_formatter) + + def _convert_log_levels(self, log_level: int) -> tuple[int, int]: + openml_to_python = {0: logging.WARNING, 1: logging.INFO, 2: logging.DEBUG} + python_to_openml = { + logging.DEBUG: 2, + logging.INFO: 1, + logging.WARNING: 0, + logging.CRITICAL: 0, + logging.ERROR: 0, + } + openml_level = python_to_openml.get(log_level, log_level) + python_level = openml_to_python.get(log_level, log_level) + return openml_level, python_level + + def _set_level_register_and_store(self, handler: logging.Handler, log_level: int) -> None: + _oml_level, py_level = self._convert_log_levels(log_level) + handler.setLevel(py_level) + + if self.openml_logger.level > py_level or self.openml_logger.level == logging.NOTSET: + self.openml_logger.setLevel(py_level) + + if handler not in self.openml_logger.handlers: + self.openml_logger.addHandler(handler) + + def set_console_log_level(self, console_output_level: int) -> None: + """Set the log level for console output.""" + assert self.console_handler is not None + self._set_level_register_and_store(self.console_handler, console_output_level) + + def set_file_log_level(self, file_output_level: int) -> None: + """Set the log level for file output.""" + assert self.file_handler is not None + self._set_level_register_and_store(self.file_handler, file_output_level) + + def get_server_base_url(self) -> str: + """Get the base URL of the OpenML server (i.e., without /api).""" + domain, _ = self._config.server.split("/api", maxsplit=1) + return domain.replace("api", "www") + + def set_retry_policy( + self, value: Literal["human", "robot"], n_retries: int | None = None + ) -> None: + """Set the retry policy for server connections.""" + default_retries_by_policy = 
{"human": 5, "robot": 50} + + if value not in default_retries_by_policy: + raise ValueError( + f"Detected retry_policy '{value}' but must be one of " + f"{list(default_retries_by_policy.keys())}", + ) + if n_retries is not None and not isinstance(n_retries, int): + raise TypeError( + f"`n_retries` must be of type `int` or `None` but is `{type(n_retries)}`." + ) - cls._last_used_server = _config.server - cls._last_used_key = _config.apikey - cls._start_last_called = True + if isinstance(n_retries, int) and n_retries < 1: + raise ValueError(f"`n_retries` is '{n_retries}' but must be positive.") - # Test server key for examples - _config = replace( - _config, - server=cls._test_server, - apikey=cls._test_apikey, - ) - warnings.warn( - f"Switching to the test server {_config.server} to not upload results to " - "the live server. Using the test server may result in reduced performance of the API!", - stacklevel=2, + self._config = replace( + self._config, + retry_policy=value, + connection_n_retries=( + default_retries_by_policy[value] if n_retries is None else n_retries + ), ) - @classmethod - def stop_using_configuration_for_example(cls) -> None: - """Return to configuration as it was before `start_use_example_configuration`.""" - if not cls._start_last_called: - # We don't want to allow this because it will (likely) result in the `server` and - # `apikey` variables being set to None. - raise RuntimeError( - "`stop_use_example_configuration` called without a saved config." 
- "`start_use_example_configuration` must be called first.", + def _handle_xdg_config_home_backwards_compatibility(self, xdg_home: str) -> Path: + config_dir = Path(xdg_home) / "openml" + + backwards_compat_config_file = Path(xdg_home) / "config" + if not backwards_compat_config_file.exists(): + return config_dir + + try: + self._parse_config(backwards_compat_config_file) + except Exception: # noqa: BLE001 + return config_dir + + correct_config_location = config_dir / "config" + try: + shutil.copy(backwards_compat_config_file, correct_config_location) + self.openml_logger.warning( + "An openml configuration file was found at the old location " + f"at {backwards_compat_config_file}. We have copied it to the new " + f"location at {correct_config_location}. " + "\nTo silence this warning please verify that the configuration file " + f"at {correct_config_location} is correct and delete the file at " + f"{backwards_compat_config_file}." + ) + return config_dir + except Exception as e: # noqa: BLE001 + self.openml_logger.warning( + "While attempting to perform a backwards compatible fix, we " + f"failed to copy the openml config file at " + f"{backwards_compat_config_file}' to {correct_config_location}" + f"\n{type(e)}: {e}", + "\n\nTo silence this warning, please copy the file " + "to the new location and delete the old file at " + f"{backwards_compat_config_file}.", + ) + return backwards_compat_config_file + + def determine_config_file_path(self) -> Path: + """Determine the path to the openml configuration file.""" + if platform.system().lower() == "linux": + xdg_home = os.environ.get("XDG_CONFIG_HOME") + if xdg_home is not None: + config_dir = self._handle_xdg_config_home_backwards_compatibility(xdg_home) + else: + config_dir = Path("~", ".config", "openml") + else: + config_dir = Path("~") / ".openml" + + config_dir = Path(config_dir).expanduser().resolve() + return config_dir / "config" + + def _parse_config(self, config_file: str | Path) -> dict[str, Any]: + 
config_file = Path(config_file) + config = configparser.RawConfigParser(defaults=OpenMLConfig().__dict__) # type: ignore + + config_file_ = StringIO() + config_file_.write("[FAKE_SECTION]\n") + try: + with config_file.open("r") as fh: + for line in fh: + config_file_.write(line) + except FileNotFoundError: + self.logger.info( + "No config file found at %s, using default configuration.", config_file + ) + except OSError as e: + self.logger.info("Error opening file %s: %s", config_file, e.args[0]) + config_file_.seek(0) + config.read_file(config_file_) + configuration = dict(config.items("FAKE_SECTION")) + for boolean_field in ["avoid_duplicate_runs", "show_progress"]: + if isinstance(config["FAKE_SECTION"][boolean_field], str): + configuration[boolean_field] = config["FAKE_SECTION"].getboolean(boolean_field) # type: ignore + return configuration # type: ignore + + def start_using_configuration_for_example(self) -> None: + """Sets the configuration to connect to the test server with valid apikey.""" + return self._examples.start_using_configuration_for_example() + + def stop_using_configuration_for_example(self) -> None: + """Store the configuration as it was before `start_use_example_configuration`.""" + return self._examples.stop_using_configuration_for_example() + + def _setup(self, config: dict[str, Any] | None = None) -> None: + config_file = self.determine_config_file_path() + config_dir = config_file.parent + + try: + if not config_dir.exists(): + config_dir.mkdir(exist_ok=True, parents=True) + except PermissionError: + self.openml_logger.warning( + f"No permission to create OpenML directory at {config_dir}!" + " This can result in OpenML-Python not working properly." 
) - global _config - _config = replace( - _config, - server=cast("str", cls._last_used_server), - apikey=cast("str", cls._last_used_key), - ) - cls._start_last_called = False - - -def _handle_xdg_config_home_backwards_compatibility( - xdg_home: str, -) -> Path: - # NOTE(eddiebergman): A previous bug results in the config - # file being located at `${XDG_CONFIG_HOME}/config` instead - # of `${XDG_CONFIG_HOME}/openml/config`. As to maintain backwards - # compatibility, where users may already may have had a configuration, - # we copy it over an issue a warning until it's deleted. - # As a heurisitic to ensure that it's "our" config file, we try parse it first. - config_dir = Path(xdg_home) / "openml" - - backwards_compat_config_file = Path(xdg_home) / "config" - if not backwards_compat_config_file.exists(): - return config_dir - - # If it errors, that's a good sign it's not ours and we can - # safely ignore it, jumping out of this block. This is a heurisitc - try: - _parse_config(backwards_compat_config_file) - except Exception: # noqa: BLE001 - return config_dir - - # Looks like it's ours, lets try copy it to the correct place - correct_config_location = config_dir / "config" - try: - # We copy and return the new copied location - shutil.copy(backwards_compat_config_file, correct_config_location) - openml_logger.warning( - "An openml configuration file was found at the old location " - f"at {backwards_compat_config_file}. We have copied it to the new " - f"location at {correct_config_location}. " - "\nTo silence this warning please verify that the configuration file " - f"at {correct_config_location} is correct and delete the file at " - f"{backwards_compat_config_file}." 
+ if config is None: + config = self._parse_config(config_file) + + self._config = replace( + self._config, + apikey=config["apikey"], + server=config["server"], + show_progress=config["show_progress"], + avoid_duplicate_runs=config["avoid_duplicate_runs"], + retry_policy=config["retry_policy"], + connection_n_retries=int(config["connection_n_retries"]), ) - return config_dir - except Exception as e: # noqa: BLE001 - # We failed to copy and its ours, return the old one. - openml_logger.warning( - "While attempting to perform a backwards compatible fix, we " - f"failed to copy the openml config file at " - f"{backwards_compat_config_file}' to {correct_config_location}" - f"\n{type(e)}: {e}", - "\n\nTo silence this warning, please copy the file " - "to the new location and delete the old file at " - f"{backwards_compat_config_file}.", - ) - return backwards_compat_config_file + self.set_retry_policy(config["retry_policy"], self._config.connection_n_retries) -def determine_config_file_path() -> Path: - if platform.system().lower() == "linux": - xdg_home = os.environ.get("XDG_CONFIG_HOME") - if xdg_home is not None: - config_dir = _handle_xdg_config_home_backwards_compatibility(xdg_home) + user_defined_cache_dir = os.environ.get(self.OPENML_CACHE_DIR_ENV_VAR) + if user_defined_cache_dir is not None: + short_cache_dir = Path(user_defined_cache_dir) else: - config_dir = Path("~", ".config", "openml") - else: - config_dir = Path("~") / ".openml" - - # Still use os.path.expanduser to trigger the mock in the unit test - config_dir = Path(config_dir).expanduser().resolve() - return config_dir / "config" - - -def _setup(config: dict[str, Any] | None = None) -> None: - """Setup openml package. Called on first import. - - Reads the config file and sets up apikey, server, cache appropriately. 
- key and server can be set by the user simply using - openml.config._config.apikey = THEIRKEY - openml.config._config.server = SOMESERVER - We could also make it a property but that's less clear. - """ - global _config - global _root_cache_directory - - config_file = determine_config_file_path() - config_dir = config_file.parent - - # read config file, create directory for config file - try: - if not config_dir.exists(): - config_dir.mkdir(exist_ok=True, parents=True) - except PermissionError: - openml_logger.warning( - f"No permission to create OpenML directory at {config_dir}!" - " This can result in OpenML-Python not working properly." - ) + short_cache_dir = Path(config["cachedir"]) + + self._root_cache_directory = short_cache_dir.expanduser().resolve() + self._config = replace(self._config, cachedir=self._root_cache_directory) + + try: + cache_exists = self._root_cache_directory.exists() + if not cache_exists: + self._root_cache_directory.mkdir(exist_ok=True, parents=True) + self._create_log_handlers() + except PermissionError: + self.openml_logger.warning( + f"No permission to create OpenML directory at {self._root_cache_directory}!" + " This can result in OpenML-Python not working properly." + ) + self._create_log_handlers(create_file_handler=False) + + def set_field_in_config_file(self, field: str, value: Any) -> None: + """Set a field in the configuration file.""" + if not hasattr(OpenMLConfig(), field): + raise ValueError( + f"Field '{field}' is not valid and must be one of " + f"'{OpenMLConfig().__dict__.keys()}'." 
+ ) - if config is None: - config = _parse_config(config_file) - - _config = replace( - _config, - apikey=config["apikey"], - server=config["server"], - show_progress=config["show_progress"], - avoid_duplicate_runs=config["avoid_duplicate_runs"], - retry_policy=config["retry_policy"], - connection_n_retries=int(config["connection_n_retries"]), - ) + self._config = replace(self._config, **{field: value}) + config_file = self.determine_config_file_path() + existing = self._parse_config(config_file) + with config_file.open("w") as fh: + for f in OpenMLConfig().__dict__: + v = value if f == field else existing.get(f) + if v is not None: + fh.write(f"{f} = {v}\n") + + def get_config_as_dict(self) -> dict[str, Any]: + """Get the current configuration as a dictionary.""" + return self._config.__dict__.copy() + + def get_cache_directory(self) -> str: + """Get the cache directory for the current server.""" + url_suffix = urlparse(self._config.server).netloc + reversed_url_suffix = os.sep.join(url_suffix.split(".")[::-1]) # noqa: PTH118 + return os.path.join(self._root_cache_directory, reversed_url_suffix) # noqa: PTH118 + + def set_root_cache_directory(self, root_cache_directory: str | Path) -> None: + """Set the root cache directory.""" + self._root_cache_directory = Path(root_cache_directory) + self._config = replace(self._config, cachedir=self._root_cache_directory) + + @contextmanager + def overwrite_config_context(self, config: dict[str, Any]) -> Iterator[dict[str, Any]]: + """Overwrite the current configuration within a context manager.""" + existing_config = self.get_config_as_dict() + merged_config = {**existing_config, **config} + + self._setup(merged_config) + yield merged_config + self._setup(existing_config) + + class ConfigurationForExamples: + """Allows easy switching to and from a test configuration, used for examples.""" + + _last_used_server = None + _last_used_key = None + _start_last_called = False + + def __init__(self, manager: OpenMLConfigManager): + 
self._manager = manager + self._test_apikey = manager._TEST_SERVER_NORMAL_USER_KEY + self._test_server = "https://test.openml.org/api/v1/xml" + + def start_using_configuration_for_example(self) -> None: + """Sets the configuration to connect to the test server with valid apikey. + + To configuration as was before this call is stored, and can be recovered + by using the `stop_use_example_configuration` method. + """ + if ( + self._start_last_called + and self._manager._config.server == self._test_server + and self._manager._config.apikey == self._test_apikey + ): + # Method is called more than once in a row without modifying the server or apikey. + # We don't want to save the current test configuration as a last used configuration. + return + + self._last_used_server = self._manager._config.server + self._last_used_key = self._manager._config.apikey + self._start_last_called = True + + # Test server key for examples + self._manager._config = replace( + self._manager._config, + server=self._test_server, + apikey=self._test_apikey, + ) + warnings.warn( + f"Switching to the test server {self._test_server} to not upload results to " + "the live server. Using the test server may result in reduced performance of the " + "API!", + stacklevel=2, + ) - set_retry_policy(config["retry_policy"], _config.connection_n_retries) + def stop_using_configuration_for_example(self) -> None: + """Return to configuration as it was before `start_use_example_configuration`.""" + if not self._start_last_called: + # We don't want to allow this because it will (likely) result in the `server` and + # `apikey` variables being set to None. + raise RuntimeError( + "`stop_use_example_configuration` called without a saved config." 
+ "`start_use_example_configuration` must be called first.", + ) + + self._manager._config = replace( + self._manager._config, + server=cast("str", self._last_used_server), + apikey=cast("str", self._last_used_key), + ) + self._start_last_called = False - user_defined_cache_dir = os.environ.get(OPENML_CACHE_DIR_ENV_VAR) - if user_defined_cache_dir is not None: - short_cache_dir = Path(user_defined_cache_dir) - else: - short_cache_dir = Path(config["cachedir"]) - - _root_cache_directory = short_cache_dir.expanduser().resolve() - _config = replace(_config, cachedir=_root_cache_directory) - - try: - cache_exists = _root_cache_directory.exists() - # create the cache subdirectory - if not cache_exists: - _root_cache_directory.mkdir(exist_ok=True, parents=True) - _create_log_handlers() - except PermissionError: - openml_logger.warning( - f"No permission to create OpenML directory at {_root_cache_directory}!" - " This can result in OpenML-Python not working properly." - ) - _create_log_handlers(create_file_handler=False) +_config = OpenMLConfigManager() -def set_field_in_config_file(field: str, value: Any) -> None: - """Overwrites the `field` in the configuration file with the new `value`.""" - global _config - if not hasattr(_config, field): - raise ValueError( - f"Field '{field}' is not valid and must be one of '{_config.__dict__.keys()}'." - ) - _config = replace(_config, **{field: value}) - config_file = determine_config_file_path() - existing = _parse_config(config_file) - with config_file.open("w") as fh: - for f in _config.__dict__: - # We can't blindly set all values based on globals() because when the user - # sets it through config.FIELD it should not be stored to file. - # There doesn't seem to be a way to avoid writing defaults to file with configparser, - # because it is impossible to distinguish from an explicitly set value that matches - # the default value, to one that was set to its default because it was omitted. 
- v = value if f == field else existing.get(f) - if v is not None: - fh.write(f"{f} = {v}\n") - - -def _parse_config(config_file: str | Path) -> dict[str, Any]: - """Parse the config file, set up defaults.""" - config_file = Path(config_file) - config = configparser.RawConfigParser(defaults=OpenMLConfig().__dict__) # type: ignore - - # The ConfigParser requires a [SECTION_HEADER], which we do not expect in our config file. - # Cheat the ConfigParser module by adding a fake section header - config_file_ = StringIO() - config_file_.write("[FAKE_SECTION]\n") - try: - with config_file.open("r") as fh: - for line in fh: - config_file_.write(line) - except FileNotFoundError: - logger.info("No config file found at %s, using default configuration.", config_file) - except OSError as e: - logger.info("Error opening file %s: %s", config_file, e.args[0]) - config_file_.seek(0) - config.read_file(config_file_) - configuration = dict(config.items("FAKE_SECTION")) - for boolean_field in ["avoid_duplicate_runs", "show_progress"]: - if isinstance(config["FAKE_SECTION"][boolean_field], str): - configuration[boolean_field] = config["FAKE_SECTION"].getboolean(boolean_field) # type: ignore - return configuration # type: ignore - - -def get_config_as_dict() -> dict[str, Any]: - return _config.__dict__.copy() - - -# NOTE: For backwards compatibility, we keep the `str` -def get_cache_directory() -> str: - """Get the current cache directory. - - This gets the cache directory for the current server relative - to the root cache directory that can be set via - ``set_root_cache_directory()``. The cache directory is the - ``root_cache_directory`` with additional information on which - subdirectory to use based on the server name. 
By default it is - ``root_cache_directory / org / openml / www`` for the standard - OpenML.org server and is defined as - ``root_cache_directory / top-level domain / second-level domain / - hostname`` - ``` - - Returns - ------- - cachedir : string - The current cache directory. - - """ - url_suffix = urlparse(_config.server).netloc - reversed_url_suffix = os.sep.join(url_suffix.split(".")[::-1]) # noqa: PTH118 - return os.path.join(_root_cache_directory, reversed_url_suffix) # noqa: PTH118 - - -def set_root_cache_directory(root_cache_directory: str | Path) -> None: - """Set module-wide base cache directory. - - Sets the root cache directory, wherin the cache directories are - created to store content from different OpenML servers. For example, - by default, cached data for the standard OpenML.org server is stored - at ``root_cache_directory / org / openml / www``, and the general - pattern is ``root_cache_directory / top-level domain / second-level - domain / hostname``. - - Parameters - ---------- - root_cache_directory : string - Path to use as cache directory. 
- - See Also - -------- - get_cache_directory - """ - global _root_cache_directory, _config - _root_cache_directory = Path(root_cache_directory) - _config = replace(_config, cachedir=_root_cache_directory) - - -start_using_configuration_for_example = ( - ConfigurationForExamples.start_using_configuration_for_example -) -stop_using_configuration_for_example = ConfigurationForExamples.stop_using_configuration_for_example - - -@contextmanager -def overwrite_config_context(config: dict[str, Any]) -> Iterator[dict[str, Any]]: - """A context manager to temporarily override variables in the configuration.""" - existing_config = get_config_as_dict() - merged_config = {**existing_config, **config} - - _setup(merged_config) # type: ignore - yield merged_config # type: ignore - - _setup(existing_config) - - -__all__ = [ - "get_cache_directory", - "get_config_as_dict", - "set_root_cache_directory", - "start_using_configuration_for_example", - "stop_using_configuration_for_example", -] - -_setup() +def __getattr__(name: str) -> Any: + return getattr(_config, name) diff --git a/openml/runs/functions.py b/openml/runs/functions.py index 7fa560833..573d91576 100644 --- a/openml/runs/functions.py +++ b/openml/runs/functions.py @@ -18,7 +18,6 @@ import openml import openml._api_calls import openml.utils -from openml import config from openml.exceptions import ( OpenMLCacheException, OpenMLRunsExistError, @@ -107,7 +106,7 @@ def run_model_on_task( # noqa: PLR0913 """ if avoid_duplicate_runs is None: avoid_duplicate_runs = openml.config.avoid_duplicate_runs - if avoid_duplicate_runs and not config.apikey: + if avoid_duplicate_runs and not openml.config.apikey: warnings.warn( "avoid_duplicate_runs is set to True, but no API key is set. 
" "Please set your API key in the OpenML configuration file, see" @@ -226,7 +225,7 @@ def run_flow_on_task( # noqa: C901, PLR0912, PLR0915, PLR0913 raise ValueError("flow_tags should be a list") if avoid_duplicate_runs is None: - avoid_duplicate_runs = openml.config._config.avoid_duplicate_runs + avoid_duplicate_runs = openml.config.avoid_duplicate_runs # TODO: At some point in the future do not allow for arguments in old order (changed 6-2018). # Flexibility currently still allowed due to code-snippet in OpenML100 paper (3-2019). @@ -336,7 +335,7 @@ def run_flow_on_task( # noqa: C901, PLR0912, PLR0915, PLR0913 message = f"Executed Task {task.task_id} with Flow id:{run.flow_id}" else: message = f"Executed Task {task.task_id} on local Flow with name {flow.name}." - config.logger.info(message) + openml.config.logger.info(message) return run @@ -528,7 +527,7 @@ def _run_task_get_arffcontent( # noqa: PLR0915, PLR0912, C901 # The forked child process may not copy the configuration state of OpenML from the parent. # Current configuration setup needs to be copied and passed to the child processes. 
- _config = config.get_config_as_dict() + _config = openml.config.get_config_as_dict() # Execute runs in parallel # assuming the same number of tasks as workers (n_jobs), the total compute time for this # statement will be similar to the slowest run @@ -733,7 +732,7 @@ def _run_task_get_arffcontent_parallel_helper( # noqa: PLR0913 """ # Sets up the OpenML instantiated in the child process to match that of the parent's # if configuration=None, loads the default - config._setup(configuration) + openml.config._setup(configuration) train_indices, test_indices = task.get_train_test_split_indices( repeat=rep_no, @@ -757,7 +756,7 @@ def _run_task_get_arffcontent_parallel_helper( # noqa: PLR0913 else: raise NotImplementedError(task.task_type) - config.logger.info( + openml.config.logger.info( f"Going to run model {model!s} on " f"dataset {openml.datasets.get_dataset(task.dataset_id).name} " f"for repeat {rep_no} fold {fold_no} sample {sample_no}" diff --git a/openml/setups/functions.py b/openml/setups/functions.py index 374911901..90dd73c06 100644 --- a/openml/setups/functions.py +++ b/openml/setups/functions.py @@ -14,7 +14,6 @@ import openml import openml.exceptions import openml.utils -from openml import config from openml.flows import OpenMLFlow, flow_exists from .setup import OpenMLParameter, OpenMLSetup @@ -84,7 +83,7 @@ def _get_cached_setup(setup_id: int) -> OpenMLSetup: OpenMLCacheException If the setup file for the given setup ID is not cached. 
""" - cache_dir = Path(config.get_cache_directory()) + cache_dir = Path(openml.config.get_cache_directory()) setup_cache_dir = cache_dir / "setups" / str(setup_id) try: setup_file = setup_cache_dir / "description.xml" @@ -112,7 +111,7 @@ def get_setup(setup_id: int) -> OpenMLSetup: ------- OpenMLSetup (an initialized openml setup object) """ - setup_dir = Path(config.get_cache_directory()) / "setups" / str(setup_id) + setup_dir = Path(openml.config.get_cache_directory()) / "setups" / str(setup_id) setup_dir.mkdir(exist_ok=True, parents=True) setup_file = setup_dir / "description.xml" diff --git a/openml/tasks/task.py b/openml/tasks/task.py index 395b52482..304bab544 100644 --- a/openml/tasks/task.py +++ b/openml/tasks/task.py @@ -10,8 +10,8 @@ from typing import TYPE_CHECKING, Any, Sequence from typing_extensions import TypedDict +import openml import openml._api_calls -import openml.config from openml import datasets from openml.base import OpenMLBase from openml.utils import _create_cache_directory_for_id diff --git a/openml/testing.py b/openml/testing.py index fbf7edf44..d1da16876 100644 --- a/openml/testing.py +++ b/openml/testing.py @@ -99,13 +99,13 @@ def setUp(self, n_levels: int = 1, tmpdir_suffix: str = "") -> None: os.chdir(self.workdir) self.cached = True - openml.config._config.apikey = TestBase.user_key + openml.config.apikey = TestBase.user_key self.production_server = "https://www.openml.org/api/v1/xml" openml.config.set_root_cache_directory(str(self.workdir)) # Increase the number of retries to avoid spurious server failures - self.retry_policy = openml.config._config.retry_policy - self.connection_n_retries = openml.config._config.connection_n_retries + self.retry_policy = openml.config.retry_policy + self.connection_n_retries = openml.config.connection_n_retries openml.config.set_retry_policy("robot", n_retries=20) def use_production_server(self) -> None: @@ -114,8 +114,8 @@ def use_production_server(self) -> None: Please use this sparingly - it 
is better to use the test server. """ - openml.config._config.server = self.production_server - openml.config._config.apikey = "" + openml.config.server = self.production_server + openml.config.apikey = "" def tearDown(self) -> None: """Tear down the test""" @@ -127,8 +127,8 @@ def tearDown(self) -> None: # one of the files may still be used by another process raise e - openml.config._config.connection_n_retries = self.connection_n_retries - openml.config._config.retry_policy = self.retry_policy + openml.config.connection_n_retries = self.connection_n_retries + openml.config.retry_policy = self.retry_policy @classmethod def _mark_entity_for_removal( diff --git a/openml/utils.py b/openml/utils.py index 7e72e7aee..f4a78fa44 100644 --- a/openml/utils.py +++ b/openml/utils.py @@ -18,8 +18,6 @@ import openml._api_calls import openml.exceptions -from . import config - # Avoid import cycles: https://mypy.readthedocs.io/en/latest/common_issues.html#import-cycles if TYPE_CHECKING: from openml.base import OpenMLBase @@ -328,7 +326,7 @@ def _list_all( # noqa: C901 def _get_cache_dir_for_key(key: str) -> Path: - return Path(config.get_cache_directory()) / key + return Path(openml.config.get_cache_directory()) / key def _create_cache_directory(key: str) -> Path: @@ -428,7 +426,7 @@ def safe_func(*args: P.args, **kwargs: P.kwargs) -> R: def _create_lockfiles_dir() -> Path: - path = Path(config.get_cache_directory()) / "locks" + path = Path(openml.config.get_cache_directory()) / "locks" # TODO(eddiebergman): Not sure why this is allowed to error and ignore??? 
with contextlib.suppress(OSError): path.mkdir(exist_ok=True, parents=True) diff --git a/tests/conftest.py b/tests/conftest.py index ba7c65813..bd974f3f3 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -97,8 +97,8 @@ def delete_remote_files(tracker, flow_names) -> None: :param tracker: Dict :return: None """ - openml.config._config.server = TestBase.test_server - openml.config._config.apikey = TestBase.user_key + openml.config.server = TestBase.test_server + openml.config.apikey = TestBase.user_key # reordering to delete sub flows at the end of flows # sub-flows have shorter names, hence, sorting by descending order of flow name length @@ -263,8 +263,8 @@ def verify_cache_state(test_files_directory) -> Iterator[None]: @pytest.fixture(autouse=True, scope="session") def as_robot() -> Iterator[None]: - policy = openml.config._config.retry_policy - n_retries = openml.config._config.connection_n_retries + policy = openml.config.retry_policy + n_retries = openml.config.connection_n_retries openml.config.set_retry_policy("robot", n_retries=20) yield openml.config.set_retry_policy(policy, n_retries) @@ -273,12 +273,12 @@ def as_robot() -> Iterator[None]: @pytest.fixture(autouse=True) def with_server(request): if "production" in request.keywords: - openml.config._config.server = "https://www.openml.org/api/v1/xml" - openml.config._config.apikey = None + openml.config.server = "https://www.openml.org/api/v1/xml" + openml.config.apikey = None yield return - openml.config._config.server = "https://test.openml.org/api/v1/xml" - openml.config._config.apikey = TestBase.user_key + openml.config.server = "https://test.openml.org/api/v1/xml" + openml.config.apikey = TestBase.user_key yield diff --git a/tests/test_datasets/test_dataset_functions.py b/tests/test_datasets/test_dataset_functions.py index f1f9e6346..f8cb1943c 100644 --- a/tests/test_datasets/test_dataset_functions.py +++ b/tests/test_datasets/test_dataset_functions.py @@ -153,7 +153,7 @@ def 
test_check_datasets_active(self): openml.datasets.check_datasets_active, [79], ) - openml.config._config.server = self.test_server + openml.config.server = self.test_server def test_illegal_character_tag(self): dataset = openml.datasets.get_dataset(1) @@ -179,7 +179,7 @@ def test__name_to_id_with_deactivated(self): self.use_production_server() # /d/1 was deactivated assert openml.datasets.functions._name_to_id("anneal") == 2 - openml.config._config.server = self.test_server + openml.config.server = self.test_server @pytest.mark.production() def test__name_to_id_with_multiple_active(self): @@ -418,8 +418,8 @@ def test__getarff_md5_issue(self): "oml:md5_checksum": "abc", "oml:url": "https://www.openml.org/data/download/61", } - n = openml.config._config.connection_n_retries - openml.config._config.connection_n_retries = 1 + n = openml.config.connection_n_retries + openml.config.connection_n_retries = 1 self.assertRaisesRegex( OpenMLHashException, @@ -429,7 +429,7 @@ def test__getarff_md5_issue(self): description, ) - openml.config._config.connection_n_retries = n + openml.config.connection_n_retries = n def test__get_dataset_features(self): features_file = _get_dataset_features_file(self.workdir, 2) @@ -589,7 +589,7 @@ def test_data_status(self): # admin key for test server (only admins can activate datasets. 
# all users can deactivate their own datasets) - openml.config._config.apikey = TestBase.admin_key + openml.config.apikey = TestBase.admin_key openml.datasets.status_update(did, "active") self._assert_status_of_dataset(did=did, status="active") @@ -1516,7 +1516,7 @@ def test_list_datasets_with_high_size_parameter(self): datasets_b = openml.datasets.list_datasets(size=np.inf) # Reverting to test server - openml.config._config.server = self.test_server + openml.config.server = self.test_server assert len(datasets_a) == len(datasets_b) diff --git a/tests/test_openml/test_config.py b/tests/test_openml/test_config.py index 104639460..282838414 100644 --- a/tests/test_openml/test_config.py +++ b/tests/test_openml/test_config.py @@ -110,26 +110,25 @@ class TestConfigurationForExamples(openml.testing.TestBase): def test_switch_to_example_configuration(self): """Verifies the test configuration is loaded properly.""" # Below is the default test key which would be used anyway, but just for clarity: - openml.config._config.apikey = TestBase.admin_key - openml.config._config.server = self.production_server + openml.config.apikey = TestBase.admin_key + openml.config.server = self.production_server openml.config.start_using_configuration_for_example() - assert openml.config._config.apikey == TestBase.user_key - assert openml.config._config.server == self.test_server + assert openml.config.apikey == TestBase.user_key + assert openml.config.server == self.test_server @pytest.mark.production() def test_switch_from_example_configuration(self): """Verifies the previous configuration is loaded after stopping.""" # Below is the default test key which would be used anyway, but just for clarity: - openml.config._config.apikey = TestBase.user_key - openml.config._config.server = self.production_server + openml.config.apikey = TestBase.user_key + openml.config.server = self.production_server openml.config.start_using_configuration_for_example() 
openml.config.stop_using_configuration_for_example() - - assert openml.config._config.apikey == TestBase.user_key - assert openml.config._config.server == self.production_server + assert openml.config.apikey == TestBase.user_key + assert openml.config.server == self.production_server def test_example_configuration_stop_before_start(self): """Verifies an error is raised if `stop_...` is called before `start_...`.""" @@ -146,15 +145,15 @@ def test_example_configuration_stop_before_start(self): @pytest.mark.production() def test_example_configuration_start_twice(self): """Checks that the original config can be returned to if `start..` is called twice.""" - openml.config._config.apikey = TestBase.user_key - openml.config._config.server = self.production_server + openml.config.apikey = TestBase.user_key + openml.config.server = self.production_server openml.config.start_using_configuration_for_example() openml.config.start_using_configuration_for_example() openml.config.stop_using_configuration_for_example() - assert openml.config._config.apikey == TestBase.user_key - assert openml.config._config.server == self.production_server + assert openml.config.apikey == TestBase.user_key + assert openml.config.server == self.production_server def test_configuration_file_not_overwritten_on_load(): diff --git a/tests/test_utils/test_utils.py b/tests/test_utils/test_utils.py index 1c0b50fe5..35be84903 100644 --- a/tests/test_utils/test_utils.py +++ b/tests/test_utils/test_utils.py @@ -44,7 +44,7 @@ def min_number_evaluations_on_test_server() -> int: def _mocked_perform_api_call(call, request_method): - url = openml.config._config.server + "/" + call + url = openml.config.server + "/" + call return openml._api_calls._download_text_file(url) From 7a67bf01834ef0d5ba4075c612de6a3554d2d82b Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Mon, 5 Jan 2026 17:30:53 +0530 Subject: [PATCH 010/156] Update introduction_tutorial.py --- 
examples/Basics/introduction_tutorial.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/Basics/introduction_tutorial.py b/examples/Basics/introduction_tutorial.py index 4b972b95b..c864772f5 100644 --- a/examples/Basics/introduction_tutorial.py +++ b/examples/Basics/introduction_tutorial.py @@ -52,4 +52,4 @@ # %% import openml -openml.config.set_root_cache_directory("YOURDIR") +openml.config.set_root_cache_directory("YOURDIR") \ No newline at end of file From 5dfcbce55a027d19cd502ea7bb3d521c2b1bca29 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Wed, 7 Jan 2026 22:14:31 +0500 Subject: [PATCH 011/156] refactor --- openml/_api/config.py | 62 +++++++++++++++++++++++++++++++++++-- openml/_api/http/client.py | 18 +++++++---- openml/_api/runtime/core.py | 9 ++---- 3 files changed, 74 insertions(+), 15 deletions(-) diff --git a/openml/_api/config.py b/openml/_api/config.py index bd93c3cad..1431f66b1 100644 --- a/openml/_api/config.py +++ b/openml/_api/config.py @@ -1,5 +1,61 @@ from __future__ import annotations -API_V1_SERVER = "https://www.openml.org/api/v1/xml" -API_V2_SERVER = "http://127.0.0.1:8001" -API_KEY = "..." 
+from dataclasses import dataclass +from typing import Literal + +DelayMethod = Literal["human", "robot"] + + +@dataclass +class APIConfig: + server: str + base_url: str + key: str + + +@dataclass +class APISettings: + v1: APIConfig + v2: APIConfig + + +@dataclass +class ConnectionConfig: + retries: int = 3 + delay_method: DelayMethod = "human" + delay_time: int = 1 # seconds + + def __post_init__(self) -> None: + if self.delay_method not in ("human", "robot"): + raise ValueError(f"delay_method must be 'human' or 'robot', got {self.delay_method}") + + +@dataclass +class CacheConfig: + dir: str = "~/.openml/cache" + ttl: int = 60 * 60 * 24 * 7 # one week + + +@dataclass +class Settings: + api: APISettings + connection: ConnectionConfig + cache: CacheConfig + + +settings = Settings( + api=APISettings( + v1=APIConfig( + server="https://www.openml.org/", + base_url="api/v1/xml/", + key="...", + ), + v2=APIConfig( + server="http://127.0.0.1:8001/", + base_url="", + key="...", + ), + ), + connection=ConnectionConfig(), + cache=CacheConfig(), +) diff --git a/openml/_api/http/client.py b/openml/_api/http/client.py index dea5de809..74e08c709 100644 --- a/openml/_api/http/client.py +++ b/openml/_api/http/client.py @@ -1,24 +1,30 @@ from __future__ import annotations -from typing import Any, Mapping +from typing import TYPE_CHECKING, Any, Mapping import requests from requests import Response from openml.__version__ import __version__ +if TYPE_CHECKING: + from openml._api.config import APIConfig + class HTTPClient: - def __init__(self, base_url: str) -> None: - self.base_url = base_url + def __init__(self, config: APIConfig) -> None: + self.config = config self.headers: dict[str, str] = {"user-agent": f"openml-python/{__version__}"} + def _create_url(self, path: str) -> str: + return self.config.server + self.config.base_url + path + def get( self, path: str, params: Mapping[str, Any] | None = None, ) -> Response: - url = f"{self.base_url}/{path}" + url = 
self._create_url(path) return requests.get(url, params=params, headers=self.headers, timeout=10) def post( @@ -27,7 +33,7 @@ def post( data: Mapping[str, Any] | None = None, files: Any = None, ) -> Response: - url = f"{self.base_url}/{path}" + url = self._create_url(path) return requests.post(url, data=data, files=files, headers=self.headers, timeout=10) def delete( @@ -35,5 +41,5 @@ def delete( path: str, params: Mapping[str, Any] | None = None, ) -> Response: - url = f"{self.base_url}/{path}" + url = self._create_url(path) return requests.delete(url, params=params, headers=self.headers, timeout=10) diff --git a/openml/_api/runtime/core.py b/openml/_api/runtime/core.py index aa09a69db..98b587411 100644 --- a/openml/_api/runtime/core.py +++ b/openml/_api/runtime/core.py @@ -2,10 +2,7 @@ from typing import TYPE_CHECKING -from openml._api.config import ( - API_V1_SERVER, - API_V2_SERVER, -) +from openml._api.config import settings from openml._api.http.client import HTTPClient from openml._api.resources import ( DatasetsV1, @@ -25,8 +22,8 @@ def __init__(self, *, datasets: DatasetsAPI, tasks: TasksAPI): def build_backend(version: str, *, strict: bool) -> APIBackend: - v1_http = HTTPClient(API_V1_SERVER) - v2_http = HTTPClient(API_V2_SERVER) + v1_http = HTTPClient(config=settings.api.v1) + v2_http = HTTPClient(config=settings.api.v2) v1 = APIBackend( datasets=DatasetsV1(v1_http), From 2acbe9992cf95bfc103ff4fa0c360a58c1842870 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Wed, 7 Jan 2026 22:24:03 +0500 Subject: [PATCH 012/156] implement cache_dir --- openml/_api/http/client.py | 74 +++++++++++++++++++++++++++++++++----- 1 file changed, 66 insertions(+), 8 deletions(-) diff --git a/openml/_api/http/client.py b/openml/_api/http/client.py index 74e08c709..49b05c88e 100644 --- a/openml/_api/http/client.py +++ b/openml/_api/http/client.py @@ -1,36 +1,93 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any, Mapping +from pathlib import Path +from 
typing import TYPE_CHECKING, Any +from urllib.parse import urlencode, urljoin, urlparse import requests from requests import Response from openml.__version__ import __version__ +from openml._api.config import settings if TYPE_CHECKING: from openml._api.config import APIConfig -class HTTPClient: +class CacheMixin: + @property + def dir(self) -> str: + return settings.cache.dir + + @property + def ttl(self) -> int: + return settings.cache.ttl + + def _get_cache_directory(self, url: str, params: dict[str, Any]) -> Path: + parsed_url = urlparse(url) + netloc_parts = parsed_url.netloc.split(".")[::-1] # reverse domain + path_parts = parsed_url.path.strip("/").split("/") + + # remove api_key and serialize params if any + filtered_params = {k: v for k, v in params.items() if k != "api_key"} + params_part = [urlencode(filtered_params)] if filtered_params else [] + + return Path(self.dir).joinpath(*netloc_parts, *path_parts, *params_part) + + def _get_cache_response(self, url: str, params: dict[str, Any]) -> Response | None: # noqa: ARG002 + return None + + def _set_cache_response(self, url: str, params: dict[str, Any], response: Response) -> None: # noqa: ARG002 + return None + + +class HTTPClient(CacheMixin): def __init__(self, config: APIConfig) -> None: self.config = config self.headers: dict[str, str] = {"user-agent": f"openml-python/{__version__}"} - def _create_url(self, path: str) -> str: - return self.config.server + self.config.base_url + path + @property + def server(self) -> str: + return self.config.server + + @property + def base_url(self) -> str: + return self.config.base_url + + def _create_url(self, path: str) -> Any: + return urljoin(self.server, urljoin(self.base_url, path)) def get( self, path: str, - params: Mapping[str, Any] | None = None, + *, + params: dict[str, Any] | None = None, + use_cache: bool = False, + use_api_key: bool = False, ) -> Response: url = self._create_url(path) - return requests.get(url, params=params, headers=self.headers, 
timeout=10) + params = dict(params) if params is not None else {} + + if use_api_key: + params["api_key"] = self.config.key + + if use_cache: + response = self._get_cache_response(url, params) + if response: + return response + + response = requests.get(url, params=params, headers=self.headers, timeout=10) + + if use_cache: + self._set_cache_response(url, params, response) + + return response def post( self, path: str, - data: Mapping[str, Any] | None = None, + *, + data: dict[str, Any] | None = None, files: Any = None, ) -> Response: url = self._create_url(path) @@ -39,7 +96,8 @@ def post( def delete( self, path: str, - params: Mapping[str, Any] | None = None, + *, + params: dict[str, Any] | None = None, ) -> Response: url = self._create_url(path) return requests.delete(url, params=params, headers=self.headers, timeout=10) From af99880a9e16a49833c63084c9e9267c112b6b91 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Wed, 7 Jan 2026 23:42:17 +0500 Subject: [PATCH 013/156] refactor --- openml/_api/config.py | 1 + openml/_api/http/client.py | 100 +++++++++++++++++++++++++++---------- 2 files changed, 75 insertions(+), 26 deletions(-) diff --git a/openml/_api/config.py b/openml/_api/config.py index 1431f66b1..848fe8da1 100644 --- a/openml/_api/config.py +++ b/openml/_api/config.py @@ -11,6 +11,7 @@ class APIConfig: server: str base_url: str key: str + timeout: int = 10 # seconds @dataclass diff --git a/openml/_api/http/client.py b/openml/_api/http/client.py index 49b05c88e..a90e93933 100644 --- a/openml/_api/http/client.py +++ b/openml/_api/http/client.py @@ -23,7 +23,7 @@ def dir(self) -> str: def ttl(self) -> int: return settings.cache.ttl - def _get_cache_directory(self, url: str, params: dict[str, Any]) -> Path: + def _get_cache_dir(self, url: str, params: dict[str, Any]) -> Path: parsed_url = urlparse(url) netloc_parts = parsed_url.netloc.split(".")[::-1] # reverse domain path_parts = parsed_url.path.strip("/").split("/") @@ -34,10 +34,10 @@ def 
_get_cache_directory(self, url: str, params: dict[str, Any]) -> Path: return Path(self.dir).joinpath(*netloc_parts, *path_parts, *params_part) - def _get_cache_response(self, url: str, params: dict[str, Any]) -> Response | None: # noqa: ARG002 - return None + def _get_cache_response(self, cache_dir: Path) -> Response: # noqa: ARG002 + return Response() - def _set_cache_response(self, url: str, params: dict[str, Any], response: Response) -> None: # noqa: ARG002 + def _set_cache_response(self, cache_dir: Path, response: Response) -> None: # noqa: ARG002 return None @@ -54,50 +54,98 @@ def server(self) -> str: def base_url(self) -> str: return self.config.base_url - def _create_url(self, path: str) -> Any: - return urljoin(self.server, urljoin(self.base_url, path)) + @property + def key(self) -> str: + return self.config.key - def get( + @property + def timeout(self) -> int: + return self.config.timeout + + def request( self, + method: str, path: str, *, - params: dict[str, Any] | None = None, use_cache: bool = False, use_api_key: bool = False, + **request_kwargs: Any, ) -> Response: - url = self._create_url(path) - params = dict(params) if params is not None else {} + url = urljoin(self.server, urljoin(self.base_url, path)) + params = request_kwargs.pop("params", {}) + params = params.copy() if use_api_key: - params["api_key"] = self.config.key + params["api_key"] = self.key - if use_cache: - response = self._get_cache_response(url, params) - if response: - return response + headers = request_kwargs.pop("headers", {}) + headers = headers.copy() + headers.update(self.headers) + + timeout = request_kwargs.pop("timeout", self.timeout) + cache_dir = self._get_cache_dir(url, params) - response = requests.get(url, params=params, headers=self.headers, timeout=10) + if use_cache: + try: + return self._get_cache_response(cache_dir) + # TODO: handle ttl expired error + except Exception: + raise + + response = requests.request( + method=method, + url=url, + params=params, + 
headers=headers, + timeout=timeout, + **request_kwargs, + ) if use_cache: - self._set_cache_response(url, params, response) + self._set_cache_response(cache_dir, response) return response - def post( + def get( self, path: str, *, - data: dict[str, Any] | None = None, - files: Any = None, + use_cache: bool = False, + use_api_key: bool = False, + **request_kwargs: Any, ) -> Response: - url = self._create_url(path) - return requests.post(url, data=data, files=files, headers=self.headers, timeout=10) + # TODO: remove override when cache is implemented + use_cache = False + return self.request( + method="GET", + path=path, + use_cache=use_cache, + use_api_key=use_api_key, + **request_kwargs, + ) + + def post( + self, + path: str, + **request_kwargs: Any, + ) -> Response: + return self.request( + method="POST", + path=path, + use_cache=False, + use_api_key=True, + **request_kwargs, + ) def delete( self, path: str, - *, - params: dict[str, Any] | None = None, + **request_kwargs: Any, ) -> Response: - url = self._create_url(path) - return requests.delete(url, params=params, headers=self.headers, timeout=10) + return self.request( + method="DELETE", + path=path, + use_cache=False, + use_api_key=True, + **request_kwargs, + ) From 4241624d6ed0b0e563079d269c6e3dbac185bd63 Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Tue, 13 Jan 2026 02:11:54 +0530 Subject: [PATCH 014/156] bug fixing --- openml/config.py | 8 +++++--- tests/test_openml/test_config.py | 2 +- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/openml/config.py b/openml/config.py index 2ecb3c64f..06127fcac 100644 --- a/openml/config.py +++ b/openml/config.py @@ -87,6 +87,8 @@ def __init__(self) -> None: self._user_path = Path("~").expanduser().absolute() self._config: OpenMLConfig = OpenMLConfig() + # for legacy test `test_non_writable_home` + self._defaults: dict[str, Any] = OpenMLConfig().__dict__.copy() self._root_cache_directory: Path = 
self._config.cachedir self.logger = logger @@ -427,7 +429,7 @@ def start_using_configuration_for_example(self) -> None: self._last_used_server = self._manager._config.server self._last_used_key = self._manager._config.apikey - self._start_last_called = True + type(self)._start_last_called = True # Test server key for examples self._manager._config = replace( @@ -444,7 +446,7 @@ def start_using_configuration_for_example(self) -> None: def stop_using_configuration_for_example(self) -> None: """Return to configuration as it was before `start_use_example_configuration`.""" - if not self._start_last_called: + if not type(self)._start_last_called: # We don't want to allow this because it will (likely) result in the `server` and # `apikey` variables being set to None. raise RuntimeError( @@ -457,7 +459,7 @@ def stop_using_configuration_for_example(self) -> None: server=cast("str", self._last_used_server), apikey=cast("str", self._last_used_key), ) - self._start_last_called = False + type(self)._start_last_called = False _config = OpenMLConfigManager() diff --git a/tests/test_openml/test_config.py b/tests/test_openml/test_config.py index 282838414..9ac4a059e 100644 --- a/tests/test_openml/test_config.py +++ b/tests/test_openml/test_config.py @@ -46,7 +46,7 @@ class TestConfig(openml.testing.TestBase): def test_non_writable_home(self, log_handler_mock, warnings_mock): with tempfile.TemporaryDirectory(dir=self.workdir) as td: os.chmod(td, 0o444) - _dd = copy(openml.config.OpenMLConfig().__dict__) + _dd = copy(openml.config._defaults) _dd["cachedir"] = Path(td) / "something-else" openml.config._setup(_dd) From f01c1e977b9b0a4297107bd3c70b91b29ae920e4 Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Tue, 13 Jan 2026 02:14:18 +0530 Subject: [PATCH 015/156] Update test_utils.py --- tests/test_utils/test_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_utils/test_utils.py 
b/tests/test_utils/test_utils.py index a1cdb55ea..0d4a4e3c2 100644 --- a/tests/test_utils/test_utils.py +++ b/tests/test_utils/test_utils.py @@ -124,7 +124,7 @@ def test_list_all_few_results_available(_perform_api_call): @unittest.skipIf(os.name == "nt", "https://github.com/openml/openml-python/issues/1033") -@unittest.mock.patch("openml.config.get_cache_directory") +@unittest.mock.patch("openml.utils.openml.config.get_cache_directory") def test__create_cache_directory(config_mock, tmp_path): config_mock.return_value = tmp_path openml.utils._create_cache_directory("abc") From 07cc1c83ee8651ed9debc2bddf3a350bda3c15bd Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Tue, 13 Jan 2026 02:24:02 +0530 Subject: [PATCH 016/156] Update test_config.py --- tests/test_openml/test_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_openml/test_config.py b/tests/test_openml/test_config.py index 9ac4a059e..bc3ff0a23 100644 --- a/tests/test_openml/test_config.py +++ b/tests/test_openml/test_config.py @@ -37,7 +37,7 @@ def safe_environ_patcher(key: str, value: Any) -> Iterator[None]: class TestConfig(openml.testing.TestBase): @unittest.mock.patch("openml.config.openml_logger.warning") - @unittest.mock.patch("openml.config._create_log_handlers") + @unittest.mock.patch("openml.config.OpenMLConfigManager._create_log_handlers") @unittest.skipIf(os.name == "nt", "https://github.com/openml/openml-python/issues/1033") @unittest.skipIf( platform.uname().release.endswith(("-Microsoft", "microsoft-standard-WSL2")), From 4c75e16890a76d8fbc0ddc125a267d23ddaded44 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Thu, 15 Jan 2026 14:51:22 +0500 Subject: [PATCH 017/156] undo changes in tasks/functions.py --- openml/tasks/functions.py | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/openml/tasks/functions.py b/openml/tasks/functions.py index a794ad56d..e9b879ae4 100644 --- 
a/openml/tasks/functions.py +++ b/openml/tasks/functions.py @@ -12,7 +12,6 @@ import openml._api_calls import openml.utils -from openml._api import api_context from openml.datasets import get_dataset from openml.exceptions import OpenMLCacheException @@ -445,16 +444,11 @@ def _get_task_description(task_id: int) -> OpenMLTask: except OpenMLCacheException: _cache_dir = openml.utils._create_cache_directory_for_id(TASKS_CACHE_DIR_NAME, task_id) xml_file = _cache_dir / "task.xml" - result = api_context.backend.tasks.get(task_id, return_response=True) + task_xml = openml._api_calls._perform_api_call("task/%d" % task_id, "get") - if isinstance(result, tuple): - task, response = result - with xml_file.open("w", encoding="utf8") as fh: - fh.write(response.text) - else: - task = result - - return task + with xml_file.open("w", encoding="utf8") as fh: + fh.write(task_xml) + return _create_task_from_xml(task_xml) def _create_task_from_xml(xml: str) -> OpenMLTask: From 021a1e12d572d332a863bddeae0b8ab46cd5d922 Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Thu, 15 Jan 2026 21:36:54 +0530 Subject: [PATCH 018/156] made requested changes --- openml/__init__.py | 6 +++--- openml/{config.py => _config.py} | 2 -- openml/cli.py | 3 +-- 3 files changed, 4 insertions(+), 7 deletions(-) rename openml/{config.py => _config.py} (99%) diff --git a/openml/__init__.py b/openml/__init__.py index 38fb232ae..efb9ead83 100644 --- a/openml/__init__.py +++ b/openml/__init__.py @@ -22,7 +22,7 @@ from . 
import ( _api_calls, - config as _config_module, + _config as _config_module, datasets, evaluations, exceptions, @@ -52,7 +52,7 @@ ) if TYPE_CHECKING: - from .config import OpenMLConfigManager + from ._config import OpenMLConfigManager config: OpenMLConfigManager = _config_module._config @@ -116,7 +116,7 @@ def populate_cache( "OpenMLTask", "__version__", "_api_calls", - "config", + "_config", "datasets", "evaluations", "exceptions", diff --git a/openml/config.py b/openml/_config.py similarity index 99% rename from openml/config.py rename to openml/_config.py index da4463c52..9dd75c989 100644 --- a/openml/config.py +++ b/openml/_config.py @@ -334,8 +334,6 @@ def _setup(self, config: dict[str, Any] | None = None) -> None: connection_n_retries=int(config["connection_n_retries"]), ) - self.set_retry_policy(config["retry_policy"], self._config.connection_n_retries) - user_defined_cache_dir = os.environ.get(self.OPENML_CACHE_DIR_ENV_VAR) if user_defined_cache_dir is not None: short_cache_dir = Path(user_defined_cache_dir) diff --git a/openml/cli.py b/openml/cli.py index b594eb623..c8740fd0e 100644 --- a/openml/cli.py +++ b/openml/cli.py @@ -6,11 +6,10 @@ import string import sys from collections.abc import Callable +from dataclasses import fields from pathlib import Path from urllib.parse import urlparse -from attr import fields - from openml import config From 1d9122039619423c56a0ba7eecd0215ed2545f24 Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Thu, 15 Jan 2026 21:38:50 +0530 Subject: [PATCH 019/156] made requested changes --- openml/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openml/__init__.py b/openml/__init__.py index efb9ead83..d5cb99fd9 100644 --- a/openml/__init__.py +++ b/openml/__init__.py @@ -116,7 +116,7 @@ def populate_cache( "OpenMLTask", "__version__", "_api_calls", - "_config", + "config", "datasets", "evaluations", "exceptions", From 
0060b2e69480354975518e4a6213b5906df487a5 Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Thu, 15 Jan 2026 21:56:29 +0530 Subject: [PATCH 020/156] fixed bugs --- tests/test_openml/test_api_calls.py | 1 - tests/test_openml/test_config.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/test_openml/test_api_calls.py b/tests/test_openml/test_api_calls.py index a295259ef..6b1cc64b1 100644 --- a/tests/test_openml/test_api_calls.py +++ b/tests/test_openml/test_api_calls.py @@ -9,7 +9,6 @@ import pytest import openml -from openml.config import ConfigurationForExamples import openml.testing from openml._api_calls import _download_minio_bucket, API_TOKEN_HELP_LINK diff --git a/tests/test_openml/test_config.py b/tests/test_openml/test_config.py index bc3ff0a23..c3d931ea1 100644 --- a/tests/test_openml/test_config.py +++ b/tests/test_openml/test_config.py @@ -12,7 +12,7 @@ import pytest -import openml.config +import openml import openml.testing from openml.testing import TestBase From 65ba66b5c14c5736881b5786e77fdae780c8e095 Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Thu, 15 Jan 2026 21:58:24 +0530 Subject: [PATCH 021/156] fixed bugs --- openml/_api_calls.py | 19 ++++++++++--------- openml/base.py | 2 +- openml/datasets/dataset.py | 6 ++++-- openml/datasets/functions.py | 6 ++++-- openml/evaluations/evaluation.py | 2 +- openml/runs/functions.py | 2 +- openml/setups/setup.py | 2 +- openml/study/functions.py | 2 +- openml/study/study.py | 4 ++-- .../test_evaluations_example.py | 5 ++--- 10 files changed, 27 insertions(+), 23 deletions(-) diff --git a/openml/_api_calls.py b/openml/_api_calls.py index 3ccd03a27..f920ae60a 100644 --- a/openml/_api_calls.py +++ b/openml/_api_calls.py @@ -20,7 +20,8 @@ import xmltodict from urllib3 import ProxyManager -from . 
import config +import openml + from .__version__ import __version__ from .exceptions import ( OpenMLHashException, @@ -71,7 +72,7 @@ def resolve_env_proxies(url: str) -> str | None: def _create_url_from_endpoint(endpoint: str) -> str: - url = cast("str", config.server) + url = cast("str", openml.config.server) if not url.endswith("/"): url += "/" url += endpoint @@ -172,7 +173,7 @@ def _download_minio_file( bucket_name=bucket, object_name=object_name, file_path=str(destination), - progress=ProgressBar() if config.show_progress else None, + progress=ProgressBar() if openml.config.show_progress else None, request_headers=_HEADERS, ) if destination.is_file() and destination.suffix == ".zip": @@ -301,7 +302,7 @@ def _file_id_to_url(file_id: int, filename: str | None = None) -> str: Presents the URL how to download a given file id filename is optional """ - openml_server = cast("str", config.server) + openml_server = cast("str", openml.config.server) openml_url = openml_server.split("/api/") url = openml_url[0] + f"/data/download/{file_id!s}" if filename is not None: @@ -318,7 +319,7 @@ def _read_url_files( and sending file_elements as files """ data = {} if data is None else data - data["api_key"] = config.apikey + data["api_key"] = openml.config.apikey if file_elements is None: file_elements = {} # Using requests.post sets header 'Accept-encoding' automatically to @@ -338,8 +339,8 @@ def __read_url( md5_checksum: str | None = None, ) -> requests.Response: data = {} if data is None else data - if config.apikey: - data["api_key"] = config.apikey + if openml.config.apikey: + data["api_key"] = openml.config.apikey return _send_request( request_method=request_method, url=url, @@ -364,10 +365,10 @@ def _send_request( # noqa: C901, PLR0912 files: FILE_ELEMENTS_TYPE | None = None, md5_checksum: str | None = None, ) -> requests.Response: - n_retries = max(1, config.connection_n_retries) + n_retries = max(1, openml.config.connection_n_retries) response: requests.Response | None 
= None - delay_method = _human_delay if config.retry_policy == "human" else _robot_delay + delay_method = _human_delay if openml.config.retry_policy == "human" else _robot_delay # Error to raise in case of retrying too often. Will be set to the last observed exception. retry_raise_e: Exception | None = None diff --git a/openml/base.py b/openml/base.py index a282be8eb..f79bc2931 100644 --- a/openml/base.py +++ b/openml/base.py @@ -8,8 +8,8 @@ import xmltodict +import openml import openml._api_calls -import openml.config from .utils import _get_rest_api_type_alias, _tag_openml_base diff --git a/openml/datasets/dataset.py b/openml/datasets/dataset.py index a77fd1040..bce9c07b4 100644 --- a/openml/datasets/dataset.py +++ b/openml/datasets/dataset.py @@ -17,8 +17,8 @@ import scipy.sparse import xmltodict +import openml from openml.base import OpenMLBase -from openml.config import OPENML_SKIP_PARQUET_ENV_VAR from .data_feature import OpenMLDataFeature @@ -375,7 +375,9 @@ def _download_data(self) -> None: # import required here to avoid circular import. 
from .functions import _get_dataset_arff, _get_dataset_parquet - skip_parquet = os.environ.get(OPENML_SKIP_PARQUET_ENV_VAR, "false").casefold() == "true" + skip_parquet = ( + os.environ.get(openml.config.OPENML_SKIP_PARQUET_ENV_VAR, "false").casefold() == "true" + ) if self._parquet_url is not None and not skip_parquet: parquet_file = _get_dataset_parquet(self) self.parquet_file = None if parquet_file is None else str(parquet_file) diff --git a/openml/datasets/functions.py b/openml/datasets/functions.py index 3ac657ea0..432938520 100644 --- a/openml/datasets/functions.py +++ b/openml/datasets/functions.py @@ -19,9 +19,9 @@ import xmltodict from scipy.sparse import coo_matrix +import openml import openml._api_calls import openml.utils -from openml.config import OPENML_SKIP_PARQUET_ENV_VAR from openml.exceptions import ( OpenMLHashException, OpenMLPrivateDatasetError, @@ -492,7 +492,9 @@ def get_dataset( # noqa: C901, PLR0912 qualities_file = _get_dataset_qualities_file(did_cache_dir, dataset_id) parquet_file = None - skip_parquet = os.environ.get(OPENML_SKIP_PARQUET_ENV_VAR, "false").casefold() == "true" + skip_parquet = ( + os.environ.get(openml.config.OPENML_SKIP_PARQUET_ENV_VAR, "false").casefold() == "true" + ) download_parquet = "oml:parquet_url" in description and not skip_parquet if download_parquet and (download_data or download_all_files): try: diff --git a/openml/evaluations/evaluation.py b/openml/evaluations/evaluation.py index 5db087024..e15bf728a 100644 --- a/openml/evaluations/evaluation.py +++ b/openml/evaluations/evaluation.py @@ -3,7 +3,7 @@ from dataclasses import asdict, dataclass -import openml.config +import openml import openml.datasets import openml.flows import openml.runs diff --git a/openml/runs/functions.py b/openml/runs/functions.py index 4eb173a31..b8eb739ae 100644 --- a/openml/runs/functions.py +++ b/openml/runs/functions.py @@ -44,7 +44,7 @@ # Avoid import cycles: https://mypy.readthedocs.io/en/latest/common_issues.html#import-cycles 
if TYPE_CHECKING: - from openml.config import _Config + from openml._config import _Config from openml.extensions.extension_interface import Extension # get_dict is in run.py to avoid circular imports diff --git a/openml/setups/setup.py b/openml/setups/setup.py index 0960ad4c1..7ea44a19f 100644 --- a/openml/setups/setup.py +++ b/openml/setups/setup.py @@ -3,7 +3,7 @@ from typing import Any -import openml.config +import openml import openml.flows diff --git a/openml/study/functions.py b/openml/study/functions.py index bb24ddcff..367537773 100644 --- a/openml/study/functions.py +++ b/openml/study/functions.py @@ -8,8 +8,8 @@ import pandas as pd import xmltodict +import openml import openml._api_calls -import openml.config import openml.utils from openml.study.study import OpenMLBenchmarkSuite, OpenMLStudy diff --git a/openml/study/study.py b/openml/study/study.py index 7a9c80bbe..803c6455b 100644 --- a/openml/study/study.py +++ b/openml/study/study.py @@ -5,8 +5,8 @@ from collections.abc import Sequence from typing import Any +import openml from openml.base import OpenMLBase -from openml.config import get_server_base_url class BaseStudy(OpenMLBase): @@ -111,7 +111,7 @@ def _get_repr_body_fields(self) -> Sequence[tuple[str, str | int | list[str]]]: fields["ID"] = self.study_id fields["Study URL"] = self.openml_url if self.creator is not None: - fields["Creator"] = f"{get_server_base_url()}/u/{self.creator}" + fields["Creator"] = f"{openml.config.get_server_base_url()}/u/{self.creator}" if self.creation_date is not None: fields["Upload Time"] = self.creation_date.replace("T", " ") if self.data is not None: diff --git a/tests/test_evaluations/test_evaluations_example.py b/tests/test_evaluations/test_evaluations_example.py index a9ad7e8c1..5a2d233ce 100644 --- a/tests/test_evaluations/test_evaluations_example.py +++ b/tests/test_evaluations/test_evaluations_example.py @@ -3,14 +3,13 @@ import unittest -from openml.config import overwrite_config_context - +import openml 
class TestEvaluationsExample(unittest.TestCase): def test_example_python_paper(self): # Example script which will appear in the upcoming OpenML-Python paper # This test ensures that the example will keep running! - with overwrite_config_context( + with openml.config.overwrite_config_context( # noqa: F823 { "server": "https://www.openml.org/api/v1/xml", "apikey": None, From 317c6e9fc9c93809628fe8301a0ca509e00b00a6 Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Thu, 15 Jan 2026 22:52:54 +0530 Subject: [PATCH 022/156] fixed bugs --- tests/test_evaluations/test_evaluations_example.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_evaluations/test_evaluations_example.py b/tests/test_evaluations/test_evaluations_example.py index 5a2d233ce..b321f475d 100644 --- a/tests/test_evaluations/test_evaluations_example.py +++ b/tests/test_evaluations/test_evaluations_example.py @@ -17,7 +17,6 @@ def test_example_python_paper(self): ): import matplotlib.pyplot as plt import numpy as np - import openml df = openml.evaluations.list_evaluations_setups( "predictive_accuracy", From 503ab828448baf90b57541da332ba151f0aa769e Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Thu, 15 Jan 2026 22:53:22 +0530 Subject: [PATCH 023/156] fixed bugs --- tests/test_openml/test_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_openml/test_config.py b/tests/test_openml/test_config.py index c3d931ea1..e39be87a6 100644 --- a/tests/test_openml/test_config.py +++ b/tests/test_openml/test_config.py @@ -37,7 +37,7 @@ def safe_environ_patcher(key: str, value: Any) -> Iterator[None]: class TestConfig(openml.testing.TestBase): @unittest.mock.patch("openml.config.openml_logger.warning") - @unittest.mock.patch("openml.config.OpenMLConfigManager._create_log_handlers") + @unittest.mock.patch("openml._config.OpenMLConfigManager._create_log_handlers") 
@unittest.skipIf(os.name == "nt", "https://github.com/openml/openml-python/issues/1033") @unittest.skipIf( platform.uname().release.endswith(("-Microsoft", "microsoft-standard-WSL2")), From fa3cd40955b16bea0c0d6479473ee4253f46457f Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Fri, 16 Jan 2026 16:47:07 +0530 Subject: [PATCH 024/156] bug fixing --- openml/cli.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/openml/cli.py b/openml/cli.py index 5f8f21f03..67b3ee7c4 100644 --- a/openml/cli.py +++ b/openml/cli.py @@ -10,7 +10,7 @@ from pathlib import Path from urllib.parse import urlparse -from openml import config +import openml from openml.__version__ import __version__ @@ -60,17 +60,17 @@ def wait_until_valid_input( def print_configuration() -> None: - file = config.determine_config_file_path() + file = openml.config.determine_config_file_path() header = f"File '{file}' contains (or defaults to):" print(header) - max_key_length = max(map(len, config.get_config_as_dict())) - for field, value in config.get_config_as_dict().items(): + max_key_length = max(map(len, openml.config.get_config_as_dict())) + for field, value in openml.config.get_config_as_dict().items(): print(f"{field.ljust(max_key_length)}: {value}") def verbose_set(field: str, value: str) -> None: - config.set_field_in_config_file(field, value) + openml.config.set_field_in_config_file(field, value) print(f"{field} set to '{value}'.") @@ -83,7 +83,7 @@ def check_apikey(apikey: str) -> str: return "" instructions = ( - f"Your current API key is set to: '{config.apikey}'. " + f"Your current API key is set to: '{openml.config.apikey}'. " "You can get an API key at https://new.openml.org. " "You must create an account if you don't have one yet:\n" " 1. 
Log in with the account.\n" @@ -349,7 +349,7 @@ def main() -> None: ) configurable_fields = [ - f.name for f in fields(config.OpenMLConfig) if f.name not in ["max_retries"] + f.name for f in fields(openml._config.OpenMLConfig) if f.name not in ["max_retries"] ] parser_configure.add_argument( From c6033832e8008d0d8f94fa196d519e35f24030c3 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Wed, 21 Jan 2026 10:47:26 +0500 Subject: [PATCH 025/156] add tests directory --- tests/test_api/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 tests/test_api/__init__.py diff --git a/tests/test_api/__init__.py b/tests/test_api/__init__.py new file mode 100644 index 000000000..e69de29bb From ff6a8b05314e74bba7ad64388304a3708f83dbf0 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Wed, 21 Jan 2026 11:40:23 +0500 Subject: [PATCH 026/156] use enum for delay method --- openml/_api/config.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/openml/_api/config.py b/openml/_api/config.py index 848fe8da1..13063df7a 100644 --- a/openml/_api/config.py +++ b/openml/_api/config.py @@ -1,9 +1,12 @@ from __future__ import annotations from dataclasses import dataclass -from typing import Literal +from enum import Enum -DelayMethod = Literal["human", "robot"] + +class DelayMethod(str, Enum): + HUMAN = "human" + ROBOT = "robot" @dataclass @@ -23,13 +26,9 @@ class APISettings: @dataclass class ConnectionConfig: retries: int = 3 - delay_method: DelayMethod = "human" + delay_method: DelayMethod = DelayMethod.HUMAN delay_time: int = 1 # seconds - def __post_init__(self) -> None: - if self.delay_method not in ("human", "robot"): - raise ValueError(f"delay_method must be 'human' or 'robot', got {self.delay_method}") - @dataclass class CacheConfig: From f01898fe88b397b0c981398650664e3ecb3f9b08 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Wed, 21 Jan 2026 11:41:33 +0500 Subject: [PATCH 027/156] implement cache --- openml/_api/http/client.py | 76 
++++++++++++++++++++++++++++++++++---- 1 file changed, 69 insertions(+), 7 deletions(-) diff --git a/openml/_api/http/client.py b/openml/_api/http/client.py index a90e93933..f76efe5a1 100644 --- a/openml/_api/http/client.py +++ b/openml/_api/http/client.py @@ -1,5 +1,7 @@ from __future__ import annotations +import json +import time from pathlib import Path from typing import TYPE_CHECKING, Any from urllib.parse import urlencode, urljoin, urlparse @@ -34,11 +36,70 @@ def _get_cache_dir(self, url: str, params: dict[str, Any]) -> Path: return Path(self.dir).joinpath(*netloc_parts, *path_parts, *params_part) - def _get_cache_response(self, cache_dir: Path) -> Response: # noqa: ARG002 - return Response() + def _get_cache_response(self, cache_dir: Path) -> Response: + if not cache_dir.exists(): + raise FileNotFoundError(f"Cache directory not found: {cache_dir}") - def _set_cache_response(self, cache_dir: Path, response: Response) -> None: # noqa: ARG002 - return None + meta_path = cache_dir / "meta.json" + headers_path = cache_dir / "headers.json" + body_path = cache_dir / "body.bin" + + if not (meta_path.exists() and headers_path.exists() and body_path.exists()): + raise FileNotFoundError(f"Incomplete cache at {cache_dir}") + + with meta_path.open("r", encoding="utf-8") as f: + meta = json.load(f) + + created_at = meta.get("created_at") + if created_at is None: + raise ValueError("Cache metadata missing 'created_at'") + + if time.time() - created_at > self.ttl: + raise TimeoutError(f"Cache expired for {cache_dir}") + + with headers_path.open("r", encoding="utf-8") as f: + headers = json.load(f) + + body = body_path.read_bytes() + + response = Response() + response.status_code = meta["status_code"] + response.url = meta["url"] + response.reason = meta["reason"] + response.headers = headers + response._content = body + response.encoding = meta["encoding"] + + return response + + def _set_cache_response(self, cache_dir: Path, response: Response) -> None: + 
cache_dir.mkdir(parents=True, exist_ok=True) + + # body + (cache_dir / "body.bin").write_bytes(response.content) + + # headers + with (cache_dir / "headers.json").open("w", encoding="utf-8") as f: + json.dump(dict(response.headers), f) + + # meta + meta = { + "status_code": response.status_code, + "url": response.url, + "reason": response.reason, + "encoding": response.encoding, + "elapsed": response.elapsed.total_seconds(), + "created_at": time.time(), + "request": { + "method": response.request.method if response.request else None, + "url": response.request.url if response.request else None, + "headers": dict(response.request.headers) if response.request else None, + "body": response.request.body if response.request else None, + }, + } + + with (cache_dir / "meta.json").open("w", encoding="utf-8") as f: + json.dump(meta, f) class HTTPClient(CacheMixin): @@ -88,7 +149,10 @@ def request( if use_cache: try: return self._get_cache_response(cache_dir) - # TODO: handle ttl expired error + except FileNotFoundError: + pass + except TimeoutError: + pass except Exception: raise @@ -114,8 +178,6 @@ def get( use_api_key: bool = False, **request_kwargs: Any, ) -> Response: - # TODO: remove override when cache is implemented - use_cache = False return self.request( method="GET", path=path, From 5c4511e60b0bc50aba2509bc48bb931082b0caf5 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Wed, 21 Jan 2026 13:36:05 +0500 Subject: [PATCH 028/156] refactor clients --- openml/_api/clients/__init__.py | 6 + .../_api/{http/client.py => clients/http.py} | 126 +++++++++--------- .../_api/{http/utils.py => clients/minio.py} | 0 openml/_api/config.py | 6 +- openml/_api/http/__init__.py | 3 - openml/_api/runtime/core.py | 37 ++++- 6 files changed, 101 insertions(+), 77 deletions(-) create mode 100644 openml/_api/clients/__init__.py rename openml/_api/{http/client.py => clients/http.py} (61%) rename openml/_api/{http/utils.py => clients/minio.py} (100%) delete mode 100644 
openml/_api/http/__init__.py diff --git a/openml/_api/clients/__init__.py b/openml/_api/clients/__init__.py new file mode 100644 index 000000000..8a5ff94e4 --- /dev/null +++ b/openml/_api/clients/__init__.py @@ -0,0 +1,6 @@ +from .http import HTTPCache, HTTPClient + +__all__ = [ + "HTTPCache", + "HTTPClient", +] diff --git a/openml/_api/http/client.py b/openml/_api/clients/http.py similarity index 61% rename from openml/_api/http/client.py rename to openml/_api/clients/http.py index f76efe5a1..4e126ee92 100644 --- a/openml/_api/http/client.py +++ b/openml/_api/clients/http.py @@ -10,42 +10,41 @@ from requests import Response from openml.__version__ import __version__ -from openml._api.config import settings if TYPE_CHECKING: - from openml._api.config import APIConfig + from openml._api.config import DelayMethod -class CacheMixin: - @property - def dir(self) -> str: - return settings.cache.dir +class HTTPCache: + def __init__(self, *, path: Path, ttl: int) -> None: + self.path = path + self.ttl = ttl - @property - def ttl(self) -> int: - return settings.cache.ttl - - def _get_cache_dir(self, url: str, params: dict[str, Any]) -> Path: + def get_key(self, url: str, params: dict[str, Any]) -> str: parsed_url = urlparse(url) - netloc_parts = parsed_url.netloc.split(".")[::-1] # reverse domain + netloc_parts = parsed_url.netloc.split(".")[::-1] path_parts = parsed_url.path.strip("/").split("/") - # remove api_key and serialize params if any filtered_params = {k: v for k, v in params.items() if k != "api_key"} params_part = [urlencode(filtered_params)] if filtered_params else [] - return Path(self.dir).joinpath(*netloc_parts, *path_parts, *params_part) + return str(Path(*netloc_parts, *path_parts, *params_part)) + + def _key_to_path(self, key: str) -> Path: + return self.path.joinpath(key) + + def load(self, key: str) -> Response: + path = self._key_to_path(key) - def _get_cache_response(self, cache_dir: Path) -> Response: - if not cache_dir.exists(): - raise 
FileNotFoundError(f"Cache directory not found: {cache_dir}") + if not path.exists(): + raise FileNotFoundError(f"Cache directory not found: {path}") - meta_path = cache_dir / "meta.json" - headers_path = cache_dir / "headers.json" - body_path = cache_dir / "body.bin" + meta_path = path / "meta.json" + headers_path = path / "headers.json" + body_path = path / "body.bin" if not (meta_path.exists() and headers_path.exists() and body_path.exists()): - raise FileNotFoundError(f"Incomplete cache at {cache_dir}") + raise FileNotFoundError(f"Incomplete cache at {path}") with meta_path.open("r", encoding="utf-8") as f: meta = json.load(f) @@ -55,7 +54,7 @@ def _get_cache_response(self, cache_dir: Path) -> Response: raise ValueError("Cache metadata missing 'created_at'") if time.time() - created_at > self.ttl: - raise TimeoutError(f"Cache expired for {cache_dir}") + raise TimeoutError(f"Cache expired for {path}") with headers_path.open("r", encoding="utf-8") as f: headers = json.load(f) @@ -72,17 +71,15 @@ def _get_cache_response(self, cache_dir: Path) -> Response: return response - def _set_cache_response(self, cache_dir: Path, response: Response) -> None: - cache_dir.mkdir(parents=True, exist_ok=True) + def save(self, key: str, response: Response) -> None: + path = self._key_to_path(key) + path.mkdir(parents=True, exist_ok=True) - # body - (cache_dir / "body.bin").write_bytes(response.content) + (path / "body.bin").write_bytes(response.content) - # headers - with (cache_dir / "headers.json").open("w", encoding="utf-8") as f: + with (path / "headers.json").open("w", encoding="utf-8") as f: json.dump(dict(response.headers), f) - # meta meta = { "status_code": response.status_code, "url": response.url, @@ -98,30 +95,33 @@ def _set_cache_response(self, cache_dir: Path, response: Response) -> None: }, } - with (cache_dir / "meta.json").open("w", encoding="utf-8") as f: + with (path / "meta.json").open("w", encoding="utf-8") as f: json.dump(meta, f) -class 
HTTPClient(CacheMixin): - def __init__(self, config: APIConfig) -> None: - self.config = config - self.headers: dict[str, str] = {"user-agent": f"openml-python/{__version__}"} - - @property - def server(self) -> str: - return self.config.server - - @property - def base_url(self) -> str: - return self.config.base_url - - @property - def key(self) -> str: - return self.config.key +class HTTPClient: + def __init__( # noqa: PLR0913 + self, + *, + server: str, + base_url: str, + api_key: str, + timeout: int, + retries: int, + delay_method: DelayMethod, + delay_time: int, + cache: HTTPCache | None = None, + ) -> None: + self.server = server + self.base_url = base_url + self.api_key = api_key + self.timeout = timeout + self.retries = retries + self.delay_method = delay_method + self.delay_time = delay_time + self.cache = cache - @property - def timeout(self) -> int: - return self.config.timeout + self.headers: dict[str, str] = {"user-agent": f"openml-python/{__version__}"} def request( self, @@ -134,27 +134,25 @@ def request( ) -> Response: url = urljoin(self.server, urljoin(self.base_url, path)) - params = request_kwargs.pop("params", {}) - params = params.copy() + # prepare params + params = request_kwargs.pop("params", {}).copy() if use_api_key: - params["api_key"] = self.key + params["api_key"] = self.api_key - headers = request_kwargs.pop("headers", {}) - headers = headers.copy() + # prepare headers + headers = request_kwargs.pop("headers", {}).copy() headers.update(self.headers) timeout = request_kwargs.pop("timeout", self.timeout) - cache_dir = self._get_cache_dir(url, params) - if use_cache: + if use_cache and self.cache is not None: + cache_key = self.cache.get_key(url, params) try: - return self._get_cache_response(cache_dir) - except FileNotFoundError: - pass - except TimeoutError: - pass + return self.cache.load(cache_key) + except (FileNotFoundError, TimeoutError): + pass # cache miss or expired, continue except Exception: - raise + raise # propagate 
unexpected cache errors response = requests.request( method=method, @@ -165,8 +163,8 @@ def request( **request_kwargs, ) - if use_cache: - self._set_cache_response(cache_dir, response) + if use_cache and self.cache is not None: + self.cache.save(cache_key, response) return response diff --git a/openml/_api/http/utils.py b/openml/_api/clients/minio.py similarity index 100% rename from openml/_api/http/utils.py rename to openml/_api/clients/minio.py diff --git a/openml/_api/config.py b/openml/_api/config.py index 13063df7a..aa153a556 100644 --- a/openml/_api/config.py +++ b/openml/_api/config.py @@ -13,7 +13,7 @@ class DelayMethod(str, Enum): class APIConfig: server: str base_url: str - key: str + api_key: str timeout: int = 10 # seconds @@ -48,12 +48,12 @@ class Settings: v1=APIConfig( server="https://www.openml.org/", base_url="api/v1/xml/", - key="...", + api_key="...", ), v2=APIConfig( server="http://127.0.0.1:8001/", base_url="", - key="...", + api_key="...", ), ), connection=ConnectionConfig(), diff --git a/openml/_api/http/__init__.py b/openml/_api/http/__init__.py deleted file mode 100644 index 8e6d1e4ce..000000000 --- a/openml/_api/http/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from openml._api.http.client import HTTPClient - -__all__ = ["HTTPClient"] diff --git a/openml/_api/runtime/core.py b/openml/_api/runtime/core.py index 98b587411..483b74d3d 100644 --- a/openml/_api/runtime/core.py +++ b/openml/_api/runtime/core.py @@ -1,9 +1,10 @@ from __future__ import annotations +from pathlib import Path from typing import TYPE_CHECKING +from openml._api.clients import HTTPCache, HTTPClient from openml._api.config import settings -from openml._api.http.client import HTTPClient from openml._api.resources import ( DatasetsV1, DatasetsV2, @@ -22,20 +23,42 @@ def __init__(self, *, datasets: DatasetsAPI, tasks: TasksAPI): def build_backend(version: str, *, strict: bool) -> APIBackend: - v1_http = HTTPClient(config=settings.api.v1) - v2_http = 
HTTPClient(config=settings.api.v2) + http_cache = HTTPCache( + path=Path(settings.cache.dir), + ttl=settings.cache.ttl, + ) + v1_http_client = HTTPClient( + server=settings.api.v1.server, + base_url=settings.api.v1.base_url, + api_key=settings.api.v1.api_key, + timeout=settings.api.v1.timeout, + retries=settings.connection.retries, + delay_method=settings.connection.delay_method, + delay_time=settings.connection.delay_time, + cache=http_cache, + ) + v2_http_client = HTTPClient( + server=settings.api.v2.server, + base_url=settings.api.v2.base_url, + api_key=settings.api.v2.api_key, + timeout=settings.api.v2.timeout, + retries=settings.connection.retries, + delay_method=settings.connection.delay_method, + delay_time=settings.connection.delay_time, + cache=http_cache, + ) v1 = APIBackend( - datasets=DatasetsV1(v1_http), - tasks=TasksV1(v1_http), + datasets=DatasetsV1(v1_http_client), + tasks=TasksV1(v1_http_client), ) if version == "v1": return v1 v2 = APIBackend( - datasets=DatasetsV2(v2_http), - tasks=TasksV2(v2_http), + datasets=DatasetsV2(v2_http_client), + tasks=TasksV2(v2_http_client), ) if strict: From 43276d2ac56ba39d195b5d54d72bed2e61da3f79 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Fri, 23 Jan 2026 12:17:53 +0500 Subject: [PATCH 029/156] fix import in resources/base.py --- openml/_api/resources/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openml/_api/resources/base.py b/openml/_api/resources/base.py index 6fbf8977d..54b40a0e0 100644 --- a/openml/_api/resources/base.py +++ b/openml/_api/resources/base.py @@ -6,7 +6,7 @@ if TYPE_CHECKING: from requests import Response - from openml._api.http import HTTPClient + from openml._api.clients import HTTPClient from openml.datasets.dataset import OpenMLDataset from openml.tasks.task import OpenMLTask From 1206f697d09df82ed7f18bfea94a476844e01cb4 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Mon, 26 Jan 2026 13:52:20 +0500 Subject: [PATCH 030/156] refactor and add exception handling --- 
openml/_api/clients/http.py | 241 +++++++++++++++++++++++++++++++++--- openml/_api/config.py | 5 +- openml/_api/runtime/core.py | 6 +- 3 files changed, 229 insertions(+), 23 deletions(-) diff --git a/openml/_api/clients/http.py b/openml/_api/clients/http.py index 4e126ee92..dc184074d 100644 --- a/openml/_api/clients/http.py +++ b/openml/_api/clients/http.py @@ -1,18 +1,28 @@ from __future__ import annotations import json +import logging +import math +import random import time +import xml +from collections.abc import Mapping from pathlib import Path -from typing import TYPE_CHECKING, Any +from typing import Any from urllib.parse import urlencode, urljoin, urlparse import requests +import xmltodict from requests import Response from openml.__version__ import __version__ - -if TYPE_CHECKING: - from openml._api.config import DelayMethod +from openml._api.config import RetryPolicy +from openml.exceptions import ( + OpenMLNotAuthorizedError, + OpenMLServerError, + OpenMLServerException, + OpenMLServerNoResult, +) class HTTPCache: @@ -108,8 +118,7 @@ def __init__( # noqa: PLR0913 api_key: str, timeout: int, retries: int, - delay_method: DelayMethod, - delay_time: int, + retry_policy: RetryPolicy, cache: HTTPCache | None = None, ) -> None: self.server = server @@ -117,12 +126,194 @@ def __init__( # noqa: PLR0913 self.api_key = api_key self.timeout = timeout self.retries = retries - self.delay_method = delay_method - self.delay_time = delay_time + self.retry_policy = retry_policy self.cache = cache + self.retry_func = ( + self._human_delay if retry_policy == RetryPolicy.HUMAN else self._robot_delay + ) self.headers: dict[str, str] = {"user-agent": f"openml-python/{__version__}"} + def _robot_delay(self, n: int) -> float: + wait = (1 / (1 + math.exp(-(n * 0.5 - 4)))) * 60 + variation = random.gauss(0, wait / 10) + return max(1.0, wait + variation) + + def _human_delay(self, n: int) -> float: + return max(1.0, n) + + def _parse_exception_response( + self, + response: 
Response, + ) -> tuple[int | None, str]: + content_type = response.headers.get("Content-Type", "").lower() + + if "json" in content_type: + server_exception = response.json() + server_error = server_exception["detail"] + code = server_error.get("code") + message = server_error.get("message") + additional_information = server_error.get("additional_information") + else: + server_exception = xmltodict.parse(response.text) + server_error = server_exception["oml:error"] + code = server_error.get("oml:code") + message = server_error.get("oml:message") + additional_information = server_error.get("oml:additional_information") + + if code is not None: + code = int(code) + + if message and additional_information: + full_message = f"{message} - {additional_information}" + elif message: + full_message = message + elif additional_information: + full_message = additional_information + else: + full_message = "" + + return code, full_message + + def _raise_code_specific_error( + self, + code: int, + message: str, + url: str, + files: Mapping[str, Any] | None, + ) -> None: + if code in [111, 372, 512, 500, 482, 542, 674]: + # 512 for runs, 372 for datasets, 500 for flows + # 482 for tasks, 542 for evaluations, 674 for setups + # 111 for dataset descriptions + raise OpenMLServerNoResult(code=code, message=message, url=url) + + # 163: failure to validate flow XML (https://www.openml.org/api_docs#!/flow/post_flow) + if code in [163] and files is not None and "description" in files: + # file_elements['description'] is the XML file description of the flow + message = f"\n{files['description']}\n{message}" + + if code in [ + 102, # flow/exists post + 137, # dataset post + 350, # dataset/42 delete + 310, # flow/ post + 320, # flow/42 delete + 400, # run/42 delete + 460, # task/42 delete + ]: + raise OpenMLNotAuthorizedError( + message=( + f"The API call {url} requires authentication via an API key.\nPlease configure " + "OpenML-Python to use your API as described in this example:" + 
"\nhttps://openml.github.io/openml-python/latest/examples/Basics/introduction_tutorial/#authentication" + ) + ) + + # Propagate all server errors to the calling functions, except + # for 107 which represents a database connection error. + # These are typically caused by high server load, + # which means trying again might resolve the issue. + # DATABASE_CONNECTION_ERRCODE + if code != 107: + raise OpenMLServerException(code=code, message=message, url=url) + + def _validate_response( + self, + method: str, + url: str, + files: Mapping[str, Any] | None, + response: Response, + ) -> Exception | None: + if ( + "Content-Encoding" not in response.headers + or response.headers["Content-Encoding"] != "gzip" + ): + logging.warning(f"Received uncompressed content from OpenML for {url}.") + + if response.status_code == 200: + return None + + if response.status_code == requests.codes.URI_TOO_LONG: + raise OpenMLServerError(f"URI too long! ({url})") + + retry_raise_e: Exception | None = None + + try: + code, message = self._parse_exception_response(response) + + except (requests.exceptions.JSONDecodeError, xml.parsers.expat.ExpatError) as e: + if method != "GET": + extra = f"Status code: {response.status_code}\n{response.text}" + raise OpenMLServerError( + f"Unexpected server error when calling {url}. Please contact the " + f"developers!\n{extra}" + ) from e + + retry_raise_e = e + + except Exception as e: + # If we failed to parse it out, + # then something has gone wrong in the body we have sent back + # from the server and there is little extra information we can capture. + raise OpenMLServerError( + f"Unexpected server error when calling {url}. 
Please contact the developers!\n" + f"Status code: {response.status_code}\n{response.text}", + ) from e + + if code is not None: + self._raise_code_specific_error( + code=code, + message=message, + url=url, + files=files, + ) + + if retry_raise_e is None: + retry_raise_e = OpenMLServerException(code=code, message=message, url=url) + + return retry_raise_e + + def _request( # noqa: PLR0913 + self, + method: str, + url: str, + params: Mapping[str, Any], + headers: Mapping[str, str], + timeout: float | int, + files: Mapping[str, Any] | None, + **request_kwargs: Any, + ) -> tuple[Response | None, Exception | None]: + retry_raise_e: Exception | None = None + response: Response | None = None + + try: + response = requests.request( + method=method, + url=url, + params=params, + headers=headers, + timeout=timeout, + files=files, + **request_kwargs, + ) + except ( + requests.exceptions.ChunkedEncodingError, + requests.exceptions.ConnectionError, + requests.exceptions.SSLError, + ) as e: + retry_raise_e = e + + if response is not None: + retry_raise_e = self._validate_response( + method=method, + url=url, + files=files, + response=response, + ) + + return response, retry_raise_e + def request( self, method: str, @@ -133,6 +324,7 @@ def request( **request_kwargs: Any, ) -> Response: url = urljoin(self.server, urljoin(self.base_url, path)) + retries = max(1, self.retries) # prepare params params = request_kwargs.pop("params", {}).copy() @@ -144,6 +336,9 @@ def request( headers.update(self.headers) timeout = request_kwargs.pop("timeout", self.timeout) + files = request_kwargs.pop("files", None) + + use_cache = False if use_cache and self.cache is not None: cache_key = self.cache.get_key(url, params) @@ -154,14 +349,28 @@ def request( except Exception: raise # propagate unexpected cache errors - response = requests.request( - method=method, - url=url, - params=params, - headers=headers, - timeout=timeout, - **request_kwargs, - ) + for retry_counter in range(1, retries + 1): + 
response, retry_raise_e = self._request( + method=method, + url=url, + params=params, + headers=headers, + timeout=timeout, + files=files, + **request_kwargs, + ) + + # executed successfully + if retry_raise_e is None: + break + # tries completed + if retry_counter >= retries: + raise retry_raise_e + + delay = self.retry_func(retry_counter) + time.sleep(delay) + + assert response is not None if use_cache and self.cache is not None: self.cache.save(cache_key, response) diff --git a/openml/_api/config.py b/openml/_api/config.py index aa153a556..6cce06403 100644 --- a/openml/_api/config.py +++ b/openml/_api/config.py @@ -4,7 +4,7 @@ from enum import Enum -class DelayMethod(str, Enum): +class RetryPolicy(str, Enum): HUMAN = "human" ROBOT = "robot" @@ -26,8 +26,7 @@ class APISettings: @dataclass class ConnectionConfig: retries: int = 3 - delay_method: DelayMethod = DelayMethod.HUMAN - delay_time: int = 1 # seconds + retry_policy: RetryPolicy = RetryPolicy.HUMAN @dataclass diff --git a/openml/_api/runtime/core.py b/openml/_api/runtime/core.py index 483b74d3d..25f2649ee 100644 --- a/openml/_api/runtime/core.py +++ b/openml/_api/runtime/core.py @@ -33,8 +33,7 @@ def build_backend(version: str, *, strict: bool) -> APIBackend: api_key=settings.api.v1.api_key, timeout=settings.api.v1.timeout, retries=settings.connection.retries, - delay_method=settings.connection.delay_method, - delay_time=settings.connection.delay_time, + retry_policy=settings.connection.retry_policy, cache=http_cache, ) v2_http_client = HTTPClient( @@ -43,8 +42,7 @@ def build_backend(version: str, *, strict: bool) -> APIBackend: api_key=settings.api.v2.api_key, timeout=settings.api.v2.timeout, retries=settings.connection.retries, - delay_method=settings.connection.delay_method, - delay_time=settings.connection.delay_time, + retry_policy=settings.connection.retry_policy, cache=http_cache, ) From 4948e991f96821372934c7132f4a695da165d17b Mon Sep 17 00:00:00 2001 From: geetu040 Date: Mon, 26 Jan 2026 20:43:32 
+0500 Subject: [PATCH 031/156] refactor resources/base/ --- openml/_api/resources/base/__init__.py | 13 ++++++ openml/_api/resources/base/base.py | 41 +++++++++++++++++++ .../resources/{base.py => base/resources.py} | 16 ++++---- openml/_api/resources/base/versions.py | 23 +++++++++++ openml/_api/resources/datasets.py | 6 +-- openml/_api/resources/tasks.py | 6 +-- 6 files changed, 91 insertions(+), 14 deletions(-) create mode 100644 openml/_api/resources/base/__init__.py create mode 100644 openml/_api/resources/base/base.py rename openml/_api/resources/{base.py => base/resources.py} (64%) create mode 100644 openml/_api/resources/base/versions.py diff --git a/openml/_api/resources/base/__init__.py b/openml/_api/resources/base/__init__.py new file mode 100644 index 000000000..851cfe942 --- /dev/null +++ b/openml/_api/resources/base/__init__.py @@ -0,0 +1,13 @@ +from openml._api.resources.base.base import APIVersion, ResourceAPI, ResourceType +from openml._api.resources.base.resources import DatasetsAPI, TasksAPI +from openml._api.resources.base.versions import ResourceV1, ResourceV2 + +__all__ = [ + "APIVersion", + "DatasetsAPI", + "ResourceAPI", + "ResourceType", + "ResourceV1", + "ResourceV2", + "TasksAPI", +] diff --git a/openml/_api/resources/base/base.py b/openml/_api/resources/base/base.py new file mode 100644 index 000000000..8d85d054b --- /dev/null +++ b/openml/_api/resources/base/base.py @@ -0,0 +1,41 @@ +from __future__ import annotations + +from abc import ABC, abstractmethod +from enum import Enum +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from openml._api.clients import HTTPClient + + +class APIVersion(str, Enum): + V1 = "v1" + V2 = "v2" + + +class ResourceType(str, Enum): + DATASETS = "datasets" + TASKS = "tasks" + + +class ResourceAPI(ABC): + api_version: APIVersion | None = None + resource_type: ResourceType | None = None + + def __init__(self, http: HTTPClient): + self._http = http + + def _raise_not_implemented_error(self, method_name: 
str | None = None) -> None: + version = getattr(self.api_version, "name", "Unknown version") + resource = getattr(self.resource_type, "name", "Unknown resource") + method_info = f" Method: {method_name}" if method_name else "" + raise NotImplementedError( + f"{self.__class__.__name__}: {version} API does not support this " + f"functionality for resource: {resource}.{method_info}" + ) + + @abstractmethod + def delete(self) -> None: ... + + @abstractmethod + def publish(self) -> None: ... diff --git a/openml/_api/resources/base.py b/openml/_api/resources/base/resources.py similarity index 64% rename from openml/_api/resources/base.py rename to openml/_api/resources/base/resources.py index 54b40a0e0..edb26c91c 100644 --- a/openml/_api/resources/base.py +++ b/openml/_api/resources/base/resources.py @@ -1,27 +1,27 @@ from __future__ import annotations -from abc import ABC, abstractmethod +from abc import abstractmethod from typing import TYPE_CHECKING +from openml._api.resources.base import ResourceAPI, ResourceType + if TYPE_CHECKING: from requests import Response - from openml._api.clients import HTTPClient from openml.datasets.dataset import OpenMLDataset from openml.tasks.task import OpenMLTask -class ResourceAPI: - def __init__(self, http: HTTPClient): - self._http = http - +class DatasetsAPI(ResourceAPI): + resource_type: ResourceType | None = ResourceType.DATASETS -class DatasetsAPI(ResourceAPI, ABC): @abstractmethod def get(self, dataset_id: int) -> OpenMLDataset | tuple[OpenMLDataset, Response]: ... 
-class TasksAPI(ResourceAPI, ABC): +class TasksAPI(ResourceAPI): + resource_type: ResourceType | None = ResourceType.TASKS + @abstractmethod def get( self, diff --git a/openml/_api/resources/base/versions.py b/openml/_api/resources/base/versions.py new file mode 100644 index 000000000..8a81517e5 --- /dev/null +++ b/openml/_api/resources/base/versions.py @@ -0,0 +1,23 @@ +from __future__ import annotations + +from openml._api.resources.base import APIVersion, ResourceAPI + + +class ResourceV1(ResourceAPI): + api_version: APIVersion | None = APIVersion.V1 + + def delete(self) -> None: + pass + + def publish(self) -> None: + pass + + +class ResourceV2(ResourceAPI): + api_version: APIVersion | None = APIVersion.V2 + + def delete(self) -> None: + self._raise_not_implemented_error("delete") + + def publish(self) -> None: + self._raise_not_implemented_error("publish") diff --git a/openml/_api/resources/datasets.py b/openml/_api/resources/datasets.py index 9ff1ec278..f3a49a84f 100644 --- a/openml/_api/resources/datasets.py +++ b/openml/_api/resources/datasets.py @@ -2,7 +2,7 @@ from typing import TYPE_CHECKING -from openml._api.resources.base import DatasetsAPI +from openml._api.resources.base import DatasetsAPI, ResourceV1, ResourceV2 if TYPE_CHECKING: from responses import Response @@ -10,11 +10,11 @@ from openml.datasets.dataset import OpenMLDataset -class DatasetsV1(DatasetsAPI): +class DatasetsV1(ResourceV1, DatasetsAPI): def get(self, dataset_id: int) -> OpenMLDataset | tuple[OpenMLDataset, Response]: raise NotImplementedError -class DatasetsV2(DatasetsAPI): +class DatasetsV2(ResourceV2, DatasetsAPI): def get(self, dataset_id: int) -> OpenMLDataset | tuple[OpenMLDataset, Response]: raise NotImplementedError diff --git a/openml/_api/resources/tasks.py b/openml/_api/resources/tasks.py index f494fb9a3..a7ca39208 100644 --- a/openml/_api/resources/tasks.py +++ b/openml/_api/resources/tasks.py @@ -4,7 +4,7 @@ import xmltodict -from openml._api.resources.base import 
TasksAPI +from openml._api.resources.base import ResourceV1, ResourceV2, TasksAPI from openml.tasks.task import ( OpenMLClassificationTask, OpenMLClusteringTask, @@ -18,7 +18,7 @@ from requests import Response -class TasksV1(TasksAPI): +class TasksV1(ResourceV1, TasksAPI): def get( self, task_id: int, @@ -118,7 +118,7 @@ def _create_task_from_xml(self, xml: str) -> OpenMLTask: return cls(**common_kwargs) # type: ignore -class TasksV2(TasksAPI): +class TasksV2(ResourceV2, TasksAPI): def get( self, task_id: int, From a3541675fd6452e68f268127df7c583bb9c2d0ca Mon Sep 17 00:00:00 2001 From: geetu040 Date: Mon, 26 Jan 2026 21:06:20 +0500 Subject: [PATCH 032/156] implement delete --- openml/_api/resources/base/base.py | 23 +++++--- openml/_api/resources/base/resources.py | 4 +- openml/_api/resources/base/versions.py | 76 ++++++++++++++++++++++--- 3 files changed, 86 insertions(+), 17 deletions(-) diff --git a/openml/_api/resources/base/base.py b/openml/_api/resources/base/base.py index 8d85d054b..9b1803508 100644 --- a/openml/_api/resources/base/base.py +++ b/openml/_api/resources/base/base.py @@ -14,28 +14,37 @@ class APIVersion(str, Enum): class ResourceType(str, Enum): - DATASETS = "datasets" - TASKS = "tasks" + DATASET = "dataset" + TASK = "task" + TASK_TYPE = "task_type" + EVALUATION_MEASURE = "evaluation_measure" + ESTIMATION_PROCEDURE = "estimation_procedure" + EVALUATION = "evaluation" + FLOW = "flow" + STUDY = "study" + RUN = "run" + SETUP = "setup" + USER = "user" class ResourceAPI(ABC): - api_version: APIVersion | None = None - resource_type: ResourceType | None = None + api_version: APIVersion + resource_type: ResourceType def __init__(self, http: HTTPClient): self._http = http - def _raise_not_implemented_error(self, method_name: str | None = None) -> None: + def _get_not_implemented_message(self, method_name: str | None = None) -> str: version = getattr(self.api_version, "name", "Unknown version") resource = getattr(self.resource_type, "name", "Unknown 
resource") method_info = f" Method: {method_name}" if method_name else "" - raise NotImplementedError( + return ( f"{self.__class__.__name__}: {version} API does not support this " f"functionality for resource: {resource}.{method_info}" ) @abstractmethod - def delete(self) -> None: ... + def delete(self, resource_id: int) -> bool: ... @abstractmethod def publish(self) -> None: ... diff --git a/openml/_api/resources/base/resources.py b/openml/_api/resources/base/resources.py index edb26c91c..55cb95c0d 100644 --- a/openml/_api/resources/base/resources.py +++ b/openml/_api/resources/base/resources.py @@ -13,14 +13,14 @@ class DatasetsAPI(ResourceAPI): - resource_type: ResourceType | None = ResourceType.DATASETS + resource_type: ResourceType = ResourceType.DATASET @abstractmethod def get(self, dataset_id: int) -> OpenMLDataset | tuple[OpenMLDataset, Response]: ... class TasksAPI(ResourceAPI): - resource_type: ResourceType | None = ResourceType.TASKS + resource_type: ResourceType = ResourceType.TASK @abstractmethod def get( diff --git a/openml/_api/resources/base/versions.py b/openml/_api/resources/base/versions.py index 8a81517e5..ce7b02057 100644 --- a/openml/_api/resources/base/versions.py +++ b/openml/_api/resources/base/versions.py @@ -1,23 +1,83 @@ from __future__ import annotations -from openml._api.resources.base import APIVersion, ResourceAPI +import xmltodict + +from openml._api.resources.base import APIVersion, ResourceAPI, ResourceType +from openml.exceptions import ( + OpenMLNotAuthorizedError, + OpenMLServerError, + OpenMLServerException, +) class ResourceV1(ResourceAPI): - api_version: APIVersion | None = APIVersion.V1 + api_version: APIVersion = APIVersion.V1 - def delete(self) -> None: - pass + def delete(self, resource_id: int) -> bool: + if self.resource_type == ResourceType.DATASET: + resource_type = "data" + else: + resource_type = self.resource_type.name + + legal_resources = { + "data", + "flow", + "task", + "run", + "study", + "user", + } + if 
resource_type not in legal_resources: + raise ValueError(f"Can't delete a {resource_type}") + + url_suffix = f"{resource_type}/{resource_id}" + try: + response = self._http.delete(url_suffix) + result = xmltodict.parse(response.content) + return f"oml:{resource_type}_delete" in result + except OpenMLServerException as e: + # https://github.com/openml/OpenML/blob/21f6188d08ac24fcd2df06ab94cf421c946971b0/openml_OS/views/pages/api_new/v1/xml/pre.php + # Most exceptions are descriptive enough to be raised as their standard + # OpenMLServerException, however there are two cases where we add information: + # - a generic "failed" message, we direct them to the right issue board + # - when the user successfully authenticates with the server, + # but user is not allowed to take the requested action, + # in which case we specify a OpenMLNotAuthorizedError. + by_other_user = [323, 353, 393, 453, 594] + has_dependent_entities = [324, 326, 327, 328, 354, 454, 464, 595] + unknown_reason = [325, 355, 394, 455, 593] + if e.code in by_other_user: + raise OpenMLNotAuthorizedError( + message=( + f"The {resource_type} can not be deleted " + "because it was not uploaded by you." 
+ ), + ) from e + if e.code in has_dependent_entities: + raise OpenMLNotAuthorizedError( + message=( + f"The {resource_type} can not be deleted because " + f"it still has associated entities: {e.message}" + ), + ) from e + if e.code in unknown_reason: + raise OpenMLServerError( + message=( + f"The {resource_type} can not be deleted for unknown reason," + " please open an issue at: https://github.com/openml/openml/issues/new" + ), + ) from e + raise e def publish(self) -> None: pass class ResourceV2(ResourceAPI): - api_version: APIVersion | None = APIVersion.V2 + api_version: APIVersion = APIVersion.V2 - def delete(self) -> None: - self._raise_not_implemented_error("delete") + def delete(self, resource_id: int) -> bool: + raise NotImplementedError(self._get_not_implemented_message("publish")) def publish(self) -> None: - self._raise_not_implemented_error("publish") + raise NotImplementedError(self._get_not_implemented_message("publish")) From 1fe7e3ed8561945c20e8433603046a35484c37e7 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Tue, 27 Jan 2026 12:56:35 +0500 Subject: [PATCH 033/156] implement publish and minor refactoring --- openml/_api/clients/http.py | 2 - openml/_api/resources/base/base.py | 15 ++-- openml/_api/resources/base/versions.py | 113 ++++++++++++++++--------- 3 files changed, 82 insertions(+), 48 deletions(-) diff --git a/openml/_api/clients/http.py b/openml/_api/clients/http.py index dc184074d..1622087c9 100644 --- a/openml/_api/clients/http.py +++ b/openml/_api/clients/http.py @@ -338,8 +338,6 @@ def request( timeout = request_kwargs.pop("timeout", self.timeout) files = request_kwargs.pop("files", None) - use_cache = False - if use_cache and self.cache is not None: cache_key = self.cache.get_key(url, params) try: diff --git a/openml/_api/resources/base/base.py b/openml/_api/resources/base/base.py index 9b1803508..f2d7d1e88 100644 --- a/openml/_api/resources/base/base.py +++ b/openml/_api/resources/base/base.py @@ -5,6 +5,9 @@ from typing import 
TYPE_CHECKING if TYPE_CHECKING: + from collections.abc import Mapping + from typing import Any + from openml._api.clients import HTTPClient @@ -34,6 +37,12 @@ class ResourceAPI(ABC): def __init__(self, http: HTTPClient): self._http = http + @abstractmethod + def delete(self, resource_id: int) -> bool: ... + + @abstractmethod + def publish(self, path: str, files: Mapping[str, Any] | None) -> int: ... + def _get_not_implemented_message(self, method_name: str | None = None) -> str: version = getattr(self.api_version, "name", "Unknown version") resource = getattr(self.resource_type, "name", "Unknown resource") @@ -42,9 +51,3 @@ def _get_not_implemented_message(self, method_name: str | None = None) -> str: f"{self.__class__.__name__}: {version} API does not support this " f"functionality for resource: {resource}.{method_info}" ) - - @abstractmethod - def delete(self, resource_id: int) -> bool: ... - - @abstractmethod - def publish(self) -> None: ... diff --git a/openml/_api/resources/base/versions.py b/openml/_api/resources/base/versions.py index ce7b02057..41f883ebe 100644 --- a/openml/_api/resources/base/versions.py +++ b/openml/_api/resources/base/versions.py @@ -1,5 +1,8 @@ from __future__ import annotations +from collections.abc import Mapping +from typing import Any + import xmltodict from openml._api.resources.base import APIVersion, ResourceAPI, ResourceType @@ -13,6 +16,11 @@ class ResourceV1(ResourceAPI): api_version: APIVersion = APIVersion.V1 + def publish(self, path: str, files: Mapping[str, Any] | None) -> int: + response = self._http.post(path, files=files) + parsed_response = xmltodict.parse(response.content) + return self._extract_id_from_upload(parsed_response) + def delete(self, resource_id: int) -> bool: if self.resource_type == ResourceType.DATASET: resource_type = "data" @@ -30,54 +38,79 @@ def delete(self, resource_id: int) -> bool: if resource_type not in legal_resources: raise ValueError(f"Can't delete a {resource_type}") - url_suffix = 
f"{resource_type}/{resource_id}" + path = f"{resource_type}/{resource_id}" try: - response = self._http.delete(url_suffix) + response = self._http.delete(path) result = xmltodict.parse(response.content) return f"oml:{resource_type}_delete" in result except OpenMLServerException as e: - # https://github.com/openml/OpenML/blob/21f6188d08ac24fcd2df06ab94cf421c946971b0/openml_OS/views/pages/api_new/v1/xml/pre.php - # Most exceptions are descriptive enough to be raised as their standard - # OpenMLServerException, however there are two cases where we add information: - # - a generic "failed" message, we direct them to the right issue board - # - when the user successfully authenticates with the server, - # but user is not allowed to take the requested action, - # in which case we specify a OpenMLNotAuthorizedError. - by_other_user = [323, 353, 393, 453, 594] - has_dependent_entities = [324, 326, 327, 328, 354, 454, 464, 595] - unknown_reason = [325, 355, 394, 455, 593] - if e.code in by_other_user: - raise OpenMLNotAuthorizedError( - message=( - f"The {resource_type} can not be deleted " - "because it was not uploaded by you." 
- ), - ) from e - if e.code in has_dependent_entities: - raise OpenMLNotAuthorizedError( - message=( - f"The {resource_type} can not be deleted because " - f"it still has associated entities: {e.message}" - ), - ) from e - if e.code in unknown_reason: - raise OpenMLServerError( - message=( - f"The {resource_type} can not be deleted for unknown reason," - " please open an issue at: https://github.com/openml/openml/issues/new" - ), - ) from e - raise e - - def publish(self) -> None: - pass + self._handle_delete_exception(resource_type, e) + raise + + def _handle_delete_exception( + self, resource_type: str, exception: OpenMLServerException + ) -> None: + # https://github.com/openml/OpenML/blob/21f6188d08ac24fcd2df06ab94cf421c946971b0/openml_OS/views/pages/api_new/v1/xml/pre.php + # Most exceptions are descriptive enough to be raised as their standard + # OpenMLServerException, however there are two cases where we add information: + # - a generic "failed" message, we direct them to the right issue board + # - when the user successfully authenticates with the server, + # but user is not allowed to take the requested action, + # in which case we specify a OpenMLNotAuthorizedError. + by_other_user = [323, 353, 393, 453, 594] + has_dependent_entities = [324, 326, 327, 328, 354, 454, 464, 595] + unknown_reason = [325, 355, 394, 455, 593] + if exception.code in by_other_user: + raise OpenMLNotAuthorizedError( + message=( + f"The {resource_type} can not be deleted because it was not uploaded by you." 
+ ), + ) from exception + if exception.code in has_dependent_entities: + raise OpenMLNotAuthorizedError( + message=( + f"The {resource_type} can not be deleted because " + f"it still has associated entities: {exception.message}" + ), + ) from exception + if exception.code in unknown_reason: + raise OpenMLServerError( + message=( + f"The {resource_type} can not be deleted for unknown reason," + " please open an issue at: https://github.com/openml/openml/issues/new" + ), + ) from exception + raise exception + + def _extract_id_from_upload(self, parsed: Mapping[str, Any]) -> int: + # reads id from + # sample parsed dict: {"oml:openml": {"oml:upload_flow": {"oml:id": "42"}}} + + # xmltodict always gives exactly one root key + ((_, root_value),) = parsed.items() + + if not isinstance(root_value, Mapping): + raise ValueError("Unexpected XML structure") + + # upload node (e.g. oml:upload_task, oml:study_upload, ...) + ((_, upload_value),) = root_value.items() + + if not isinstance(upload_value, Mapping): + raise ValueError("Unexpected upload node structure") + + # ID is the only leaf value + for v in upload_value.values(): + if isinstance(v, (str, int)): + return int(v) + + raise ValueError("No ID found in upload response") class ResourceV2(ResourceAPI): api_version: APIVersion = APIVersion.V2 - def delete(self, resource_id: int) -> bool: + def publish(self, path: str, files: Mapping[str, Any] | None) -> int: raise NotImplementedError(self._get_not_implemented_message("publish")) - def publish(self) -> None: - raise NotImplementedError(self._get_not_implemented_message("publish")) + def delete(self, resource_id: int) -> bool: + raise NotImplementedError(self._get_not_implemented_message("delete")) From 54a3151932e3c50bda983f6d6609a4740e38a0c7 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Tue, 27 Jan 2026 14:17:40 +0500 Subject: [PATCH 034/156] implement tag/untag --- openml/_api/clients/http.py | 10 +++- openml/_api/resources/base/base.py | 6 +++ 
openml/_api/resources/base/versions.py | 63 ++++++++++++++++++++------ openml/_api/resources/tasks.py | 4 +- 4 files changed, 67 insertions(+), 16 deletions(-) diff --git a/openml/_api/clients/http.py b/openml/_api/clients/http.py index 1622087c9..65d7b2248 100644 --- a/openml/_api/clients/http.py +++ b/openml/_api/clients/http.py @@ -279,6 +279,7 @@ def _request( # noqa: PLR0913 method: str, url: str, params: Mapping[str, Any], + data: Mapping[str, Any], headers: Mapping[str, str], timeout: float | int, files: Mapping[str, Any] | None, @@ -292,6 +293,7 @@ def _request( # noqa: PLR0913 method=method, url=url, params=params, + data=data, headers=headers, timeout=timeout, files=files, @@ -326,11 +328,16 @@ def request( url = urljoin(self.server, urljoin(self.base_url, path)) retries = max(1, self.retries) - # prepare params params = request_kwargs.pop("params", {}).copy() + data = request_kwargs.pop("data", {}).copy() + if use_api_key: params["api_key"] = self.api_key + if method.upper() in {"POST", "PUT", "PATCH"}: + data = {**params, **data} + params = {} + # prepare headers headers = request_kwargs.pop("headers", {}).copy() headers.update(self.headers) @@ -352,6 +359,7 @@ def request( method=method, url=url, params=params, + data=data, headers=headers, timeout=timeout, files=files, diff --git a/openml/_api/resources/base/base.py b/openml/_api/resources/base/base.py index f2d7d1e88..63d4c40eb 100644 --- a/openml/_api/resources/base/base.py +++ b/openml/_api/resources/base/base.py @@ -43,6 +43,12 @@ def delete(self, resource_id: int) -> bool: ... @abstractmethod def publish(self, path: str, files: Mapping[str, Any] | None) -> int: ... + @abstractmethod + def tag(self, resource_id: int, tag: str) -> list[str]: ... + + @abstractmethod + def untag(self, resource_id: int, tag: str) -> list[str]: ... 
+ def _get_not_implemented_message(self, method_name: str | None = None) -> str: version = getattr(self.api_version, "name", "Unknown version") resource = getattr(self.resource_type, "name", "Unknown resource") diff --git a/openml/_api/resources/base/versions.py b/openml/_api/resources/base/versions.py index 41f883ebe..91c1a8c06 100644 --- a/openml/_api/resources/base/versions.py +++ b/openml/_api/resources/base/versions.py @@ -22,19 +22,9 @@ def publish(self, path: str, files: Mapping[str, Any] | None) -> int: return self._extract_id_from_upload(parsed_response) def delete(self, resource_id: int) -> bool: - if self.resource_type == ResourceType.DATASET: - resource_type = "data" - else: - resource_type = self.resource_type.name - - legal_resources = { - "data", - "flow", - "task", - "run", - "study", - "user", - } + resource_type = self._get_endpoint_name() + + legal_resources = {"data", "flow", "task", "run", "study", "user"} if resource_type not in legal_resources: raise ValueError(f"Can't delete a {resource_type}") @@ -47,6 +37,47 @@ def delete(self, resource_id: int) -> bool: self._handle_delete_exception(resource_type, e) raise + def tag(self, resource_id: int, tag: str) -> list[str]: + resource_type = self._get_endpoint_name() + + legal_resources = {"data", "task", "flow", "setup", "run"} + if resource_type not in legal_resources: + raise ValueError(f"Can't tag a {resource_type}") + + path = f"{resource_type}/tag" + data = {f"{resource_type}_id": resource_id, "tag": tag} + response = self._http.post(path, data=data) + + main_tag = f"oml:{resource_type}_tag" + parsed_response = xmltodict.parse(response.content, force_list={"oml:tag"}) + result = parsed_response[main_tag] + tags: list[str] = result.get("oml:tag", []) + + return tags + + def untag(self, resource_id: int, tag: str) -> list[str]: + resource_type = self._get_endpoint_name() + + legal_resources = {"data", "task", "flow", "setup", "run"} + if resource_type not in legal_resources: + raise 
ValueError(f"Can't untag a {resource_type}") + + path = f"{resource_type}/untag" + data = {f"{resource_type}_id": resource_id, "tag": tag} + response = self._http.post(path, data=data) + + main_tag = f"oml:{resource_type}_untag" + parsed_response = xmltodict.parse(response.content, force_list={"oml:tag"}) + result = parsed_response[main_tag] + tags: list[str] = result.get("oml:tag", []) + + return tags + + def _get_endpoint_name(self) -> str: + if self.resource_type == ResourceType.DATASET: + return "data" + return self.resource_type.name + + def _handle_delete_exception( self, resource_type: str, exception: OpenMLServerException ) -> None: @@ -114,3 +145,9 @@ def publish(self, path: str, files: Mapping[str, Any] | None) -> int: def delete(self, resource_id: int) -> bool: raise NotImplementedError(self._get_not_implemented_message("delete")) + + def tag(self, resource_id: int, tag: str) -> list[str]: + raise NotImplementedError(self._get_not_implemented_message("tag")) + + def untag(self, resource_id: int, tag: str) -> list[str]: + raise NotImplementedError(self._get_not_implemented_message("untag")) diff --git a/openml/_api/resources/tasks.py b/openml/_api/resources/tasks.py index a7ca39208..295e7a73d 100644 --- a/openml/_api/resources/tasks.py +++ b/openml/_api/resources/tasks.py @@ -26,7 +26,7 @@ def get( return_response: bool = False, ) -> OpenMLTask | tuple[OpenMLTask, Response]: path = f"task/{task_id}" - response = self._http.get(path) + response = self._http.get(path, use_cache=True) @@ -125,4 +125,4 @@ def get( *, return_response: bool = False, ) -> OpenMLTask | tuple[OpenMLTask, Response]: - raise NotImplementedError + raise NotImplementedError(self._get_not_implemented_message("get")) From 2b6fe6507b349703060f060f0184169abf5e20de Mon Sep 17 00:00:00 2001 From: geetu040 Date: Tue, 27 Jan 2026 18:31:39 +0500 Subject: [PATCH 035/156] implement fallback ---
openml/_api/resources/__init__.py | 3 +- openml/_api/resources/base/__init__.py | 2 + openml/_api/resources/base/fallback.py | 56 ++++++++++++++++++++++++++ openml/_api/runtime/core.py | 8 +++- openml/_api/runtime/fallback.py | 12 ------ 5 files changed, 66 insertions(+), 15 deletions(-) create mode 100644 openml/_api/resources/base/fallback.py delete mode 100644 openml/_api/runtime/fallback.py diff --git a/openml/_api/resources/__init__.py b/openml/_api/resources/__init__.py index b1af3c1a8..6c0807e0f 100644 --- a/openml/_api/resources/__init__.py +++ b/openml/_api/resources/__init__.py @@ -1,4 +1,5 @@ +from openml._api.resources.base.fallback import FallbackProxy from openml._api.resources.datasets import DatasetsV1, DatasetsV2 from openml._api.resources.tasks import TasksV1, TasksV2 -__all__ = ["DatasetsV1", "DatasetsV2", "TasksV1", "TasksV2"] +__all__ = ["DatasetsV1", "DatasetsV2", "FallbackProxy", "TasksV1", "TasksV2"] diff --git a/openml/_api/resources/base/__init__.py b/openml/_api/resources/base/__init__.py index 851cfe942..bddc09b21 100644 --- a/openml/_api/resources/base/__init__.py +++ b/openml/_api/resources/base/__init__.py @@ -1,10 +1,12 @@ from openml._api.resources.base.base import APIVersion, ResourceAPI, ResourceType +from openml._api.resources.base.fallback import FallbackProxy from openml._api.resources.base.resources import DatasetsAPI, TasksAPI from openml._api.resources.base.versions import ResourceV1, ResourceV2 __all__ = [ "APIVersion", "DatasetsAPI", + "FallbackProxy", "ResourceAPI", "ResourceType", "ResourceV1", diff --git a/openml/_api/resources/base/fallback.py b/openml/_api/resources/base/fallback.py new file mode 100644 index 000000000..253ee3865 --- /dev/null +++ b/openml/_api/resources/base/fallback.py @@ -0,0 +1,56 @@ +from __future__ import annotations + +from collections.abc import Callable +from typing import Any + + +class FallbackProxy: + def __init__(self, *api_versions: Any): + if not api_versions: + raise ValueError("At 
least one API version must be provided") + self._apis = api_versions + + def __getattr__(self, name: str) -> Any: + api, attr = self._find_attr(name) + if callable(attr): + return self._wrap_callable(name, api, attr) + return attr + + def _find_attr(self, name: str) -> tuple[Any, Any]: + for api in self._apis: + attr = getattr(api, name, None) + if attr is not None: + return api, attr + raise AttributeError(f"{self.__class__.__name__} has no attribute {name}") + + def _wrap_callable( + self, + name: str, + primary_api: Any, + primary_attr: Callable[..., Any], + ) -> Callable[..., Any]: + def wrapper(*args: Any, **kwargs: Any) -> Any: + try: + return primary_attr(*args, **kwargs) + except NotImplementedError: + return self._call_fallbacks(name, primary_api, *args, **kwargs) + + return wrapper + + def _call_fallbacks( + self, + name: str, + skip_api: Any, + *args: Any, + **kwargs: Any, + ) -> Any: + for api in self._apis: + if api is skip_api: + continue + attr = getattr(api, name, None) + if callable(attr): + try: + return attr(*args, **kwargs) + except NotImplementedError: + continue + raise NotImplementedError(f"Could not fallback to any API for method: {name}") diff --git a/openml/_api/runtime/core.py b/openml/_api/runtime/core.py index 25f2649ee..4914179f8 100644 --- a/openml/_api/runtime/core.py +++ b/openml/_api/runtime/core.py @@ -8,6 +8,7 @@ from openml._api.resources import ( DatasetsV1, DatasetsV2, + FallbackProxy, TasksV1, TasksV2, ) @@ -17,7 +18,7 @@ class APIBackend: - def __init__(self, *, datasets: DatasetsAPI, tasks: TasksAPI): + def __init__(self, *, datasets: DatasetsAPI | FallbackProxy, tasks: TasksAPI | FallbackProxy): self.datasets = datasets self.tasks = tasks @@ -62,7 +63,10 @@ def build_backend(version: str, *, strict: bool) -> APIBackend: if strict: return v2 - return v1 + return APIBackend( + datasets=FallbackProxy(DatasetsV2(v2_http_client), DatasetsV1(v1_http_client)), + tasks=FallbackProxy(TasksV2(v2_http_client), 
TasksV1(v1_http_client)), + ) class APIContext: diff --git a/openml/_api/runtime/fallback.py b/openml/_api/runtime/fallback.py deleted file mode 100644 index 1bc99d270..000000000 --- a/openml/_api/runtime/fallback.py +++ /dev/null @@ -1,12 +0,0 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING - -if TYPE_CHECKING: - from openml._api.resources.base import ResourceAPI - - -class FallbackProxy: - def __init__(self, primary: ResourceAPI, fallback: ResourceAPI): - self._primary = primary - self._fallback = fallback From fa53f8d3e10dabde3634c05a97d67560459bcaa6 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Wed, 28 Jan 2026 13:50:42 +0500 Subject: [PATCH 036/156] add test_http.py --- openml/testing.py | 88 +++++++++++++++++++++++ tests/test_api/test_http.py | 134 ++++++++++++++++++++++++++++++++++++ 2 files changed, 222 insertions(+) create mode 100644 tests/test_api/test_http.py diff --git a/openml/testing.py b/openml/testing.py index 8d3bbbd5b..b0aaac9be 100644 --- a/openml/testing.py +++ b/openml/testing.py @@ -11,10 +11,13 @@ import unittest from pathlib import Path from typing import ClassVar +from urllib.parse import urljoin import requests import openml +from openml._api.clients import HTTPCache, HTTPClient +from openml._api.config import RetryPolicy from openml.exceptions import OpenMLServerException from openml.tasks import TaskType @@ -276,6 +279,91 @@ def _check_fold_timing_evaluations( # noqa: PLR0913 assert evaluation <= max_val +class TestAPIBase(unittest.TestCase): + server: str + base_url: str + api_key: str + timeout: int + retries: int + retry_policy: RetryPolicy + dir: str + ttl: int + cache: HTTPCache + http_client: HTTPClient + + def setUp(self) -> None: + self.server = "https://test.openml.org/" + self.base_url = "api/v1/xml" + self.api_key = "normaluser" + self.timeout = 10 + self.retries = 3 + self.retry_policy = RetryPolicy.HUMAN + self.dir = "test_cache" + self.ttl = 60 * 60 * 24 * 7 + + self.cache = self._get_http_cache( 
+ path=Path(self.dir), + ttl=self.ttl, + ) + self.http_client = self._get_http_client( + server=self.server, + base_url=self.base_url, + api_key=self.api_key, + timeout=self.timeout, + retries=self.retries, + retry_policy=self.retry_policy, + cache=self.cache, + ) + + if self.cache.path.exists(): + shutil.rmtree(self.cache.path) + + def tearDown(self) -> None: + if self.cache.path.exists(): + shutil.rmtree(self.cache.path) + + def _get_http_cache( + self, + path: Path, + ttl: int, + ) -> HTTPCache: + return HTTPCache( + path=path, + ttl=ttl, + ) + + def _get_http_client( # noqa: PLR0913 + self, + server: str, + base_url: str, + api_key: str, + timeout: int, + retries: int, + retry_policy: RetryPolicy, + cache: HTTPCache | None = None, + ) -> HTTPClient: + return HTTPClient( + server=server, + base_url=base_url, + api_key=api_key, + timeout=timeout, + retries=retries, + retry_policy=retry_policy, + cache=cache, + ) + + def _get_url( + self, + server: str | None = None, + base_url: str | None = None, + path: str | None = None, + ) -> str: + server = server if server else self.server + base_url = base_url if base_url else self.base_url + path = path if path else "" + return urljoin(self.server, urljoin(self.base_url, path)) + + def check_task_existence( task_type: TaskType, dataset_id: int, diff --git a/tests/test_api/test_http.py b/tests/test_api/test_http.py new file mode 100644 index 000000000..98b6fda5a --- /dev/null +++ b/tests/test_api/test_http.py @@ -0,0 +1,134 @@ +from requests import Response, Request +import time +import xmltodict +from openml.testing import TestAPIBase + + +class TestHTTPClient(TestAPIBase): + def test_cache(self): + url = self._get_url(path="task/31") + params = {"param1": "value1", "param2": "value2"} + + key = self.cache.get_key(url, params) + + # validate key + self.assertEqual( + key, + "org/openml/test/api/v1/task/31/param1=value1¶m2=value2", + ) + + # create fake response + req = Request("GET", url).prepare() + response = Response() 
+ response.status_code = 200 + response.url = url + response.reason = "OK" + response._content = b"test" + response.headers = {"Content-Type": "text/xml"} + response.encoding = "utf-8" + response.request = req + response.elapsed = type("Elapsed", (), {"total_seconds": lambda self: 0.1})() + + # save to cache + self.cache.save(key, response) + + # load from cache + cached_response = self.cache.load(key) + + # validate loaded response + self.assertEqual(cached_response.status_code, 200) + self.assertEqual(cached_response.url, url) + self.assertEqual(cached_response.content, b"test") + self.assertEqual( + cached_response.headers["Content-Type"], "text/xml" + ) + + def test_get(self): + response = self.http_client.get("task/1") + + self.assertEqual(response.status_code, 200) + self.assertIn(b" new request + self.assertNotEqual(response1_cache_time_stamp, response2_cache_time_stamp) + self.assertEqual(response2.status_code, 200) + self.assertEqual(response1.content, response2.content) + + def test_post_and_delete(self): + task_xml = """ + + 5 + 193 + 17 + + """ + + task_id = None + try: + # POST the task + post_response = self.http_client.post( + "task", + files={"description": task_xml}, + ) + self.assertEqual(post_response.status_code, 200) + xml_resp = xmltodict.parse(post_response.content) + task_id = int(xml_resp["oml:upload_task"]["oml:id"]) + + # GET the task to verify it exists + get_response = self.http_client.get(f"task/{task_id}") + self.assertEqual(get_response.status_code, 200) + + finally: + # DELETE the task if it was created + if task_id is not None: + try: + del_response = self.http_client.delete(f"task/{task_id}") + # optional: verify delete + if del_response.status_code != 200: + print(f"Warning: delete failed for task {task_id}") + except Exception as e: + print(f"Warning: failed to delete task {task_id}: {e}") From 2b2db962fc252a2b2b23f21bd1d055905ed74588 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Wed, 28 Jan 2026 13:52:43 +0500 Subject: [PATCH 
037/156] add uses_test_server marker --- tests/test_api/test_http.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/test_api/test_http.py b/tests/test_api/test_http.py index 98b6fda5a..94ce5ee93 100644 --- a/tests/test_api/test_http.py +++ b/tests/test_api/test_http.py @@ -1,6 +1,7 @@ from requests import Response, Request import time import xmltodict +import pytest from openml.testing import TestAPIBase @@ -43,12 +44,14 @@ def test_cache(self): cached_response.headers["Content-Type"], "text/xml" ) + @pytest.mark.uses_test_server() def test_get(self): response = self.http_client.get("task/1") self.assertEqual(response.status_code, 200) self.assertIn(b" From c9617f932fce853dbe6db9a445ef98cc6cfec7f4 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Thu, 29 Jan 2026 14:40:09 +0500 Subject: [PATCH 038/156] implement reset_cache --- openml/_api/clients/http.py | 6 +++++- tests/test_api/test_http.py | 18 ++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/openml/_api/clients/http.py b/openml/_api/clients/http.py index 65d7b2248..dfcdf5a8a 100644 --- a/openml/_api/clients/http.py +++ b/openml/_api/clients/http.py @@ -322,6 +322,7 @@ def request( path: str, *, use_cache: bool = False, + reset_cache: bool = False, use_api_key: bool = False, **request_kwargs: Any, ) -> Response: @@ -345,7 +346,7 @@ def request( timeout = request_kwargs.pop("timeout", self.timeout) files = request_kwargs.pop("files", None) - if use_cache and self.cache is not None: + if use_cache and not reset_cache and self.cache is not None: cache_key = self.cache.get_key(url, params) try: return self.cache.load(cache_key) @@ -379,6 +380,7 @@ def request( assert response is not None if use_cache and self.cache is not None: + cache_key = self.cache.get_key(url, params) self.cache.save(cache_key, response) return response @@ -388,6 +390,7 @@ def get( path: str, *, use_cache: bool = False, + reset_cache: bool = False, use_api_key: bool = False, **request_kwargs: Any, ) 
-> Response: @@ -395,6 +398,7 @@ def get( method="GET", path=path, use_cache=use_cache, + reset_cache=reset_cache, use_api_key=use_api_key, **request_kwargs, ) diff --git a/tests/test_api/test_http.py b/tests/test_api/test_http.py index 94ce5ee93..808321862 100644 --- a/tests/test_api/test_http.py +++ b/tests/test_api/test_http.py @@ -103,6 +103,24 @@ def test_get_cache_expires(self): self.assertEqual(response2.status_code, 200) self.assertEqual(response1.content, response2.content) + @pytest.mark.uses_test_server() + def test_get_reset_cache(self): + path = "task/1" + + url = self._get_url(path=path) + key = self.cache.get_key(url, {}) + cache_path = self.cache._key_to_path(key) / "meta.json" + + response1 = self.http_client.get(path, use_cache=True) + response1_cache_time_stamp = cache_path.stat().st_ctime + + response2 = self.http_client.get(path, use_cache=True, reset_cache=True) + response2_cache_time_stamp = cache_path.stat().st_ctime + + self.assertNotEqual(response1_cache_time_stamp, response2_cache_time_stamp) + self.assertEqual(response2.status_code, 200) + self.assertEqual(response1.content, response2.content) + @pytest.mark.uses_test_server() def test_post_and_delete(self): task_xml = """ From 5bc37b80abc86e89644e431f48ca2d4d4ad7814c Mon Sep 17 00:00:00 2001 From: geetu040 Date: Thu, 29 Jan 2026 22:02:38 +0500 Subject: [PATCH 039/156] fixes with publish/delete --- openml/_api/resources/base/versions.py | 22 ++++++------- tests/test_api/test_http.py | 9 ++---- tests/test_api/test_versions.py | 44 ++++++++++++++++++++++++++ 3 files changed, 57 insertions(+), 18 deletions(-) create mode 100644 tests/test_api/test_versions.py diff --git a/openml/_api/resources/base/versions.py b/openml/_api/resources/base/versions.py index 91c1a8c06..6ca2dd345 100644 --- a/openml/_api/resources/base/versions.py +++ b/openml/_api/resources/base/versions.py @@ -1,7 +1,7 @@ from __future__ import annotations from collections.abc import Mapping -from typing import Any +from 
typing import Any, cast import xmltodict @@ -76,7 +76,7 @@ def untag(self, resource_id: int, tag: str) -> list[str]: def _get_endpoint_name(self) -> str: if self.resource_type == ResourceType.DATASET: return "data" - return self.resource_type.name + return cast("str", self.resource_type.value) def _handle_delete_exception( self, resource_type: str, exception: OpenMLServerException @@ -114,8 +114,8 @@ def _handle_delete_exception( raise exception def _extract_id_from_upload(self, parsed: Mapping[str, Any]) -> int: - # reads id from - # sample parsed dict: {"oml:openml": {"oml:upload_flow": {"oml:id": "42"}}} + # reads id from upload response + # actual parsed dict: {"oml:upload_flow": {"@xmlns:oml": "...", "oml:id": "42"}} # xmltodict always gives exactly one root key ((_, root_value),) = parsed.items() @@ -123,14 +123,14 @@ def _extract_id_from_upload(self, parsed: Mapping[str, Any]) -> int: if not isinstance(root_value, Mapping): raise ValueError("Unexpected XML structure") - # upload node (e.g. oml:upload_task, oml:study_upload, ...) 
- ((_, upload_value),) = root_value.items() + # Look for oml:id directly in the root value + if "oml:id" in root_value: + id_value = root_value["oml:id"] + if isinstance(id_value, (str, int)): + return int(id_value) - if not isinstance(upload_value, Mapping): - raise ValueError("Unexpected upload node structure") - - # ID is the only leaf value - for v in upload_value.values(): + # Fallback: check all values for numeric/string IDs + for v in root_value.values(): if isinstance(v, (str, int)): return int(v) diff --git a/tests/test_api/test_http.py b/tests/test_api/test_http.py index 808321862..c16759558 100644 --- a/tests/test_api/test_http.py +++ b/tests/test_api/test_http.py @@ -149,10 +149,5 @@ def test_post_and_delete(self): finally: # DELETE the task if it was created if task_id is not None: - try: - del_response = self.http_client.delete(f"task/{task_id}") - # optional: verify delete - if del_response.status_code != 200: - print(f"Warning: delete failed for task {task_id}") - except Exception as e: - print(f"Warning: failed to delete task {task_id}: {e}") + del_response = self.http_client.delete(f"task/{task_id}") + self.assertEqual(del_response.status_code, 200) diff --git a/tests/test_api/test_versions.py b/tests/test_api/test_versions.py new file mode 100644 index 000000000..d3b1cd45d --- /dev/null +++ b/tests/test_api/test_versions.py @@ -0,0 +1,44 @@ +import pytest +from openml.testing import TestAPIBase +from openml._api.resources.base.versions import ResourceV1 +from openml._api.resources.base.resources import ResourceType + + +class TestResourceV1(TestAPIBase): + def setUp(self): + super().setUp() + self.resource = ResourceV1(self.http_client) + self.resource.resource_type = ResourceType.TASK + + @pytest.mark.uses_test_server() + def test_publish_and_delete(self): + task_xml = """ + + 5 + 193 + 17 + + """ + + task_id = None + try: + # Publish the task + task_id = self.resource.publish( + "task", + files={"description": task_xml}, + ) + + # Get the task 
to verify it exists + get_response = self.http_client.get(f"task/{task_id}") + self.assertEqual(get_response.status_code, 200) + + finally: + # delete the task if it was created + if task_id is not None: + success = self.resource.delete(task_id) + self.assertTrue(success) + + + @pytest.mark.uses_test_server() + def test_tag_and_untag(self): + pass From 08d991686843fc2ff5d8182e96a162bc2e706f52 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Thu, 29 Jan 2026 22:05:24 +0500 Subject: [PATCH 040/156] fix cache_key in tests --- tests/test_api/test_http.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/tests/test_api/test_http.py b/tests/test_api/test_http.py index c16759558..efaeaeeef 100644 --- a/tests/test_api/test_http.py +++ b/tests/test_api/test_http.py @@ -3,6 +3,7 @@ import xmltodict import pytest from openml.testing import TestAPIBase +import os class TestHTTPClient(TestAPIBase): @@ -11,12 +12,19 @@ def test_cache(self): params = {"param1": "value1", "param2": "value2"} key = self.cache.get_key(url, params) + expected_key = os.path.join( + "org", + "openml", + "test", + "api", + "v1", + "task", + "31", + "param1=value1¶m2=value2", + ) # validate key - self.assertEqual( - key, - "org/openml/test/api/v1/task/31/param1=value1¶m2=value2", - ) + self.assertEqual(key, expected_key) # create fake response req = Request("GET", url).prepare() From 8caba11111d93fd438915e3f697a634d362eba1f Mon Sep 17 00:00:00 2001 From: geetu040 Date: Fri, 30 Jan 2026 11:47:41 +0500 Subject: [PATCH 041/156] update _not_supported --- openml/_api/resources/base/base.py | 19 +++++++++++-------- openml/_api/resources/base/fallback.py | 8 +++++--- openml/_api/resources/base/versions.py | 16 ++++++++-------- openml/_api/resources/tasks.py | 6 +++--- openml/exceptions.py | 4 ++++ 5 files changed, 31 insertions(+), 22 deletions(-) diff --git a/openml/_api/resources/base/base.py b/openml/_api/resources/base/base.py index 63d4c40eb..38ceccbac 100644 --- 
a/openml/_api/resources/base/base.py +++ b/openml/_api/resources/base/base.py @@ -2,7 +2,9 @@ from abc import ABC, abstractmethod from enum import Enum -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, NoReturn + +from openml.exceptions import OpenMLNotSupportedError if TYPE_CHECKING: from collections.abc import Mapping @@ -49,11 +51,12 @@ def tag(self, resource_id: int, tag: str) -> list[str]: ... @abstractmethod def untag(self, resource_id: int, tag: str) -> list[str]: ... - def _get_not_implemented_message(self, method_name: str | None = None) -> str: - version = getattr(self.api_version, "name", "Unknown version") - resource = getattr(self.resource_type, "name", "Unknown resource") - method_info = f" Method: {method_name}" if method_name else "" - return ( - f"{self.__class__.__name__}: {version} API does not support this " - f"functionality for resource: {resource}.{method_info}" + def _not_supported(self, *, method: str) -> NoReturn: + version = getattr(self.api_version, "value", "unknown") + resource = getattr(self.resource_type, "value", "unknown") + + raise OpenMLNotSupportedError( + f"{self.__class__.__name__}: " + f"{version} API does not support `{method}` " + f"for resource `{resource}`" ) diff --git a/openml/_api/resources/base/fallback.py b/openml/_api/resources/base/fallback.py index 253ee3865..3919c36a9 100644 --- a/openml/_api/resources/base/fallback.py +++ b/openml/_api/resources/base/fallback.py @@ -3,6 +3,8 @@ from collections.abc import Callable from typing import Any +from openml.exceptions import OpenMLNotSupportedError + class FallbackProxy: def __init__(self, *api_versions: Any): @@ -32,7 +34,7 @@ def _wrap_callable( def wrapper(*args: Any, **kwargs: Any) -> Any: try: return primary_attr(*args, **kwargs) - except NotImplementedError: + except OpenMLNotSupportedError: return self._call_fallbacks(name, primary_api, *args, **kwargs) return wrapper @@ -51,6 +53,6 @@ def _call_fallbacks( if callable(attr): try: return 
attr(*args, **kwargs) - except NotImplementedError: + except OpenMLNotSupportedError: continue - raise NotImplementedError(f"Could not fallback to any API for method: {name}") + raise OpenMLNotSupportedError(f"Could not fallback to any API for method: {name}") diff --git a/openml/_api/resources/base/versions.py b/openml/_api/resources/base/versions.py index 6ca2dd345..04b7617b1 100644 --- a/openml/_api/resources/base/versions.py +++ b/openml/_api/resources/base/versions.py @@ -140,14 +140,14 @@ def _extract_id_from_upload(self, parsed: Mapping[str, Any]) -> int: class ResourceV2(ResourceAPI): api_version: APIVersion = APIVersion.V2 - def publish(self, path: str, files: Mapping[str, Any] | None) -> int: - raise NotImplementedError(self._get_not_implemented_message("publish")) + def publish(self, path: str, files: Mapping[str, Any] | None) -> int: # noqa: ARG002 + self._not_supported(method="publish") - def delete(self, resource_id: int) -> bool: - raise NotImplementedError(self._get_not_implemented_message("delete")) + def delete(self, resource_id: int) -> bool: # noqa: ARG002 + self._not_supported(method="delete") - def tag(self, resource_id: int, tag: str) -> list[str]: - raise NotImplementedError(self._get_not_implemented_message("untag")) + def tag(self, resource_id: int, tag: str) -> list[str]: # noqa: ARG002 + self._not_supported(method="tag") - def untag(self, resource_id: int, tag: str) -> list[str]: - raise NotImplementedError(self._get_not_implemented_message("untag")) + def untag(self, resource_id: int, tag: str) -> list[str]: # noqa: ARG002 + self._not_supported(method="untag") diff --git a/openml/_api/resources/tasks.py b/openml/_api/resources/tasks.py index 295e7a73d..8420f8e57 100644 --- a/openml/_api/resources/tasks.py +++ b/openml/_api/resources/tasks.py @@ -121,8 +121,8 @@ def _create_task_from_xml(self, xml: str) -> OpenMLTask: class TasksV2(ResourceV2, TasksAPI): def get( self, - task_id: int, + task_id: int, # noqa: ARG002 *, - return_response: 
bool = False, + return_response: bool = False, # noqa: ARG002 ) -> OpenMLTask | tuple[OpenMLTask, Response]: - raise NotImplementedError(self._get_not_implemented_message("get")) + self._not_supported(method="get") diff --git a/openml/exceptions.py b/openml/exceptions.py index fe63b8a58..26c2d2591 100644 --- a/openml/exceptions.py +++ b/openml/exceptions.py @@ -65,3 +65,7 @@ class OpenMLNotAuthorizedError(OpenMLServerError): class ObjectNotPublishedError(PyOpenMLError): """Indicates an object has not been published yet.""" + + +class OpenMLNotSupportedError(PyOpenMLError): + """Raised when an API operation is not supported for a resource/version.""" From 1913c10416b74421709601d5177c1e67db93a401 Mon Sep 17 00:00:00 2001 From: Simon Blanke Date: Fri, 30 Jan 2026 19:27:36 +0100 Subject: [PATCH 042/156] add 'get_api_config' skeleton method --- openml/_api/config.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/openml/_api/config.py b/openml/_api/config.py index 6cce06403..2201420d9 100644 --- a/openml/_api/config.py +++ b/openml/_api/config.py @@ -41,6 +41,9 @@ class Settings: connection: ConnectionConfig cache: CacheConfig + def get_api_config(self, version: str) -> APIConfig: + pass + settings = Settings( api=APISettings( From 7681949675f3c72e09d09d810aaa11acd78c6811 Mon Sep 17 00:00:00 2001 From: Simon Blanke Date: Fri, 30 Jan 2026 19:29:13 +0100 Subject: [PATCH 043/156] remove 'APISettings' --- openml/_api/config.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/openml/_api/config.py b/openml/_api/config.py index 2201420d9..893b950c6 100644 --- a/openml/_api/config.py +++ b/openml/_api/config.py @@ -17,12 +17,6 @@ class APIConfig: timeout: int = 10 # seconds -@dataclass -class APISettings: - v1: APIConfig - v2: APIConfig - - @dataclass class ConnectionConfig: retries: int = 3 From 01840a5a09442228f708daf45c32acbd05ce0e8b Mon Sep 17 00:00:00 2001 From: Simon Blanke Date: Fri, 30 Jan 2026 19:34:11 +0100 Subject: [PATCH 044/156] impl. 
'get_api_config' --- openml/_api/config.py | 54 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 49 insertions(+), 5 deletions(-) diff --git a/openml/_api/config.py b/openml/_api/config.py index 893b950c6..8600156f7 100644 --- a/openml/_api/config.py +++ b/openml/_api/config.py @@ -29,14 +29,58 @@ class CacheConfig: ttl: int = 60 * 60 * 24 * 7 # one week -@dataclass class Settings: - api: APISettings - connection: ConnectionConfig - cache: CacheConfig + def __init__(self) -> None: + self.api_configs: dict[str, APIConfig] = {} + self.connection = ConnectionConfig() + self.cache = CacheConfig() + self._initialized = False def get_api_config(self, version: str) -> APIConfig: - pass + """Get API config for a version, with lazy initialization from openml.config.""" + if not self._initialized: + self._init_from_legacy_config() + if version not in self.api_configs: + raise NotImplementedError( + f"API {version} is not yet available. " + f"Supported versions: {list(self.api_configs.keys())}" + ) + return self.api_configs[version] + + def _init_from_legacy_config(self) -> None: + """Lazy init from openml.config to avoid circular imports.""" + if self._initialized: + return + + # Import here to avoid circular import at module load time + import openml.config as legacy + + # Parse server URL to extract base components + # e.g., "https://www.openml.org/api/v1/xml" -> server="https://www.openml.org/" + server_url = legacy.server + if "/api" in server_url: + server_base = server_url.rsplit("/api", 1)[0] + "/" + else: + server_base = server_url + + self.api_configs["v1"] = APIConfig( + server=server_base, + base_url="api/v1/xml/", + api_key=legacy.apikey, + ) + + # Sync connection settings from legacy config + self.connection = ConnectionConfig( + retries=legacy.connection_n_retries, + retry_policy=RetryPolicy(legacy.retry_policy), + ) + + # Sync cache settings from legacy config + self.cache = CacheConfig( + dir=str(legacy._root_cache_directory), + ) + + 
self._initialized = True settings = Settings( From 26ed4c1ee0ab9571f74726795e050b7d47110227 Mon Sep 17 00:00:00 2001 From: Simon Blanke Date: Fri, 30 Jan 2026 19:39:43 +0100 Subject: [PATCH 045/156] add singleton pattern for settings --- openml/_api/config.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/openml/_api/config.py b/openml/_api/config.py index 8600156f7..ee3240556 100644 --- a/openml/_api/config.py +++ b/openml/_api/config.py @@ -99,3 +99,18 @@ def _init_from_legacy_config(self) -> None: connection=ConnectionConfig(), cache=CacheConfig(), ) + + +_settings = None + + +def get_settings() -> Settings: + """Get settings singleton, creating on first access. + + Settings are lazily initialized from openml.config when first accessed, + avoiding circular imports at module load time. + """ + global _settings + if _settings is None: + _settings = Settings() + return _settings From c588d0cd456233894fa67a56e7a814c36ca25761 Mon Sep 17 00:00:00 2001 From: Simon Blanke Date: Fri, 30 Jan 2026 19:40:19 +0100 Subject: [PATCH 046/156] add 'reset_settings' --- openml/_api/config.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/openml/_api/config.py b/openml/_api/config.py index ee3240556..5670698c8 100644 --- a/openml/_api/config.py +++ b/openml/_api/config.py @@ -114,3 +114,9 @@ def get_settings() -> Settings: if _settings is None: _settings = Settings() return _settings + + +def reset_settings() -> None: + """Reset the settings singleton. 
Could be useful for testing.""" + global _settings + _settings = None From b6ff7207c5d8428c885f498986d2a5abf0d66ac3 Mon Sep 17 00:00:00 2001 From: Simon Blanke Date: Fri, 30 Jan 2026 19:40:32 +0100 Subject: [PATCH 047/156] remove unused code --- openml/_api/config.py | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/openml/_api/config.py b/openml/_api/config.py index 5670698c8..4dc408428 100644 --- a/openml/_api/config.py +++ b/openml/_api/config.py @@ -83,24 +83,6 @@ def _init_from_legacy_config(self) -> None: self._initialized = True -settings = Settings( - api=APISettings( - v1=APIConfig( - server="https://www.openml.org/", - base_url="api/v1/xml/", - api_key="...", - ), - v2=APIConfig( - server="http://127.0.0.1:8001/", - base_url="", - api_key="...", - ), - ), - connection=ConnectionConfig(), - cache=CacheConfig(), -) - - _settings = None From 80d5afc1e0784abe264b10abaabe40fec7984792 Mon Sep 17 00:00:00 2001 From: Simon Blanke Date: Fri, 30 Jan 2026 19:44:44 +0100 Subject: [PATCH 048/156] reimplement usage of v1 settings config --- openml/_api/runtime/core.py | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/openml/_api/runtime/core.py b/openml/_api/runtime/core.py index 4914179f8..5e55d61cb 100644 --- a/openml/_api/runtime/core.py +++ b/openml/_api/runtime/core.py @@ -4,7 +4,7 @@ from typing import TYPE_CHECKING from openml._api.clients import HTTPCache, HTTPClient -from openml._api.config import settings +from openml._api.config import get_settings from openml._api.resources import ( DatasetsV1, DatasetsV2, @@ -18,30 +18,29 @@ class APIBackend: - def __init__(self, *, datasets: DatasetsAPI | FallbackProxy, tasks: TasksAPI | FallbackProxy): + def __init__( + self, *, datasets: DatasetsAPI | FallbackProxy, tasks: TasksAPI | FallbackProxy + ): self.datasets = datasets self.tasks = tasks def build_backend(version: str, *, strict: bool) -> APIBackend: + settings = get_settings() + + # Get config 
for v1 (lazy init from openml.config) + v1_config = settings.get_api_config("v1") + http_cache = HTTPCache( - path=Path(settings.cache.dir), + path=Path(settings.cache.dir).expanduser(), ttl=settings.cache.ttl, ) + v1_http_client = HTTPClient( - server=settings.api.v1.server, - base_url=settings.api.v1.base_url, - api_key=settings.api.v1.api_key, - timeout=settings.api.v1.timeout, - retries=settings.connection.retries, - retry_policy=settings.connection.retry_policy, - cache=http_cache, - ) - v2_http_client = HTTPClient( - server=settings.api.v2.server, - base_url=settings.api.v2.base_url, - api_key=settings.api.v2.api_key, - timeout=settings.api.v2.timeout, + server=v1_config.server, + base_url=v1_config.base_url, + api_key=v1_config.api_key, + timeout=v1_config.timeout, retries=settings.connection.retries, retry_policy=settings.connection.retry_policy, cache=http_cache, From f47112c7b9eb1710ddf7b79ea97b3f8c0b0cbf49 Mon Sep 17 00:00:00 2001 From: Simon Blanke Date: Fri, 30 Jan 2026 19:47:25 +0100 Subject: [PATCH 049/156] first try v2, fallback to v1 if not available --- openml/_api/runtime/core.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/openml/_api/runtime/core.py b/openml/_api/runtime/core.py index 5e55d61cb..24fd2c248 100644 --- a/openml/_api/runtime/core.py +++ b/openml/_api/runtime/core.py @@ -54,6 +54,25 @@ def build_backend(version: str, *, strict: bool) -> APIBackend: if version == "v1": return v1 + # V2 support - will raise NotImplementedError if v2 config not available + try: + v2_config = settings.get_api_config("v2") + except NotImplementedError: + if strict: + raise + # Non-strict mode: fall back to v1 only + return v1 + + v2_http_client = HTTPClient( + server=v2_config.server, + base_url=v2_config.base_url, + api_key=v2_config.api_key, + timeout=v2_config.timeout, + retries=settings.connection.retries, + retry_policy=settings.connection.retry_policy, + cache=http_cache, + ) + v2 = APIBackend( 
datasets=DatasetsV2(v2_http_client), tasks=TasksV2(v2_http_client), From d44cf3eb5e36587ad033e24b1e54863e98df2d91 Mon Sep 17 00:00:00 2001 From: Simon Blanke Date: Fri, 30 Jan 2026 19:58:42 +0100 Subject: [PATCH 050/156] reimplement singelton without the use of 'global' --- openml/_api/config.py | 46 +++++++++++++++++-------------------------- 1 file changed, 18 insertions(+), 28 deletions(-) diff --git a/openml/_api/config.py b/openml/_api/config.py index 4dc408428..c375542b8 100644 --- a/openml/_api/config.py +++ b/openml/_api/config.py @@ -30,12 +30,28 @@ class CacheConfig: class Settings: + """Settings container that reads from openml.config on access.""" + + _instance: Settings | None = None + def __init__(self) -> None: self.api_configs: dict[str, APIConfig] = {} self.connection = ConnectionConfig() self.cache = CacheConfig() self._initialized = False + @classmethod + def get(cls) -> Settings: + """Get settings singleton, creating on first access.""" + if cls._instance is None: + cls._instance = cls() + return cls._instance + + @classmethod + def reset(cls) -> None: + """Reset the settings singleton. 
Useful for testing.""" + cls._instance = None + def get_api_config(self, version: str) -> APIConfig: """Get API config for a version, with lazy initialization from openml.config.""" if not self._initialized: @@ -52,11 +68,8 @@ def _init_from_legacy_config(self) -> None: if self._initialized: return - # Import here to avoid circular import at module load time - import openml.config as legacy + import openml.config as legacy # Import here to avoid circular - # Parse server URL to extract base components - # e.g., "https://www.openml.org/api/v1/xml" -> server="https://www.openml.org/" server_url = legacy.server if "/api" in server_url: server_base = server_url.rsplit("/api", 1)[0] + "/" @@ -69,36 +82,13 @@ def _init_from_legacy_config(self) -> None: api_key=legacy.apikey, ) - # Sync connection settings from legacy config + # Sync connection- and cache- settings from legacy config self.connection = ConnectionConfig( retries=legacy.connection_n_retries, retry_policy=RetryPolicy(legacy.retry_policy), ) - - # Sync cache settings from legacy config self.cache = CacheConfig( dir=str(legacy._root_cache_directory), ) self._initialized = True - - -_settings = None - - -def get_settings() -> Settings: - """Get settings singleton, creating on first access. - - Settings are lazily initialized from openml.config when first accessed, - avoiding circular imports at module load time. - """ - global _settings - if _settings is None: - _settings = Settings() - return _settings - - -def reset_settings() -> None: - """Reset the settings singleton. 
Could be useful for testing.""" - global _settings - _settings = None From ea7dda17087bc25d07ea7610da25b8ec04b17ca2 Mon Sep 17 00:00:00 2001 From: Simon Blanke Date: Fri, 30 Jan 2026 20:00:25 +0100 Subject: [PATCH 051/156] add explanations --- openml/_api/config.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/openml/_api/config.py b/openml/_api/config.py index c375542b8..32dd8ecf5 100644 --- a/openml/_api/config.py +++ b/openml/_api/config.py @@ -68,7 +68,11 @@ def _init_from_legacy_config(self) -> None: if self._initialized: return - import openml.config as legacy # Import here to avoid circular + # Import here (not at module level) to avoid circular imports. + # We read from openml.config to integrate with the existing config system + # where users set their API key, server, cache directory, etc. + # This avoids duplicating those settings with hardcoded values. + import openml.config as legacy server_url = legacy.server if "/api" in server_url: From f0e594784b446006e401ab4aa1d7113344b6dd0e Mon Sep 17 00:00:00 2001 From: Simon Blanke Date: Fri, 30 Jan 2026 20:01:16 +0100 Subject: [PATCH 052/156] change usage of settings to new impl. 
--- openml/_api/runtime/core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/openml/_api/runtime/core.py b/openml/_api/runtime/core.py index 24fd2c248..9207fc31d 100644 --- a/openml/_api/runtime/core.py +++ b/openml/_api/runtime/core.py @@ -4,7 +4,7 @@ from typing import TYPE_CHECKING from openml._api.clients import HTTPCache, HTTPClient -from openml._api.config import get_settings +from openml._api.config import Settings from openml._api.resources import ( DatasetsV1, DatasetsV2, @@ -26,7 +26,7 @@ def __init__( def build_backend(version: str, *, strict: bool) -> APIBackend: - settings = get_settings() + settings = Settings.get() # Get config for v1 (lazy init from openml.config) v1_config = settings.get_api_config("v1") From edcd006b574a91e367d96e5c3718daf0edbc352e Mon Sep 17 00:00:00 2001 From: Simon Blanke Date: Fri, 30 Jan 2026 20:06:45 +0100 Subject: [PATCH 053/156] add explanations --- openml/_api/runtime/core.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/openml/_api/runtime/core.py b/openml/_api/runtime/core.py index 9207fc31d..a73105e91 100644 --- a/openml/_api/runtime/core.py +++ b/openml/_api/runtime/core.py @@ -28,7 +28,11 @@ def __init__( def build_backend(version: str, *, strict: bool) -> APIBackend: settings = Settings.get() - # Get config for v1 (lazy init from openml.config) + # Get config for v1. On first access, this triggers lazy initialization + # from openml.config, reading the user's actual API key, server URL, + # cache directory, and retry settings. This avoids circular imports + # (openml.config is imported inside the method, not at module load time) + # and ensures we use the user's configured values rather than hardcoded defaults. 
v1_config = settings.get_api_config("v1") http_cache = HTTPCache( @@ -54,7 +58,11 @@ def build_backend(version: str, *, strict: bool) -> APIBackend: if version == "v1": return v1 - # V2 support - will raise NotImplementedError if v2 config not available + # V2 support. Currently v2 is not yet available, + # so get_api_config("v2") raises NotImplementedError. When v2 becomes available, + # its config will be added to Settings._init_from_legacy_config(). + # In strict mode: propagate the error. + # In non-strict mode: silently fall back to v1 only. try: v2_config = settings.get_api_config("v2") except NotImplementedError: From cde0aaeb7657a03fe6547a9b252a2f13457fc7f0 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 30 Jan 2026 19:10:42 +0000 Subject: [PATCH 054/156] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- openml/_api/config.py | 5 +---- openml/_api/runtime/core.py | 4 +--- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/openml/_api/config.py b/openml/_api/config.py index 32dd8ecf5..76d30f113 100644 --- a/openml/_api/config.py +++ b/openml/_api/config.py @@ -75,10 +75,7 @@ def _init_from_legacy_config(self) -> None: import openml.config as legacy server_url = legacy.server - if "/api" in server_url: - server_base = server_url.rsplit("/api", 1)[0] + "/" - else: - server_base = server_url + server_base = server_url.rsplit("/api", 1)[0] + "/" if "/api" in server_url else server_url self.api_configs["v1"] = APIConfig( server=server_base, diff --git a/openml/_api/runtime/core.py b/openml/_api/runtime/core.py index a73105e91..22b3004a4 100644 --- a/openml/_api/runtime/core.py +++ b/openml/_api/runtime/core.py @@ -18,9 +18,7 @@ class APIBackend: - def __init__( - self, *, datasets: DatasetsAPI | FallbackProxy, tasks: TasksAPI | FallbackProxy - ): + def __init__(self, *, datasets: DatasetsAPI | FallbackProxy, tasks: TasksAPI | 
FallbackProxy): self.datasets = datasets self.tasks = tasks From aa1e5602b87caf59680434a17fe6cc6532f58419 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Sun, 1 Feb 2026 11:29:33 +0500 Subject: [PATCH 055/156] move to config: APIVersion, ResourceType --- openml/_api/config.py | 19 +++++++++++++++++++ openml/_api/resources/base/__init__.py | 4 +--- openml/_api/resources/base/base.py | 21 +-------------------- openml/_api/resources/base/resources.py | 3 ++- openml/_api/resources/base/versions.py | 3 ++- tests/test_api/test_versions.py | 2 +- 6 files changed, 26 insertions(+), 26 deletions(-) diff --git a/openml/_api/config.py b/openml/_api/config.py index 76d30f113..3afbf224f 100644 --- a/openml/_api/config.py +++ b/openml/_api/config.py @@ -4,6 +4,25 @@ from enum import Enum +class APIVersion(str, Enum): + V1 = "v1" + V2 = "v2" + + +class ResourceType(str, Enum): + DATASET = "dataset" + TASK = "task" + TASK_TYPE = "task_type" + EVALUATION_MEASURE = "evaluation_measure" + ESTIMATION_PROCEDURE = "estimation_procedure" + EVALUATION = "evaluation" + FLOW = "flow" + STUDY = "study" + RUN = "run" + SETUP = "setup" + USER = "user" + + class RetryPolicy(str, Enum): HUMAN = "human" ROBOT = "robot" diff --git a/openml/_api/resources/base/__init__.py b/openml/_api/resources/base/__init__.py index bddc09b21..089729d09 100644 --- a/openml/_api/resources/base/__init__.py +++ b/openml/_api/resources/base/__init__.py @@ -1,14 +1,12 @@ -from openml._api.resources.base.base import APIVersion, ResourceAPI, ResourceType +from openml._api.resources.base.base import ResourceAPI from openml._api.resources.base.fallback import FallbackProxy from openml._api.resources.base.resources import DatasetsAPI, TasksAPI from openml._api.resources.base.versions import ResourceV1, ResourceV2 __all__ = [ - "APIVersion", "DatasetsAPI", "FallbackProxy", "ResourceAPI", - "ResourceType", "ResourceV1", "ResourceV2", "TasksAPI", diff --git a/openml/_api/resources/base/base.py 
b/openml/_api/resources/base/base.py index 38ceccbac..dbe3e95ea 100644 --- a/openml/_api/resources/base/base.py +++ b/openml/_api/resources/base/base.py @@ -1,7 +1,6 @@ from __future__ import annotations from abc import ABC, abstractmethod -from enum import Enum from typing import TYPE_CHECKING, NoReturn from openml.exceptions import OpenMLNotSupportedError @@ -11,25 +10,7 @@ from typing import Any from openml._api.clients import HTTPClient - - -class APIVersion(str, Enum): - V1 = "v1" - V2 = "v2" - - -class ResourceType(str, Enum): - DATASET = "dataset" - TASK = "task" - TASK_TYPE = "task_type" - EVALUATION_MEASURE = "evaluation_measure" - ESTIMATION_PROCEDURE = "estimation_procedure" - EVALUATION = "evaluation" - FLOW = "flow" - STUDY = "study" - RUN = "run" - SETUP = "setup" - USER = "user" + from openml._api.config import APIVersion, ResourceType class ResourceAPI(ABC): diff --git a/openml/_api/resources/base/resources.py b/openml/_api/resources/base/resources.py index 55cb95c0d..406bdfa50 100644 --- a/openml/_api/resources/base/resources.py +++ b/openml/_api/resources/base/resources.py @@ -3,7 +3,8 @@ from abc import abstractmethod from typing import TYPE_CHECKING -from openml._api.resources.base import ResourceAPI, ResourceType +from openml._api.config import ResourceType +from openml._api.resources.base import ResourceAPI if TYPE_CHECKING: from requests import Response diff --git a/openml/_api/resources/base/versions.py b/openml/_api/resources/base/versions.py index 04b7617b1..990c3f791 100644 --- a/openml/_api/resources/base/versions.py +++ b/openml/_api/resources/base/versions.py @@ -5,7 +5,8 @@ import xmltodict -from openml._api.resources.base import APIVersion, ResourceAPI, ResourceType +from openml._api.config import APIVersion, ResourceType +from openml._api.resources.base import ResourceAPI from openml.exceptions import ( OpenMLNotAuthorizedError, OpenMLServerError, diff --git a/tests/test_api/test_versions.py b/tests/test_api/test_versions.py index 
d3b1cd45d..9eb4c7a91 100644 --- a/tests/test_api/test_versions.py +++ b/tests/test_api/test_versions.py @@ -1,7 +1,7 @@ import pytest from openml.testing import TestAPIBase from openml._api.resources.base.versions import ResourceV1 -from openml._api.resources.base.resources import ResourceType +from openml._api.config import ResourceType class TestResourceV1(TestAPIBase): From 06b8497eb552e2c880e93f19224a534bef37986b Mon Sep 17 00:00:00 2001 From: geetu040 Date: Sun, 1 Feb 2026 11:48:04 +0500 Subject: [PATCH 056/156] remove api_context entirely --- openml/__init__.py | 2 ++ openml/_api/__init__.py | 8 -------- openml/_api/runtime/core.py | 12 ------------ openml/_api/runtime/instance.py | 5 +++++ 4 files changed, 7 insertions(+), 20 deletions(-) create mode 100644 openml/_api/runtime/instance.py diff --git a/openml/__init__.py b/openml/__init__.py index ae5db261f..a7c95dc2e 100644 --- a/openml/__init__.py +++ b/openml/__init__.py @@ -33,6 +33,7 @@ utils, ) from .__version__ import __version__ +from ._api.runtime.instance import _backend from .datasets import OpenMLDataFeature, OpenMLDataset from .evaluations import OpenMLEvaluation from .flows import OpenMLFlow @@ -109,6 +110,7 @@ def populate_cache( "OpenMLTask", "__version__", "_api_calls", + "_backend", "config", "datasets", "evaluations", diff --git a/openml/_api/__init__.py b/openml/_api/__init__.py index 881f40671..e69de29bb 100644 --- a/openml/_api/__init__.py +++ b/openml/_api/__init__.py @@ -1,8 +0,0 @@ -from openml._api.runtime.core import APIContext - - -def set_api_version(version: str, *, strict: bool = False) -> None: - api_context.set_version(version=version, strict=strict) - - -api_context = APIContext() diff --git a/openml/_api/runtime/core.py b/openml/_api/runtime/core.py index 22b3004a4..d4ae9b688 100644 --- a/openml/_api/runtime/core.py +++ b/openml/_api/runtime/core.py @@ -91,15 +91,3 @@ def build_backend(version: str, *, strict: bool) -> APIBackend: 
datasets=FallbackProxy(DatasetsV2(v2_http_client), DatasetsV1(v1_http_client)), tasks=FallbackProxy(TasksV2(v2_http_client), TasksV1(v1_http_client)), ) - - -class APIContext: - def __init__(self) -> None: - self._backend = build_backend("v1", strict=False) - - def set_version(self, version: str, *, strict: bool = False) -> None: - self._backend = build_backend(version=version, strict=strict) - - @property - def backend(self) -> APIBackend: - return self._backend diff --git a/openml/_api/runtime/instance.py b/openml/_api/runtime/instance.py new file mode 100644 index 000000000..0d945b084 --- /dev/null +++ b/openml/_api/runtime/instance.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from openml._api.runtime.core import APIBackend, build_backend + +_backend: APIBackend = build_backend("v1", strict=False) From 384da91b80d91526826df3afda4ac2624562f6f7 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Sun, 1 Feb 2026 14:40:13 +0500 Subject: [PATCH 057/156] major refactor --- openml/_api/clients/__init__.py | 2 + openml/_api/clients/minio.py | 11 + openml/_api/resources/__init__.py | 36 ++- openml/_api/resources/base/__init__.py | 29 +- openml/_api/resources/base/resources.py | 49 ++-- openml/_api/resources/base/versions.py | 4 +- openml/_api/resources/dataset.py | 11 + openml/_api/resources/datasets.py | 20 -- openml/_api/resources/estimation_procedure.py | 11 + openml/_api/resources/evaluation.py | 11 + openml/_api/resources/evaluation_measure.py | 11 + openml/_api/resources/flow.py | 11 + openml/_api/resources/run.py | 11 + openml/_api/resources/setup.py | 11 + openml/_api/resources/study.py | 11 + openml/_api/resources/task.py | 11 + openml/_api/resources/tasks.py | 128 --------- openml/_api/runtime/core.py | 251 ++++++++++++------ openml/_api/runtime/instance.py | 4 +- tests/test_api/test_versions.py | 6 +- 20 files changed, 382 insertions(+), 257 deletions(-) create mode 100644 openml/_api/resources/dataset.py delete mode 100644 
openml/_api/resources/datasets.py create mode 100644 openml/_api/resources/estimation_procedure.py create mode 100644 openml/_api/resources/evaluation.py create mode 100644 openml/_api/resources/evaluation_measure.py create mode 100644 openml/_api/resources/flow.py create mode 100644 openml/_api/resources/run.py create mode 100644 openml/_api/resources/setup.py create mode 100644 openml/_api/resources/study.py create mode 100644 openml/_api/resources/task.py delete mode 100644 openml/_api/resources/tasks.py diff --git a/openml/_api/clients/__init__.py b/openml/_api/clients/__init__.py index 8a5ff94e4..42f11fbcf 100644 --- a/openml/_api/clients/__init__.py +++ b/openml/_api/clients/__init__.py @@ -1,6 +1,8 @@ from .http import HTTPCache, HTTPClient +from .minio import MinIOClient __all__ = [ "HTTPCache", "HTTPClient", + "MinIOClient", ] diff --git a/openml/_api/clients/minio.py b/openml/_api/clients/minio.py index e69de29bb..2edc8269b 100644 --- a/openml/_api/clients/minio.py +++ b/openml/_api/clients/minio.py @@ -0,0 +1,11 @@ +from __future__ import annotations + +from pathlib import Path + +from openml.__version__ import __version__ + + +class MinIOClient: + def __init__(self, path: Path | None = None) -> None: + self.path = path + self.headers: dict[str, str] = {"user-agent": f"openml-python/{__version__}"} diff --git a/openml/_api/resources/__init__.py b/openml/_api/resources/__init__.py index 6c0807e0f..b666c018b 100644 --- a/openml/_api/resources/__init__.py +++ b/openml/_api/resources/__init__.py @@ -1,5 +1,35 @@ from openml._api.resources.base.fallback import FallbackProxy -from openml._api.resources.datasets import DatasetsV1, DatasetsV2 -from openml._api.resources.tasks import TasksV1, TasksV2 +from openml._api.resources.dataset import DatasetV1API, DatasetV2API +from openml._api.resources.estimation_procedure import ( + EstimationProcedureV1API, + EstimationProcedureV2API, +) +from openml._api.resources.evaluation import EvaluationV1API, EvaluationV2API 
+from openml._api.resources.evaluation_measure import EvaluationMeasureV1API, EvaluationMeasureV2API +from openml._api.resources.flow import FlowV1API, FlowV2API +from openml._api.resources.run import RunV1API, RunV2API +from openml._api.resources.setup import SetupV1API, SetupV2API +from openml._api.resources.study import StudyV1API, StudyV2API +from openml._api.resources.task import TaskV1API, TaskV2API -__all__ = ["DatasetsV1", "DatasetsV2", "FallbackProxy", "TasksV1", "TasksV2"] +__all__ = [ + "DatasetV1API", + "DatasetV2API", + "EstimationProcedureV1API", + "EstimationProcedureV2API", + "EvaluationMeasureV1API", + "EvaluationMeasureV2API", + "EvaluationV1API", + "EvaluationV2API", + "FallbackProxy", + "FlowV1API", + "FlowV2API", + "RunV1API", + "RunV2API", + "SetupV1API", + "SetupV2API", + "StudyV1API", + "StudyV2API", + "TaskV1API", + "TaskV2API", +] diff --git a/openml/_api/resources/base/__init__.py b/openml/_api/resources/base/__init__.py index 089729d09..f222a0b87 100644 --- a/openml/_api/resources/base/__init__.py +++ b/openml/_api/resources/base/__init__.py @@ -1,13 +1,30 @@ from openml._api.resources.base.base import ResourceAPI from openml._api.resources.base.fallback import FallbackProxy -from openml._api.resources.base.resources import DatasetsAPI, TasksAPI -from openml._api.resources.base.versions import ResourceV1, ResourceV2 +from openml._api.resources.base.resources import ( + DatasetAPI, + EstimationProcedureAPI, + EvaluationAPI, + EvaluationMeasureAPI, + FlowAPI, + RunAPI, + SetupAPI, + StudyAPI, + TaskAPI, +) +from openml._api.resources.base.versions import ResourceV1API, ResourceV2API __all__ = [ - "DatasetsAPI", + "DatasetAPI", + "EstimationProcedureAPI", + "EvaluationAPI", + "EvaluationMeasureAPI", "FallbackProxy", + "FlowAPI", "ResourceAPI", - "ResourceV1", - "ResourceV2", - "TasksAPI", + "ResourceV1API", + "ResourceV2API", + "RunAPI", + "SetupAPI", + "StudyAPI", + "TaskAPI", ] diff --git a/openml/_api/resources/base/resources.py 
b/openml/_api/resources/base/resources.py index 406bdfa50..200278fc2 100644 --- a/openml/_api/resources/base/resources.py +++ b/openml/_api/resources/base/resources.py @@ -1,32 +1,49 @@ from __future__ import annotations -from abc import abstractmethod from typing import TYPE_CHECKING from openml._api.config import ResourceType from openml._api.resources.base import ResourceAPI if TYPE_CHECKING: - from requests import Response + from openml._api.clients import HTTPClient, MinIOClient - from openml.datasets.dataset import OpenMLDataset - from openml.tasks.task import OpenMLTask - -class DatasetsAPI(ResourceAPI): +class DatasetAPI(ResourceAPI): resource_type: ResourceType = ResourceType.DATASET - @abstractmethod - def get(self, dataset_id: int) -> OpenMLDataset | tuple[OpenMLDataset, Response]: ... + def __init__(self, http: HTTPClient, minio: MinIOClient): + self._minio = minio + super().__init__(http) -class TasksAPI(ResourceAPI): +class TaskAPI(ResourceAPI): resource_type: ResourceType = ResourceType.TASK - @abstractmethod - def get( - self, - task_id: int, - *, - return_response: bool = False, - ) -> OpenMLTask | tuple[OpenMLTask, Response]: ... 
+ +class EvaluationMeasureAPI(ResourceAPI): + resource_type: ResourceType = ResourceType.EVALUATION_MEASURE + + +class EstimationProcedureAPI(ResourceAPI): + resource_type: ResourceType = ResourceType.ESTIMATION_PROCEDURE + + +class EvaluationAPI(ResourceAPI): + resource_type: ResourceType = ResourceType.EVALUATION + + +class FlowAPI(ResourceAPI): + resource_type: ResourceType = ResourceType.FLOW + + +class StudyAPI(ResourceAPI): + resource_type: ResourceType = ResourceType.STUDY + + +class RunAPI(ResourceAPI): + resource_type: ResourceType = ResourceType.RUN + + +class SetupAPI(ResourceAPI): + resource_type: ResourceType = ResourceType.SETUP diff --git a/openml/_api/resources/base/versions.py b/openml/_api/resources/base/versions.py index 990c3f791..88ae87a1c 100644 --- a/openml/_api/resources/base/versions.py +++ b/openml/_api/resources/base/versions.py @@ -14,7 +14,7 @@ ) -class ResourceV1(ResourceAPI): +class ResourceV1API(ResourceAPI): api_version: APIVersion = APIVersion.V1 def publish(self, path: str, files: Mapping[str, Any] | None) -> int: @@ -138,7 +138,7 @@ def _extract_id_from_upload(self, parsed: Mapping[str, Any]) -> int: raise ValueError("No ID found in upload response") -class ResourceV2(ResourceAPI): +class ResourceV2API(ResourceAPI): api_version: APIVersion = APIVersion.V2 def publish(self, path: str, files: Mapping[str, Any] | None) -> int: # noqa: ARG002 diff --git a/openml/_api/resources/dataset.py b/openml/_api/resources/dataset.py new file mode 100644 index 000000000..3ecad35da --- /dev/null +++ b/openml/_api/resources/dataset.py @@ -0,0 +1,11 @@ +from __future__ import annotations + +from openml._api.resources.base import DatasetAPI, ResourceV1API, ResourceV2API + + +class DatasetV1API(ResourceV1API, DatasetAPI): + pass + + +class DatasetV2API(ResourceV2API, DatasetAPI): + pass diff --git a/openml/_api/resources/datasets.py b/openml/_api/resources/datasets.py deleted file mode 100644 index f3a49a84f..000000000 --- 
a/openml/_api/resources/datasets.py +++ /dev/null @@ -1,20 +0,0 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING - -from openml._api.resources.base import DatasetsAPI, ResourceV1, ResourceV2 - -if TYPE_CHECKING: - from responses import Response - - from openml.datasets.dataset import OpenMLDataset - - -class DatasetsV1(ResourceV1, DatasetsAPI): - def get(self, dataset_id: int) -> OpenMLDataset | tuple[OpenMLDataset, Response]: - raise NotImplementedError - - -class DatasetsV2(ResourceV2, DatasetsAPI): - def get(self, dataset_id: int) -> OpenMLDataset | tuple[OpenMLDataset, Response]: - raise NotImplementedError diff --git a/openml/_api/resources/estimation_procedure.py b/openml/_api/resources/estimation_procedure.py new file mode 100644 index 000000000..d2e73cfa6 --- /dev/null +++ b/openml/_api/resources/estimation_procedure.py @@ -0,0 +1,11 @@ +from __future__ import annotations + +from openml._api.resources.base import EstimationProcedureAPI, ResourceV1API, ResourceV2API + + +class EstimationProcedureV1API(ResourceV1API, EstimationProcedureAPI): + pass + + +class EstimationProcedureV2API(ResourceV2API, EstimationProcedureAPI): + pass diff --git a/openml/_api/resources/evaluation.py b/openml/_api/resources/evaluation.py new file mode 100644 index 000000000..a0149e1e5 --- /dev/null +++ b/openml/_api/resources/evaluation.py @@ -0,0 +1,11 @@ +from __future__ import annotations + +from openml._api.resources.base import EvaluationAPI, ResourceV1API, ResourceV2API + + +class EvaluationV1API(ResourceV1API, EvaluationAPI): + pass + + +class EvaluationV2API(ResourceV2API, EvaluationAPI): + pass diff --git a/openml/_api/resources/evaluation_measure.py b/openml/_api/resources/evaluation_measure.py new file mode 100644 index 000000000..bd4318417 --- /dev/null +++ b/openml/_api/resources/evaluation_measure.py @@ -0,0 +1,11 @@ +from __future__ import annotations + +from openml._api.resources.base import EvaluationMeasureAPI, ResourceV1API, 
ResourceV2API + + +class EvaluationMeasureV1API(ResourceV1API, EvaluationMeasureAPI): + pass + + +class EvaluationMeasureV2API(ResourceV2API, EvaluationMeasureAPI): + pass diff --git a/openml/_api/resources/flow.py b/openml/_api/resources/flow.py new file mode 100644 index 000000000..3b62abd3f --- /dev/null +++ b/openml/_api/resources/flow.py @@ -0,0 +1,11 @@ +from __future__ import annotations + +from openml._api.resources.base import FlowAPI, ResourceV1API, ResourceV2API + + +class FlowV1API(ResourceV1API, FlowAPI): + pass + + +class FlowV2API(ResourceV2API, FlowAPI): + pass diff --git a/openml/_api/resources/run.py b/openml/_api/resources/run.py new file mode 100644 index 000000000..9698c59dd --- /dev/null +++ b/openml/_api/resources/run.py @@ -0,0 +1,11 @@ +from __future__ import annotations + +from openml._api.resources.base import ResourceV1API, ResourceV2API, RunAPI + + +class RunV1API(ResourceV1API, RunAPI): + pass + + +class RunV2API(ResourceV2API, RunAPI): + pass diff --git a/openml/_api/resources/setup.py b/openml/_api/resources/setup.py new file mode 100644 index 000000000..e948e1b38 --- /dev/null +++ b/openml/_api/resources/setup.py @@ -0,0 +1,11 @@ +from __future__ import annotations + +from openml._api.resources.base import ResourceV1API, ResourceV2API, SetupAPI + + +class SetupV1API(ResourceV1API, SetupAPI): + pass + + +class SetupV2API(ResourceV2API, SetupAPI): + pass diff --git a/openml/_api/resources/study.py b/openml/_api/resources/study.py new file mode 100644 index 000000000..8de5868d1 --- /dev/null +++ b/openml/_api/resources/study.py @@ -0,0 +1,11 @@ +from __future__ import annotations + +from openml._api.resources.base import ResourceV1API, ResourceV2API, StudyAPI + + +class StudyV1API(ResourceV1API, StudyAPI): + pass + + +class StudyV2API(ResourceV2API, StudyAPI): + pass diff --git a/openml/_api/resources/task.py b/openml/_api/resources/task.py new file mode 100644 index 000000000..a97d5f726 --- /dev/null +++ 
b/openml/_api/resources/task.py @@ -0,0 +1,11 @@ +from __future__ import annotations + +from openml._api.resources.base import ResourceV1API, ResourceV2API, TaskAPI + + +class TaskV1API(ResourceV1API, TaskAPI): + pass + + +class TaskV2API(ResourceV2API, TaskAPI): + pass diff --git a/openml/_api/resources/tasks.py b/openml/_api/resources/tasks.py deleted file mode 100644 index 8420f8e57..000000000 --- a/openml/_api/resources/tasks.py +++ /dev/null @@ -1,128 +0,0 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING - -import xmltodict - -from openml._api.resources.base import ResourceV1, ResourceV2, TasksAPI -from openml.tasks.task import ( - OpenMLClassificationTask, - OpenMLClusteringTask, - OpenMLLearningCurveTask, - OpenMLRegressionTask, - OpenMLTask, - TaskType, -) - -if TYPE_CHECKING: - from requests import Response - - -class TasksV1(ResourceV1, TasksAPI): - def get( - self, - task_id: int, - *, - return_response: bool = False, - ) -> OpenMLTask | tuple[OpenMLTask, Response]: - path = f"task/{task_id}" - response = self._http.get(path, use_cache=True) - xml_content = response.text - task = self._create_task_from_xml(xml_content) - - if return_response: - return task, response - - return task - - def _create_task_from_xml(self, xml: str) -> OpenMLTask: - """Create a task given a xml string. - - Parameters - ---------- - xml : string - Task xml representation. 
- - Returns - ------- - OpenMLTask - """ - dic = xmltodict.parse(xml)["oml:task"] - estimation_parameters = {} - inputs = {} - # Due to the unordered structure we obtain, we first have to extract - # the possible keys of oml:input; dic["oml:input"] is a list of - # OrderedDicts - - # Check if there is a list of inputs - if isinstance(dic["oml:input"], list): - for input_ in dic["oml:input"]: - name = input_["@name"] - inputs[name] = input_ - # Single input case - elif isinstance(dic["oml:input"], dict): - name = dic["oml:input"]["@name"] - inputs[name] = dic["oml:input"] - - evaluation_measures = None - if "evaluation_measures" in inputs: - evaluation_measures = inputs["evaluation_measures"]["oml:evaluation_measures"][ - "oml:evaluation_measure" - ] - - task_type = TaskType(int(dic["oml:task_type_id"])) - common_kwargs = { - "task_id": dic["oml:task_id"], - "task_type": dic["oml:task_type"], - "task_type_id": task_type, - "data_set_id": inputs["source_data"]["oml:data_set"]["oml:data_set_id"], - "evaluation_measure": evaluation_measures, - } - # TODO: add OpenMLClusteringTask? 
- if task_type in ( - TaskType.SUPERVISED_CLASSIFICATION, - TaskType.SUPERVISED_REGRESSION, - TaskType.LEARNING_CURVE, - ): - # Convert some more parameters - for parameter in inputs["estimation_procedure"]["oml:estimation_procedure"][ - "oml:parameter" - ]: - name = parameter["@name"] - text = parameter.get("#text", "") - estimation_parameters[name] = text - - common_kwargs["estimation_procedure_type"] = inputs["estimation_procedure"][ - "oml:estimation_procedure" - ]["oml:type"] - common_kwargs["estimation_procedure_id"] = int( - inputs["estimation_procedure"]["oml:estimation_procedure"]["oml:id"] - ) - - common_kwargs["estimation_parameters"] = estimation_parameters - common_kwargs["target_name"] = inputs["source_data"]["oml:data_set"][ - "oml:target_feature" - ] - common_kwargs["data_splits_url"] = inputs["estimation_procedure"][ - "oml:estimation_procedure" - ]["oml:data_splits_url"] - - cls = { - TaskType.SUPERVISED_CLASSIFICATION: OpenMLClassificationTask, - TaskType.SUPERVISED_REGRESSION: OpenMLRegressionTask, - TaskType.CLUSTERING: OpenMLClusteringTask, - TaskType.LEARNING_CURVE: OpenMLLearningCurveTask, - }.get(task_type) - if cls is None: - raise NotImplementedError(f"Task type {common_kwargs['task_type']} not supported.") - return cls(**common_kwargs) # type: ignore - - -class TasksV2(ResourceV2, TasksAPI): - def get( - self, - task_id: int, # noqa: ARG002 - *, - return_response: bool = False, # noqa: ARG002 - ) -> OpenMLTask | tuple[OpenMLTask, Response]: - self._not_supported(method="get") diff --git a/openml/_api/runtime/core.py b/openml/_api/runtime/core.py index d4ae9b688..9c3ff70a5 100644 --- a/openml/_api/runtime/core.py +++ b/openml/_api/runtime/core.py @@ -3,91 +3,188 @@ from pathlib import Path from typing import TYPE_CHECKING -from openml._api.clients import HTTPCache, HTTPClient +from openml._api.clients import HTTPCache, HTTPClient, MinIOClient from openml._api.config import Settings from openml._api.resources import ( - DatasetsV1, - 
DatasetsV2, + DatasetV1API, + DatasetV2API, + EstimationProcedureV1API, + EstimationProcedureV2API, + EvaluationMeasureV1API, + EvaluationMeasureV2API, + EvaluationV1API, + EvaluationV2API, FallbackProxy, - TasksV1, - TasksV2, + FlowV1API, + FlowV2API, + RunV1API, + RunV2API, + SetupV1API, + SetupV2API, + StudyV1API, + StudyV2API, + TaskV1API, + TaskV2API, ) if TYPE_CHECKING: - from openml._api.resources.base import DatasetsAPI, TasksAPI - - -class APIBackend: - def __init__(self, *, datasets: DatasetsAPI | FallbackProxy, tasks: TasksAPI | FallbackProxy): - self.datasets = datasets - self.tasks = tasks - - -def build_backend(version: str, *, strict: bool) -> APIBackend: - settings = Settings.get() - - # Get config for v1. On first access, this triggers lazy initialization - # from openml.config, reading the user's actual API key, server URL, - # cache directory, and retry settings. This avoids circular imports - # (openml.config is imported inside the method, not at module load time) - # and ensures we use the user's configured values rather than hardcoded defaults. 
- v1_config = settings.get_api_config("v1") - - http_cache = HTTPCache( - path=Path(settings.cache.dir).expanduser(), - ttl=settings.cache.ttl, + from openml._api.resources.base import ( + DatasetAPI, + EstimationProcedureAPI, + EvaluationAPI, + EvaluationMeasureAPI, + FlowAPI, + RunAPI, + SetupAPI, + StudyAPI, + TaskAPI, ) - v1_http_client = HTTPClient( - server=v1_config.server, - base_url=v1_config.base_url, - api_key=v1_config.api_key, - timeout=v1_config.timeout, - retries=settings.connection.retries, - retry_policy=settings.connection.retry_policy, - cache=http_cache, - ) - v1 = APIBackend( - datasets=DatasetsV1(v1_http_client), - tasks=TasksV1(v1_http_client), - ) +class APIBackend: + def __init__( # noqa: PLR0913 + self, + *, + dataset: DatasetAPI | FallbackProxy, + task: TaskAPI | FallbackProxy, + evaluation_measure: EvaluationMeasureAPI | FallbackProxy, + estimation_procedure: EstimationProcedureAPI | FallbackProxy, + evaluation: EvaluationAPI | FallbackProxy, + flow: FlowAPI | FallbackProxy, + study: StudyAPI | FallbackProxy, + run: RunAPI | FallbackProxy, + setup: SetupAPI | FallbackProxy, + ): + self.dataset = dataset + self.task = task + self.evaluation_measure = evaluation_measure + self.estimation_procedure = estimation_procedure + self.evaluation = evaluation + self.flow = flow + self.study = study + self.run = run + self.setup = setup + + @classmethod + def build(cls, version: str, *, strict: bool) -> APIBackend: + settings = Settings.get() + + # Get config for v1. On first access, this triggers lazy initialization + # from openml.config, reading the user's actual API key, server URL, + # cache directory, and retry settings. This avoids circular imports + # (openml.config is imported inside the method, not at module load time) + # and ensures we use the user's configured values rather than hardcoded defaults. 
+ v1_config = settings.get_api_config("v1") + + http_cache = HTTPCache( + path=Path(settings.cache.dir).expanduser(), + ttl=settings.cache.ttl, + ) + minio_client = MinIOClient( + path=Path(settings.cache.dir).expanduser(), + ) + + v1_http_client = HTTPClient( + server=v1_config.server, + base_url=v1_config.base_url, + api_key=v1_config.api_key, + timeout=v1_config.timeout, + retries=settings.connection.retries, + retry_policy=settings.connection.retry_policy, + cache=http_cache, + ) + v1_dataset = DatasetV1API(v1_http_client, minio_client) + v1_task = TaskV1API(v1_http_client) + v1_evaluation_measure = EvaluationMeasureV1API(v1_http_client) + v1_estimation_procedure = EstimationProcedureV1API(v1_http_client) + v1_evaluation = EvaluationV1API(v1_http_client) + v1_flow = FlowV1API(v1_http_client) + v1_study = StudyV1API(v1_http_client) + v1_run = RunV1API(v1_http_client) + v1_setup = SetupV1API(v1_http_client) + + v1 = cls( + dataset=v1_dataset, + task=v1_task, + evaluation_measure=v1_evaluation_measure, + estimation_procedure=v1_estimation_procedure, + evaluation=v1_evaluation, + flow=v1_flow, + study=v1_study, + run=v1_run, + setup=v1_setup, + ) + + if version == "v1": + return v1 + + # V2 support. Currently v2 is not yet available, + # so get_api_config("v2") raises NotImplementedError. When v2 becomes available, + # its config will be added to Settings._init_from_legacy_config(). + # In strict mode: propagate the error. + # In non-strict mode: silently fall back to v1 only. 
+ try: + v2_config = settings.get_api_config("v2") + except NotImplementedError: + if strict: + raise + # Non-strict mode: fall back to v1 only + return v1 + + v2_http_client = HTTPClient( + server=v2_config.server, + base_url=v2_config.base_url, + api_key=v2_config.api_key, + timeout=v2_config.timeout, + retries=settings.connection.retries, + retry_policy=settings.connection.retry_policy, + cache=http_cache, + ) + v2_dataset = DatasetV2API(v2_http_client, minio_client) + v2_task = TaskV2API(v2_http_client) + v2_evaluation_measure = EvaluationMeasureV2API(v2_http_client) + v2_estimation_procedure = EstimationProcedureV2API(v2_http_client) + v2_evaluation = EvaluationV2API(v2_http_client) + v2_flow = FlowV2API(v2_http_client) + v2_study = StudyV2API(v2_http_client) + v2_run = RunV2API(v2_http_client) + v2_setup = SetupV2API(v2_http_client) + + v2 = cls( + dataset=v2_dataset, + task=v2_task, + evaluation_measure=v2_evaluation_measure, + estimation_procedure=v2_estimation_procedure, + evaluation=v2_evaluation, + flow=v2_flow, + study=v2_study, + run=v2_run, + setup=v2_setup, + ) - if version == "v1": - return v1 - - # V2 support. Currently v2 is not yet available, - # so get_api_config("v2") raises NotImplementedError. When v2 becomes available, - # its config will be added to Settings._init_from_legacy_config(). - # In strict mode: propagate the error. - # In non-strict mode: silently fall back to v1 only. 
- try: - v2_config = settings.get_api_config("v2") - except NotImplementedError: if strict: - raise - # Non-strict mode: fall back to v1 only - return v1 - - v2_http_client = HTTPClient( - server=v2_config.server, - base_url=v2_config.base_url, - api_key=v2_config.api_key, - timeout=v2_config.timeout, - retries=settings.connection.retries, - retry_policy=settings.connection.retry_policy, - cache=http_cache, - ) - - v2 = APIBackend( - datasets=DatasetsV2(v2_http_client), - tasks=TasksV2(v2_http_client), - ) - - if strict: - return v2 - - return APIBackend( - datasets=FallbackProxy(DatasetsV2(v2_http_client), DatasetsV1(v1_http_client)), - tasks=FallbackProxy(TasksV2(v2_http_client), TasksV1(v1_http_client)), - ) + return v2 + + fallback_dataset = FallbackProxy(v1_dataset, v2_dataset) + fallback_task = FallbackProxy(v1_task, v2_task) + fallback_evaluation_measure = FallbackProxy(v1_evaluation_measure, v2_evaluation_measure) + fallback_estimation_procedure = FallbackProxy( + v1_estimation_procedure, v2_estimation_procedure + ) + fallback_evaluation = FallbackProxy(v1_evaluation, v2_evaluation) + fallback_flow = FallbackProxy(v1_flow, v2_flow) + fallback_study = FallbackProxy(v1_study, v2_study) + fallback_run = FallbackProxy(v1_run, v2_run) + fallback_setup = FallbackProxy(v1_setup, v2_setup) + + return cls( + dataset=fallback_dataset, + task=fallback_task, + evaluation_measure=fallback_evaluation_measure, + estimation_procedure=fallback_estimation_procedure, + evaluation=fallback_evaluation, + flow=fallback_flow, + study=fallback_study, + run=fallback_run, + setup=fallback_setup, + ) diff --git a/openml/_api/runtime/instance.py b/openml/_api/runtime/instance.py index 0d945b084..633d3f372 100644 --- a/openml/_api/runtime/instance.py +++ b/openml/_api/runtime/instance.py @@ -1,5 +1,5 @@ from __future__ import annotations -from openml._api.runtime.core import APIBackend, build_backend +from openml._api.runtime.core import APIBackend -_backend: APIBackend = 
build_backend("v1", strict=False) +_backend: APIBackend = APIBackend.build(version="v1", strict=False) diff --git a/tests/test_api/test_versions.py b/tests/test_api/test_versions.py index 9eb4c7a91..2203ab6da 100644 --- a/tests/test_api/test_versions.py +++ b/tests/test_api/test_versions.py @@ -1,13 +1,13 @@ import pytest from openml.testing import TestAPIBase -from openml._api.resources.base.versions import ResourceV1 +from openml._api.resources.base.versions import ResourceV1API from openml._api.config import ResourceType -class TestResourceV1(TestAPIBase): +class TestResourceV1API(TestAPIBase): def setUp(self): super().setUp() - self.resource = ResourceV1(self.http_client) + self.resource = ResourceV1API(self.http_client) self.resource.resource_type = ResourceType.TASK @pytest.mark.uses_test_server() From 187813839c57ddb0d12b702f371fe7d08220c963 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Mon, 2 Feb 2026 10:37:59 +0500 Subject: [PATCH 058/156] more refactoring with setup/ --- openml/__init__.py | 2 - openml/_api/clients/http.py | 2 +- openml/_api/config.py | 114 ------------- openml/_api/resources/__init__.py | 2 + openml/_api/resources/_registry.py | 48 ++++++ openml/_api/resources/base/base.py | 7 +- openml/_api/resources/base/enums.py | 27 +++ openml/_api/resources/base/resources.py | 11 +- openml/_api/resources/base/versions.py | 2 +- openml/_api/runtime/core.py | 190 --------------------- openml/_api/runtime/instance.py | 5 - openml/_api/{runtime => setup}/__init__.py | 0 openml/_api/setup/builder.py | 71 ++++++++ openml/_api/setup/config.py | 62 +++++++ openml/_api/setup/utils.py | 49 ++++++ openml/testing.py | 2 +- tests/test_api/test_versions.py | 2 +- 17 files changed, 268 insertions(+), 328 deletions(-) delete mode 100644 openml/_api/config.py create mode 100644 openml/_api/resources/_registry.py create mode 100644 openml/_api/resources/base/enums.py delete mode 100644 openml/_api/runtime/core.py delete mode 100644 openml/_api/runtime/instance.py 
rename openml/_api/{runtime => setup}/__init__.py (100%) create mode 100644 openml/_api/setup/builder.py create mode 100644 openml/_api/setup/config.py create mode 100644 openml/_api/setup/utils.py diff --git a/openml/__init__.py b/openml/__init__.py index a7c95dc2e..ae5db261f 100644 --- a/openml/__init__.py +++ b/openml/__init__.py @@ -33,7 +33,6 @@ utils, ) from .__version__ import __version__ -from ._api.runtime.instance import _backend from .datasets import OpenMLDataFeature, OpenMLDataset from .evaluations import OpenMLEvaluation from .flows import OpenMLFlow @@ -110,7 +109,6 @@ def populate_cache( "OpenMLTask", "__version__", "_api_calls", - "_backend", "config", "datasets", "evaluations", diff --git a/openml/_api/clients/http.py b/openml/_api/clients/http.py index dfcdf5a8a..f700c108a 100644 --- a/openml/_api/clients/http.py +++ b/openml/_api/clients/http.py @@ -16,7 +16,7 @@ from requests import Response from openml.__version__ import __version__ -from openml._api.config import RetryPolicy +from openml._api.resources.base.enums import RetryPolicy from openml.exceptions import ( OpenMLNotAuthorizedError, OpenMLServerError, diff --git a/openml/_api/config.py b/openml/_api/config.py deleted file mode 100644 index 3afbf224f..000000000 --- a/openml/_api/config.py +++ /dev/null @@ -1,114 +0,0 @@ -from __future__ import annotations - -from dataclasses import dataclass -from enum import Enum - - -class APIVersion(str, Enum): - V1 = "v1" - V2 = "v2" - - -class ResourceType(str, Enum): - DATASET = "dataset" - TASK = "task" - TASK_TYPE = "task_type" - EVALUATION_MEASURE = "evaluation_measure" - ESTIMATION_PROCEDURE = "estimation_procedure" - EVALUATION = "evaluation" - FLOW = "flow" - STUDY = "study" - RUN = "run" - SETUP = "setup" - USER = "user" - - -class RetryPolicy(str, Enum): - HUMAN = "human" - ROBOT = "robot" - - -@dataclass -class APIConfig: - server: str - base_url: str - api_key: str - timeout: int = 10 # seconds - - -@dataclass -class ConnectionConfig: - 
retries: int = 3 - retry_policy: RetryPolicy = RetryPolicy.HUMAN - - -@dataclass -class CacheConfig: - dir: str = "~/.openml/cache" - ttl: int = 60 * 60 * 24 * 7 # one week - - -class Settings: - """Settings container that reads from openml.config on access.""" - - _instance: Settings | None = None - - def __init__(self) -> None: - self.api_configs: dict[str, APIConfig] = {} - self.connection = ConnectionConfig() - self.cache = CacheConfig() - self._initialized = False - - @classmethod - def get(cls) -> Settings: - """Get settings singleton, creating on first access.""" - if cls._instance is None: - cls._instance = cls() - return cls._instance - - @classmethod - def reset(cls) -> None: - """Reset the settings singleton. Useful for testing.""" - cls._instance = None - - def get_api_config(self, version: str) -> APIConfig: - """Get API config for a version, with lazy initialization from openml.config.""" - if not self._initialized: - self._init_from_legacy_config() - if version not in self.api_configs: - raise NotImplementedError( - f"API {version} is not yet available. " - f"Supported versions: {list(self.api_configs.keys())}" - ) - return self.api_configs[version] - - def _init_from_legacy_config(self) -> None: - """Lazy init from openml.config to avoid circular imports.""" - if self._initialized: - return - - # Import here (not at module level) to avoid circular imports. - # We read from openml.config to integrate with the existing config system - # where users set their API key, server, cache directory, etc. - # This avoids duplicating those settings with hardcoded values. 
- import openml.config as legacy - - server_url = legacy.server - server_base = server_url.rsplit("/api", 1)[0] + "/" if "/api" in server_url else server_url - - self.api_configs["v1"] = APIConfig( - server=server_base, - base_url="api/v1/xml/", - api_key=legacy.apikey, - ) - - # Sync connection- and cache- settings from legacy config - self.connection = ConnectionConfig( - retries=legacy.connection_n_retries, - retry_policy=RetryPolicy(legacy.retry_policy), - ) - self.cache = CacheConfig( - dir=str(legacy._root_cache_directory), - ) - - self._initialized = True diff --git a/openml/_api/resources/__init__.py b/openml/_api/resources/__init__.py index b666c018b..a3dc63798 100644 --- a/openml/_api/resources/__init__.py +++ b/openml/_api/resources/__init__.py @@ -1,3 +1,4 @@ +from openml._api.resources._registry import API_REGISTRY from openml._api.resources.base.fallback import FallbackProxy from openml._api.resources.dataset import DatasetV1API, DatasetV2API from openml._api.resources.estimation_procedure import ( @@ -13,6 +14,7 @@ from openml._api.resources.task import TaskV1API, TaskV2API __all__ = [ + "API_REGISTRY", "DatasetV1API", "DatasetV2API", "EstimationProcedureV1API", diff --git a/openml/_api/resources/_registry.py b/openml/_api/resources/_registry.py new file mode 100644 index 000000000..e8746f481 --- /dev/null +++ b/openml/_api/resources/_registry.py @@ -0,0 +1,48 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from openml._api.resources.base.enums import APIVersion, ResourceType +from openml._api.resources.dataset import DatasetV1API, DatasetV2API +from openml._api.resources.estimation_procedure import ( + EstimationProcedureV1API, + EstimationProcedureV2API, +) +from openml._api.resources.evaluation import EvaluationV1API, EvaluationV2API +from openml._api.resources.evaluation_measure import EvaluationMeasureV1API, EvaluationMeasureV2API +from openml._api.resources.flow import FlowV1API, FlowV2API +from 
openml._api.resources.run import RunV1API, RunV2API +from openml._api.resources.setup import SetupV1API, SetupV2API +from openml._api.resources.study import StudyV1API, StudyV2API +from openml._api.resources.task import TaskV1API, TaskV2API + +if TYPE_CHECKING: + from openml._api.resources.base import ResourceAPI + +API_REGISTRY: dict[ + APIVersion, + dict[ResourceType, type[ResourceAPI]], +] = { + APIVersion.V1: { + ResourceType.DATASET: DatasetV1API, + ResourceType.TASK: TaskV1API, + ResourceType.EVALUATION_MEASURE: EvaluationMeasureV1API, + ResourceType.ESTIMATION_PROCEDURE: EstimationProcedureV1API, + ResourceType.EVALUATION: EvaluationV1API, + ResourceType.FLOW: FlowV1API, + ResourceType.STUDY: StudyV1API, + ResourceType.RUN: RunV1API, + ResourceType.SETUP: SetupV1API, + }, + APIVersion.V2: { + ResourceType.DATASET: DatasetV2API, + ResourceType.TASK: TaskV2API, + ResourceType.EVALUATION_MEASURE: EvaluationMeasureV2API, + ResourceType.ESTIMATION_PROCEDURE: EstimationProcedureV2API, + ResourceType.EVALUATION: EvaluationV2API, + ResourceType.FLOW: FlowV2API, + ResourceType.STUDY: StudyV2API, + ResourceType.RUN: RunV2API, + ResourceType.SETUP: SetupV2API, + }, +} diff --git a/openml/_api/resources/base/base.py b/openml/_api/resources/base/base.py index dbe3e95ea..6a47f83f4 100644 --- a/openml/_api/resources/base/base.py +++ b/openml/_api/resources/base/base.py @@ -9,16 +9,17 @@ from collections.abc import Mapping from typing import Any - from openml._api.clients import HTTPClient - from openml._api.config import APIVersion, ResourceType + from openml._api.clients import HTTPClient, MinIOClient + from openml._api.resources.base.enums import APIVersion, ResourceType class ResourceAPI(ABC): api_version: APIVersion resource_type: ResourceType - def __init__(self, http: HTTPClient): + def __init__(self, http: HTTPClient, minio: MinIOClient | None = None): self._http = http + self._minio = minio @abstractmethod def delete(self, resource_id: int) -> bool: ... 
diff --git a/openml/_api/resources/base/enums.py b/openml/_api/resources/base/enums.py new file mode 100644 index 000000000..13201b3ec --- /dev/null +++ b/openml/_api/resources/base/enums.py @@ -0,0 +1,27 @@ +from __future__ import annotations + +from enum import Enum + + +class APIVersion(str, Enum): + V1 = "v1" + V2 = "v2" + + +class ResourceType(str, Enum): + DATASET = "dataset" + TASK = "task" + TASK_TYPE = "task_type" + EVALUATION_MEASURE = "evaluation_measure" + ESTIMATION_PROCEDURE = "estimation_procedure" + EVALUATION = "evaluation" + FLOW = "flow" + STUDY = "study" + RUN = "run" + SETUP = "setup" + USER = "user" + + +class RetryPolicy(str, Enum): + HUMAN = "human" + ROBOT = "robot" diff --git a/openml/_api/resources/base/resources.py b/openml/_api/resources/base/resources.py index 200278fc2..270472029 100644 --- a/openml/_api/resources/base/resources.py +++ b/openml/_api/resources/base/resources.py @@ -1,21 +1,12 @@ from __future__ import annotations -from typing import TYPE_CHECKING - -from openml._api.config import ResourceType from openml._api.resources.base import ResourceAPI - -if TYPE_CHECKING: - from openml._api.clients import HTTPClient, MinIOClient +from openml._api.resources.base.enums import ResourceType class DatasetAPI(ResourceAPI): resource_type: ResourceType = ResourceType.DATASET - def __init__(self, http: HTTPClient, minio: MinIOClient): - self._minio = minio - super().__init__(http) - class TaskAPI(ResourceAPI): resource_type: ResourceType = ResourceType.TASK diff --git a/openml/_api/resources/base/versions.py b/openml/_api/resources/base/versions.py index 88ae87a1c..f8b21a469 100644 --- a/openml/_api/resources/base/versions.py +++ b/openml/_api/resources/base/versions.py @@ -5,8 +5,8 @@ import xmltodict -from openml._api.config import APIVersion, ResourceType from openml._api.resources.base import ResourceAPI +from openml._api.resources.base.enums import APIVersion, ResourceType from openml.exceptions import ( OpenMLNotAuthorizedError, 
OpenMLServerError, diff --git a/openml/_api/runtime/core.py b/openml/_api/runtime/core.py deleted file mode 100644 index 9c3ff70a5..000000000 --- a/openml/_api/runtime/core.py +++ /dev/null @@ -1,190 +0,0 @@ -from __future__ import annotations - -from pathlib import Path -from typing import TYPE_CHECKING - -from openml._api.clients import HTTPCache, HTTPClient, MinIOClient -from openml._api.config import Settings -from openml._api.resources import ( - DatasetV1API, - DatasetV2API, - EstimationProcedureV1API, - EstimationProcedureV2API, - EvaluationMeasureV1API, - EvaluationMeasureV2API, - EvaluationV1API, - EvaluationV2API, - FallbackProxy, - FlowV1API, - FlowV2API, - RunV1API, - RunV2API, - SetupV1API, - SetupV2API, - StudyV1API, - StudyV2API, - TaskV1API, - TaskV2API, -) - -if TYPE_CHECKING: - from openml._api.resources.base import ( - DatasetAPI, - EstimationProcedureAPI, - EvaluationAPI, - EvaluationMeasureAPI, - FlowAPI, - RunAPI, - SetupAPI, - StudyAPI, - TaskAPI, - ) - - -class APIBackend: - def __init__( # noqa: PLR0913 - self, - *, - dataset: DatasetAPI | FallbackProxy, - task: TaskAPI | FallbackProxy, - evaluation_measure: EvaluationMeasureAPI | FallbackProxy, - estimation_procedure: EstimationProcedureAPI | FallbackProxy, - evaluation: EvaluationAPI | FallbackProxy, - flow: FlowAPI | FallbackProxy, - study: StudyAPI | FallbackProxy, - run: RunAPI | FallbackProxy, - setup: SetupAPI | FallbackProxy, - ): - self.dataset = dataset - self.task = task - self.evaluation_measure = evaluation_measure - self.estimation_procedure = estimation_procedure - self.evaluation = evaluation - self.flow = flow - self.study = study - self.run = run - self.setup = setup - - @classmethod - def build(cls, version: str, *, strict: bool) -> APIBackend: - settings = Settings.get() - - # Get config for v1. On first access, this triggers lazy initialization - # from openml.config, reading the user's actual API key, server URL, - # cache directory, and retry settings. 
This avoids circular imports - # (openml.config is imported inside the method, not at module load time) - # and ensures we use the user's configured values rather than hardcoded defaults. - v1_config = settings.get_api_config("v1") - - http_cache = HTTPCache( - path=Path(settings.cache.dir).expanduser(), - ttl=settings.cache.ttl, - ) - minio_client = MinIOClient( - path=Path(settings.cache.dir).expanduser(), - ) - - v1_http_client = HTTPClient( - server=v1_config.server, - base_url=v1_config.base_url, - api_key=v1_config.api_key, - timeout=v1_config.timeout, - retries=settings.connection.retries, - retry_policy=settings.connection.retry_policy, - cache=http_cache, - ) - v1_dataset = DatasetV1API(v1_http_client, minio_client) - v1_task = TaskV1API(v1_http_client) - v1_evaluation_measure = EvaluationMeasureV1API(v1_http_client) - v1_estimation_procedure = EstimationProcedureV1API(v1_http_client) - v1_evaluation = EvaluationV1API(v1_http_client) - v1_flow = FlowV1API(v1_http_client) - v1_study = StudyV1API(v1_http_client) - v1_run = RunV1API(v1_http_client) - v1_setup = SetupV1API(v1_http_client) - - v1 = cls( - dataset=v1_dataset, - task=v1_task, - evaluation_measure=v1_evaluation_measure, - estimation_procedure=v1_estimation_procedure, - evaluation=v1_evaluation, - flow=v1_flow, - study=v1_study, - run=v1_run, - setup=v1_setup, - ) - - if version == "v1": - return v1 - - # V2 support. Currently v2 is not yet available, - # so get_api_config("v2") raises NotImplementedError. When v2 becomes available, - # its config will be added to Settings._init_from_legacy_config(). - # In strict mode: propagate the error. - # In non-strict mode: silently fall back to v1 only. 
- try: - v2_config = settings.get_api_config("v2") - except NotImplementedError: - if strict: - raise - # Non-strict mode: fall back to v1 only - return v1 - - v2_http_client = HTTPClient( - server=v2_config.server, - base_url=v2_config.base_url, - api_key=v2_config.api_key, - timeout=v2_config.timeout, - retries=settings.connection.retries, - retry_policy=settings.connection.retry_policy, - cache=http_cache, - ) - v2_dataset = DatasetV2API(v2_http_client, minio_client) - v2_task = TaskV2API(v2_http_client) - v2_evaluation_measure = EvaluationMeasureV2API(v2_http_client) - v2_estimation_procedure = EstimationProcedureV2API(v2_http_client) - v2_evaluation = EvaluationV2API(v2_http_client) - v2_flow = FlowV2API(v2_http_client) - v2_study = StudyV2API(v2_http_client) - v2_run = RunV2API(v2_http_client) - v2_setup = SetupV2API(v2_http_client) - - v2 = cls( - dataset=v2_dataset, - task=v2_task, - evaluation_measure=v2_evaluation_measure, - estimation_procedure=v2_estimation_procedure, - evaluation=v2_evaluation, - flow=v2_flow, - study=v2_study, - run=v2_run, - setup=v2_setup, - ) - - if strict: - return v2 - - fallback_dataset = FallbackProxy(v1_dataset, v2_dataset) - fallback_task = FallbackProxy(v1_task, v2_task) - fallback_evaluation_measure = FallbackProxy(v1_evaluation_measure, v2_evaluation_measure) - fallback_estimation_procedure = FallbackProxy( - v1_estimation_procedure, v2_estimation_procedure - ) - fallback_evaluation = FallbackProxy(v1_evaluation, v2_evaluation) - fallback_flow = FallbackProxy(v1_flow, v2_flow) - fallback_study = FallbackProxy(v1_study, v2_study) - fallback_run = FallbackProxy(v1_run, v2_run) - fallback_setup = FallbackProxy(v1_setup, v2_setup) - - return cls( - dataset=fallback_dataset, - task=fallback_task, - evaluation_measure=fallback_evaluation_measure, - estimation_procedure=fallback_estimation_procedure, - evaluation=fallback_evaluation, - flow=fallback_flow, - study=fallback_study, - run=fallback_run, - setup=fallback_setup, - ) 
diff --git a/openml/_api/runtime/instance.py b/openml/_api/runtime/instance.py deleted file mode 100644 index 633d3f372..000000000 --- a/openml/_api/runtime/instance.py +++ /dev/null @@ -1,5 +0,0 @@ -from __future__ import annotations - -from openml._api.runtime.core import APIBackend - -_backend: APIBackend = APIBackend.build(version="v1", strict=False) diff --git a/openml/_api/runtime/__init__.py b/openml/_api/setup/__init__.py similarity index 100% rename from openml/_api/runtime/__init__.py rename to openml/_api/setup/__init__.py diff --git a/openml/_api/setup/builder.py b/openml/_api/setup/builder.py new file mode 100644 index 000000000..4f4b843d7 --- /dev/null +++ b/openml/_api/setup/builder.py @@ -0,0 +1,71 @@ +from __future__ import annotations + +from collections.abc import Mapping +from pathlib import Path +from typing import TYPE_CHECKING + +from openml._api.clients import HTTPCache, HTTPClient, MinIOClient +from openml._api.resources import API_REGISTRY, FallbackProxy + +if TYPE_CHECKING: + from openml._api.resources.base import ResourceAPI + from openml._api.resources.base.enums import ResourceType + from openml._api.setup.config import Config + + +class APIBackendBuilder: + def __init__( + self, + resource_apis: Mapping[ResourceType, ResourceAPI | FallbackProxy], + ): + for resource_type, resource_api in resource_apis.items(): + setattr(self, resource_type.value, resource_api) + + @classmethod + def build(cls, config: Config) -> APIBackendBuilder: + cache_dir = Path(config.cache.dir).expanduser() + + http_cache = HTTPCache(path=cache_dir, ttl=config.cache.ttl) + minio_client = MinIOClient(path=cache_dir) + + primary_api_config = config.api_configs[config.api_version] + primary_http_client = HTTPClient( + server=primary_api_config.server, + base_url=primary_api_config.base_url, + api_key=primary_api_config.api_key, + timeout=config.connection.timeout, + retries=config.connection.retries, + retry_policy=config.connection.retry_policy, + 
cache=http_cache, + ) + + resource_apis: dict[ResourceType, ResourceAPI] = {} + for resource_type, resource_api_cls in API_REGISTRY[config.api_version].items(): + resource_apis[resource_type] = resource_api_cls(primary_http_client, minio_client) + + if config.fallback_api_version is None: + return cls(resource_apis) + + fallback_api_config = config.api_configs[config.fallback_api_version] + fallback_http_client = HTTPClient( + server=fallback_api_config.server, + base_url=fallback_api_config.base_url, + api_key=fallback_api_config.api_key, + timeout=config.connection.timeout, + retries=config.connection.retries, + retry_policy=config.connection.retry_policy, + cache=http_cache, + ) + + fallback_resource_apis: dict[ResourceType, ResourceAPI] = {} + for resource_type, resource_api_cls in API_REGISTRY[config.fallback_api_version].items(): + fallback_resource_apis[resource_type] = resource_api_cls( + fallback_http_client, minio_client + ) + + merged: dict[ResourceType, FallbackProxy] = { + name: FallbackProxy(resource_apis[name], fallback_resource_apis[name]) + for name in resource_apis + } + + return cls(merged) diff --git a/openml/_api/setup/config.py b/openml/_api/setup/config.py new file mode 100644 index 000000000..0f783a23e --- /dev/null +++ b/openml/_api/setup/config.py @@ -0,0 +1,62 @@ +from __future__ import annotations + +from dataclasses import dataclass, field + +from openml._api.resources.base.enums import APIVersion, RetryPolicy +from openml._api.setup.utils import _resolve_default_cache_dir + + +@dataclass +class APIConfig: + server: str + base_url: str + api_key: str + + +@dataclass +class ConnectionConfig: + retries: int + retry_policy: RetryPolicy + timeout: int + + +@dataclass +class CacheConfig: + dir: str + ttl: int + + +@dataclass +class Config: + api_version: APIVersion = APIVersion.V1 + fallback_api_version: APIVersion | None = None + + api_configs: dict[APIVersion, APIConfig] = field( + default_factory=lambda: { + APIVersion.V1: APIConfig( + 
server="https://www.openml.org/", + base_url="api/v1/xml/", + api_key="", + ), + APIVersion.V2: APIConfig( + server="http://localhost:8002/", + base_url="", + api_key="", + ), + } + ) + + connection: ConnectionConfig = field( + default_factory=lambda: ConnectionConfig( + retries=5, + retry_policy=RetryPolicy.HUMAN, + timeout=10, + ) + ) + + cache: CacheConfig = field( + default_factory=lambda: CacheConfig( + dir=str(_resolve_default_cache_dir()), + ttl=60 * 60 * 24 * 7, + ) + ) diff --git a/openml/_api/setup/utils.py b/openml/_api/setup/utils.py new file mode 100644 index 000000000..ddcf5b41c --- /dev/null +++ b/openml/_api/setup/utils.py @@ -0,0 +1,49 @@ +from __future__ import annotations + +import logging +import os +import platform +from pathlib import Path + +openml_logger = logging.getLogger("openml") + +# Default values (see also https://github.com/openml/OpenML/wiki/Client-API-Standards) +_user_path = Path("~").expanduser().absolute() + + +def _resolve_default_cache_dir() -> Path: + user_defined_cache_dir = os.environ.get("OPENML_CACHE_DIR") + if user_defined_cache_dir is not None: + return Path(user_defined_cache_dir) + + if platform.system().lower() != "linux": + return _user_path / ".openml" + + xdg_cache_home = os.environ.get("XDG_CACHE_HOME") + if xdg_cache_home is None: + return Path("~", ".cache", "openml") + + # This is the proper XDG_CACHE_HOME directory, but + # we unfortunately had a problem where we used XDG_CACHE_HOME/org, + # we check heuristically if this old directory still exists and issue + # a warning if it does. There's too much data to move to do this for the user. 
+ + # The new cache directory exists + cache_dir = Path(xdg_cache_home) / "openml" + if cache_dir.exists(): + return cache_dir + + # The old cache directory *does not* exist + heuristic_dir_for_backwards_compat = Path(xdg_cache_home) / "org" / "openml" + if not heuristic_dir_for_backwards_compat.exists(): + return cache_dir + + root_dir_to_delete = Path(xdg_cache_home) / "org" + openml_logger.warning( + "An old cache directory was found at '%s'. This directory is no longer used by " + "OpenML-Python. To silence this warning you would need to delete the old cache " + "directory. The cached files will then be located in '%s'.", + root_dir_to_delete, + cache_dir, + ) + return Path(xdg_cache_home) diff --git a/openml/testing.py b/openml/testing.py index b0aaac9be..18e03fb86 100644 --- a/openml/testing.py +++ b/openml/testing.py @@ -17,7 +17,7 @@ import openml from openml._api.clients import HTTPCache, HTTPClient -from openml._api.config import RetryPolicy +from openml._api.resources.base.enums import RetryPolicy from openml.exceptions import OpenMLServerException from openml.tasks import TaskType diff --git a/tests/test_api/test_versions.py b/tests/test_api/test_versions.py index 2203ab6da..fd41feb2a 100644 --- a/tests/test_api/test_versions.py +++ b/tests/test_api/test_versions.py @@ -1,7 +1,7 @@ import pytest from openml.testing import TestAPIBase from openml._api.resources.base.versions import ResourceV1API -from openml._api.config import ResourceType +from openml._api.resources.base.enums import ResourceType class TestResourceV1API(TestAPIBase): From dc26e016e02b4ed23961f148234398582b152e6f Mon Sep 17 00:00:00 2001 From: geetu040 Date: Mon, 2 Feb 2026 10:40:03 +0500 Subject: [PATCH 059/156] implement APIBackend as controller --- openml/__init__.py | 2 ++ openml/_api/setup/_instance.py | 5 +++ openml/_api/setup/backend.py | 62 ++++++++++++++++++++++++++++++++++ 3 files changed, 69 insertions(+) create mode 100644 openml/_api/setup/_instance.py create mode 100644 
openml/_api/setup/backend.py
diff --git a/openml/__init__.py b/openml/__init__.py
index ae5db261f..fdf3b90e4 100644
--- a/openml/__init__.py
+++ b/openml/__init__.py
@@ -33,6 +33,7 @@
     utils,
 )
 from .__version__ import __version__
+from ._api.setup._instance import _backend
 from .datasets import OpenMLDataFeature, OpenMLDataset
 from .evaluations import OpenMLEvaluation
 from .flows import OpenMLFlow
@@ -109,6 +110,7 @@
     "OpenMLTask",
     "__version__",
     "_api_calls",
+    "_backend",
     "config",
     "datasets",
     "evaluations",
diff --git a/openml/_api/setup/_instance.py b/openml/_api/setup/_instance.py
new file mode 100644
index 000000000..2d9818a0d
--- /dev/null
+++ b/openml/_api/setup/_instance.py
@@ -0,0 +1,5 @@
+from __future__ import annotations
+
+from openml._api.setup.backend import APIBackend
+
+_backend = APIBackend.get_instance()
diff --git a/openml/_api/setup/backend.py b/openml/_api/setup/backend.py
new file mode 100644
index 000000000..7c300e143
--- /dev/null
+++ b/openml/_api/setup/backend.py
@@ -0,0 +1,62 @@
+from __future__ import annotations
+
+from copy import deepcopy
+from typing import Any
+
+from openml._api.setup.builder import APIBackendBuilder
+from openml._api.setup.config import Config
+
+
+class APIBackend:
+    _instance: APIBackend | None = None
+
+    def __init__(self, config: Config | None = None):
+        self._config: Config = config or Config()
+        self._backend = APIBackendBuilder.build(self._config)
+
+    def __getattr__(self, name: str) -> Any:
+        """
+        Delegate attribute access to the underlying backend.
+        Called only if the attribute is not found on APIBackend itself.
+ """ + return getattr(self._backend, name) + + @classmethod + def get_instance(cls) -> APIBackend: + if cls._instance is None: + cls._instance = cls() + return cls._instance + + @classmethod + def get_config(cls) -> Config: + return deepcopy(cls.get_instance()._config) + + @classmethod + def set_config(cls, config: Config) -> None: + instance = cls.get_instance() + instance._config = config + instance._backend = APIBackendBuilder.build(config) + + @classmethod + def get_config_value(cls, key: str) -> Config: + keys = key.split(".") + config_value = cls.get_instance()._config + for k in keys: + if isinstance(config_value, dict): + config_value = config_value[k] + else: + config_value = getattr(config_value, k) + return deepcopy(config_value) + + @classmethod + def set_config_value(cls, key: str, value: Any) -> None: + keys = key.split(".") + config = cls.get_instance()._config + parent = config + for k in keys[:-1]: + parent = parent[k] if isinstance(parent, dict) else getattr(parent, k) + if isinstance(parent, dict): + parent[keys[-1]] = value + else: + setattr(parent, keys[-1], value) + cls.set_config(config) From e2d059b110da6d6b1355773b5b1b35689e977dca Mon Sep 17 00:00:00 2001 From: geetu040 Date: Mon, 2 Feb 2026 12:05:33 +0500 Subject: [PATCH 060/156] move enums --- openml/_api/clients/http.py | 2 +- openml/_api/resources/_registry.py | 2 +- openml/_api/resources/base/base.py | 2 +- openml/_api/resources/base/resources.py | 2 +- openml/_api/resources/base/versions.py | 2 +- openml/_api/setup/builder.py | 2 +- openml/_api/setup/config.py | 2 +- openml/{_api/resources/base => }/enums.py | 6 ++++++ openml/testing.py | 2 +- tests/test_api/test_versions.py | 2 +- 10 files changed, 15 insertions(+), 9 deletions(-) rename openml/{_api/resources/base => }/enums.py (76%) diff --git a/openml/_api/clients/http.py b/openml/_api/clients/http.py index f700c108a..353cd5e9e 100644 --- a/openml/_api/clients/http.py +++ b/openml/_api/clients/http.py @@ -16,7 +16,7 @@ from 
requests import Response from openml.__version__ import __version__ -from openml._api.resources.base.enums import RetryPolicy +from openml.enums import RetryPolicy from openml.exceptions import ( OpenMLNotAuthorizedError, OpenMLServerError, diff --git a/openml/_api/resources/_registry.py b/openml/_api/resources/_registry.py index e8746f481..b1a5f2b74 100644 --- a/openml/_api/resources/_registry.py +++ b/openml/_api/resources/_registry.py @@ -2,7 +2,6 @@ from typing import TYPE_CHECKING -from openml._api.resources.base.enums import APIVersion, ResourceType from openml._api.resources.dataset import DatasetV1API, DatasetV2API from openml._api.resources.estimation_procedure import ( EstimationProcedureV1API, @@ -15,6 +14,7 @@ from openml._api.resources.setup import SetupV1API, SetupV2API from openml._api.resources.study import StudyV1API, StudyV2API from openml._api.resources.task import TaskV1API, TaskV2API +from openml.enums import APIVersion, ResourceType if TYPE_CHECKING: from openml._api.resources.base import ResourceAPI diff --git a/openml/_api/resources/base/base.py b/openml/_api/resources/base/base.py index 6a47f83f4..5eadc4932 100644 --- a/openml/_api/resources/base/base.py +++ b/openml/_api/resources/base/base.py @@ -10,7 +10,7 @@ from typing import Any from openml._api.clients import HTTPClient, MinIOClient - from openml._api.resources.base.enums import APIVersion, ResourceType + from openml.enums import APIVersion, ResourceType class ResourceAPI(ABC): diff --git a/openml/_api/resources/base/resources.py b/openml/_api/resources/base/resources.py index 270472029..5c4dde9de 100644 --- a/openml/_api/resources/base/resources.py +++ b/openml/_api/resources/base/resources.py @@ -1,7 +1,7 @@ from __future__ import annotations from openml._api.resources.base import ResourceAPI -from openml._api.resources.base.enums import ResourceType +from openml.enums import ResourceType class DatasetAPI(ResourceAPI): diff --git a/openml/_api/resources/base/versions.py 
b/openml/_api/resources/base/versions.py index f8b21a469..a98a0ad43 100644 --- a/openml/_api/resources/base/versions.py +++ b/openml/_api/resources/base/versions.py @@ -6,7 +6,7 @@ import xmltodict from openml._api.resources.base import ResourceAPI -from openml._api.resources.base.enums import APIVersion, ResourceType +from openml.enums import APIVersion, ResourceType from openml.exceptions import ( OpenMLNotAuthorizedError, OpenMLServerError, diff --git a/openml/_api/setup/builder.py b/openml/_api/setup/builder.py index 4f4b843d7..135b18da3 100644 --- a/openml/_api/setup/builder.py +++ b/openml/_api/setup/builder.py @@ -9,8 +9,8 @@ if TYPE_CHECKING: from openml._api.resources.base import ResourceAPI - from openml._api.resources.base.enums import ResourceType from openml._api.setup.config import Config + from openml.enums import ResourceType class APIBackendBuilder: diff --git a/openml/_api/setup/config.py b/openml/_api/setup/config.py index 0f783a23e..64e790404 100644 --- a/openml/_api/setup/config.py +++ b/openml/_api/setup/config.py @@ -2,8 +2,8 @@ from dataclasses import dataclass, field -from openml._api.resources.base.enums import APIVersion, RetryPolicy from openml._api.setup.utils import _resolve_default_cache_dir +from openml.enums import APIVersion, RetryPolicy @dataclass diff --git a/openml/_api/resources/base/enums.py b/openml/enums.py similarity index 76% rename from openml/_api/resources/base/enums.py rename to openml/enums.py index 13201b3ec..f5a4381b7 100644 --- a/openml/_api/resources/base/enums.py +++ b/openml/enums.py @@ -4,11 +4,15 @@ class APIVersion(str, Enum): + """Supported OpenML API versions.""" + V1 = "v1" V2 = "v2" class ResourceType(str, Enum): + """Canonical resource types exposed by the OpenML API.""" + DATASET = "dataset" TASK = "task" TASK_TYPE = "task_type" @@ -23,5 +27,7 @@ class ResourceType(str, Enum): class RetryPolicy(str, Enum): + """Retry behavior for failed API requests.""" + HUMAN = "human" ROBOT = "robot" diff --git 
a/openml/testing.py b/openml/testing.py index 18e03fb86..3ca2d1b76 100644 --- a/openml/testing.py +++ b/openml/testing.py @@ -17,7 +17,7 @@ import openml from openml._api.clients import HTTPCache, HTTPClient -from openml._api.resources.base.enums import RetryPolicy +from openml.enums import RetryPolicy from openml.exceptions import OpenMLServerException from openml.tasks import TaskType diff --git a/tests/test_api/test_versions.py b/tests/test_api/test_versions.py index fd41feb2a..a7451f3ae 100644 --- a/tests/test_api/test_versions.py +++ b/tests/test_api/test_versions.py @@ -1,7 +1,7 @@ import pytest from openml.testing import TestAPIBase from openml._api.resources.base.versions import ResourceV1API -from openml._api.resources.base.enums import ResourceType +from openml.enums import ResourceType class TestResourceV1API(TestAPIBase): From d156ad4e6f1c1d2488242419baf20f5e5fa0e219 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Mon, 2 Feb 2026 12:21:17 +0500 Subject: [PATCH 061/156] module level imports --- openml/_api/__init__.py | 69 +++++++++++++++++++ openml/_api/resources/__init__.py | 23 ++++--- openml/_api/resources/_registry.py | 23 ++++--- openml/_api/resources/base/__init__.py | 8 +-- openml/_api/resources/base/resources.py | 3 +- openml/_api/resources/base/versions.py | 3 +- openml/_api/resources/dataset.py | 2 +- openml/_api/resources/estimation_procedure.py | 2 +- openml/_api/resources/evaluation.py | 2 +- openml/_api/resources/evaluation_measure.py | 2 +- openml/_api/resources/flow.py | 2 +- openml/_api/resources/run.py | 2 +- openml/_api/resources/setup.py | 2 +- openml/_api/resources/study.py | 2 +- openml/_api/resources/task.py | 2 +- openml/_api/setup/__init__.py | 12 ++++ openml/_api/setup/_instance.py | 2 +- openml/_api/setup/backend.py | 4 +- openml/_api/setup/builder.py | 6 +- openml/_api/setup/config.py | 3 +- openml/_api/setup/utils.py | 49 ------------- 21 files changed, 130 insertions(+), 93 deletions(-) delete mode 100644 
openml/_api/setup/utils.py diff --git a/openml/_api/__init__.py b/openml/_api/__init__.py index e69de29bb..25bc2f262 100644 --- a/openml/_api/__init__.py +++ b/openml/_api/__init__.py @@ -0,0 +1,69 @@ +from .clients import ( + HTTPCache, + HTTPClient, + MinIOClient, +) +from .resources import ( + API_REGISTRY, + DatasetV1API, + DatasetV2API, + EstimationProcedureV1API, + EstimationProcedureV2API, + EvaluationMeasureV1API, + EvaluationMeasureV2API, + EvaluationV1API, + EvaluationV2API, + FallbackProxy, + FlowV1API, + FlowV2API, + ResourceAPI, + RunV1API, + RunV2API, + SetupV1API, + SetupV2API, + StudyV1API, + StudyV2API, + TaskV1API, + TaskV2API, +) +from .setup import ( + APIBackend, + APIBackendBuilder, + APIConfig, + CacheConfig, + Config, + ConnectionConfig, +) + +__all__ = [ + "API_REGISTRY", + "APIBackend", + "APIBackendBuilder", + "APIConfig", + "CacheConfig", + "Config", + "ConnectionConfig", + "DatasetV1API", + "DatasetV2API", + "EstimationProcedureV1API", + "EstimationProcedureV2API", + "EvaluationMeasureV1API", + "EvaluationMeasureV2API", + "EvaluationV1API", + "EvaluationV2API", + "FallbackProxy", + "FlowV1API", + "FlowV2API", + "HTTPCache", + "HTTPClient", + "MinIOClient", + "ResourceAPI", + "RunV1API", + "RunV2API", + "SetupV1API", + "SetupV2API", + "StudyV1API", + "StudyV2API", + "TaskV1API", + "TaskV2API", +] diff --git a/openml/_api/resources/__init__.py b/openml/_api/resources/__init__.py index a3dc63798..863ec0f72 100644 --- a/openml/_api/resources/__init__.py +++ b/openml/_api/resources/__init__.py @@ -1,17 +1,17 @@ -from openml._api.resources._registry import API_REGISTRY -from openml._api.resources.base.fallback import FallbackProxy -from openml._api.resources.dataset import DatasetV1API, DatasetV2API -from openml._api.resources.estimation_procedure import ( +from ._registry import API_REGISTRY +from .base import FallbackProxy, ResourceAPI +from .dataset import DatasetV1API, DatasetV2API +from .estimation_procedure import ( 
EstimationProcedureV1API, EstimationProcedureV2API, ) -from openml._api.resources.evaluation import EvaluationV1API, EvaluationV2API -from openml._api.resources.evaluation_measure import EvaluationMeasureV1API, EvaluationMeasureV2API -from openml._api.resources.flow import FlowV1API, FlowV2API -from openml._api.resources.run import RunV1API, RunV2API -from openml._api.resources.setup import SetupV1API, SetupV2API -from openml._api.resources.study import StudyV1API, StudyV2API -from openml._api.resources.task import TaskV1API, TaskV2API +from .evaluation import EvaluationV1API, EvaluationV2API +from .evaluation_measure import EvaluationMeasureV1API, EvaluationMeasureV2API +from .flow import FlowV1API, FlowV2API +from .run import RunV1API, RunV2API +from .setup import SetupV1API, SetupV2API +from .study import StudyV1API, StudyV2API +from .task import TaskV1API, TaskV2API __all__ = [ "API_REGISTRY", @@ -26,6 +26,7 @@ "FallbackProxy", "FlowV1API", "FlowV2API", + "ResourceAPI", "RunV1API", "RunV2API", "SetupV1API", diff --git a/openml/_api/resources/_registry.py b/openml/_api/resources/_registry.py index b1a5f2b74..66d7ec428 100644 --- a/openml/_api/resources/_registry.py +++ b/openml/_api/resources/_registry.py @@ -2,22 +2,23 @@ from typing import TYPE_CHECKING -from openml._api.resources.dataset import DatasetV1API, DatasetV2API -from openml._api.resources.estimation_procedure import ( +from openml.enums import APIVersion, ResourceType + +from .dataset import DatasetV1API, DatasetV2API +from .estimation_procedure import ( EstimationProcedureV1API, EstimationProcedureV2API, ) -from openml._api.resources.evaluation import EvaluationV1API, EvaluationV2API -from openml._api.resources.evaluation_measure import EvaluationMeasureV1API, EvaluationMeasureV2API -from openml._api.resources.flow import FlowV1API, FlowV2API -from openml._api.resources.run import RunV1API, RunV2API -from openml._api.resources.setup import SetupV1API, SetupV2API -from openml._api.resources.study 
import StudyV1API, StudyV2API -from openml._api.resources.task import TaskV1API, TaskV2API -from openml.enums import APIVersion, ResourceType +from .evaluation import EvaluationV1API, EvaluationV2API +from .evaluation_measure import EvaluationMeasureV1API, EvaluationMeasureV2API +from .flow import FlowV1API, FlowV2API +from .run import RunV1API, RunV2API +from .setup import SetupV1API, SetupV2API +from .study import StudyV1API, StudyV2API +from .task import TaskV1API, TaskV2API if TYPE_CHECKING: - from openml._api.resources.base import ResourceAPI + from .base import ResourceAPI API_REGISTRY: dict[ APIVersion, diff --git a/openml/_api/resources/base/__init__.py b/openml/_api/resources/base/__init__.py index f222a0b87..ed6dc26f7 100644 --- a/openml/_api/resources/base/__init__.py +++ b/openml/_api/resources/base/__init__.py @@ -1,6 +1,6 @@ -from openml._api.resources.base.base import ResourceAPI -from openml._api.resources.base.fallback import FallbackProxy -from openml._api.resources.base.resources import ( +from .base import ResourceAPI +from .fallback import FallbackProxy +from .resources import ( DatasetAPI, EstimationProcedureAPI, EvaluationAPI, @@ -11,7 +11,7 @@ StudyAPI, TaskAPI, ) -from openml._api.resources.base.versions import ResourceV1API, ResourceV2API +from .versions import ResourceV1API, ResourceV2API __all__ = [ "DatasetAPI", diff --git a/openml/_api/resources/base/resources.py b/openml/_api/resources/base/resources.py index 5c4dde9de..8ccd5776e 100644 --- a/openml/_api/resources/base/resources.py +++ b/openml/_api/resources/base/resources.py @@ -1,8 +1,9 @@ from __future__ import annotations -from openml._api.resources.base import ResourceAPI from openml.enums import ResourceType +from .base import ResourceAPI + class DatasetAPI(ResourceAPI): resource_type: ResourceType = ResourceType.DATASET diff --git a/openml/_api/resources/base/versions.py b/openml/_api/resources/base/versions.py index a98a0ad43..b86272377 100644 --- 
a/openml/_api/resources/base/versions.py +++ b/openml/_api/resources/base/versions.py @@ -5,7 +5,6 @@ import xmltodict -from openml._api.resources.base import ResourceAPI from openml.enums import APIVersion, ResourceType from openml.exceptions import ( OpenMLNotAuthorizedError, @@ -13,6 +12,8 @@ OpenMLServerException, ) +from .base import ResourceAPI + class ResourceV1API(ResourceAPI): api_version: APIVersion = APIVersion.V1 diff --git a/openml/_api/resources/dataset.py b/openml/_api/resources/dataset.py index 3ecad35da..51688a2fd 100644 --- a/openml/_api/resources/dataset.py +++ b/openml/_api/resources/dataset.py @@ -1,6 +1,6 @@ from __future__ import annotations -from openml._api.resources.base import DatasetAPI, ResourceV1API, ResourceV2API +from .base import DatasetAPI, ResourceV1API, ResourceV2API class DatasetV1API(ResourceV1API, DatasetAPI): diff --git a/openml/_api/resources/estimation_procedure.py b/openml/_api/resources/estimation_procedure.py index d2e73cfa6..b8ea7d2c3 100644 --- a/openml/_api/resources/estimation_procedure.py +++ b/openml/_api/resources/estimation_procedure.py @@ -1,6 +1,6 @@ from __future__ import annotations -from openml._api.resources.base import EstimationProcedureAPI, ResourceV1API, ResourceV2API +from .base import EstimationProcedureAPI, ResourceV1API, ResourceV2API class EstimationProcedureV1API(ResourceV1API, EstimationProcedureAPI): diff --git a/openml/_api/resources/evaluation.py b/openml/_api/resources/evaluation.py index a0149e1e5..07877e14e 100644 --- a/openml/_api/resources/evaluation.py +++ b/openml/_api/resources/evaluation.py @@ -1,6 +1,6 @@ from __future__ import annotations -from openml._api.resources.base import EvaluationAPI, ResourceV1API, ResourceV2API +from .base import EvaluationAPI, ResourceV1API, ResourceV2API class EvaluationV1API(ResourceV1API, EvaluationAPI): diff --git a/openml/_api/resources/evaluation_measure.py b/openml/_api/resources/evaluation_measure.py index bd4318417..63cf16c77 100644 --- 
a/openml/_api/resources/evaluation_measure.py +++ b/openml/_api/resources/evaluation_measure.py @@ -1,6 +1,6 @@ from __future__ import annotations -from openml._api.resources.base import EvaluationMeasureAPI, ResourceV1API, ResourceV2API +from .base import EvaluationMeasureAPI, ResourceV1API, ResourceV2API class EvaluationMeasureV1API(ResourceV1API, EvaluationMeasureAPI): diff --git a/openml/_api/resources/flow.py b/openml/_api/resources/flow.py index 3b62abd3f..ad2e05bd9 100644 --- a/openml/_api/resources/flow.py +++ b/openml/_api/resources/flow.py @@ -1,6 +1,6 @@ from __future__ import annotations -from openml._api.resources.base import FlowAPI, ResourceV1API, ResourceV2API +from .base import FlowAPI, ResourceV1API, ResourceV2API class FlowV1API(ResourceV1API, FlowAPI): diff --git a/openml/_api/resources/run.py b/openml/_api/resources/run.py index 9698c59dd..151c69e35 100644 --- a/openml/_api/resources/run.py +++ b/openml/_api/resources/run.py @@ -1,6 +1,6 @@ from __future__ import annotations -from openml._api.resources.base import ResourceV1API, ResourceV2API, RunAPI +from .base import ResourceV1API, ResourceV2API, RunAPI class RunV1API(ResourceV1API, RunAPI): diff --git a/openml/_api/resources/setup.py b/openml/_api/resources/setup.py index e948e1b38..78a36cecc 100644 --- a/openml/_api/resources/setup.py +++ b/openml/_api/resources/setup.py @@ -1,6 +1,6 @@ from __future__ import annotations -from openml._api.resources.base import ResourceV1API, ResourceV2API, SetupAPI +from .base import ResourceV1API, ResourceV2API, SetupAPI class SetupV1API(ResourceV1API, SetupAPI): diff --git a/openml/_api/resources/study.py b/openml/_api/resources/study.py index 8de5868d1..cefd55004 100644 --- a/openml/_api/resources/study.py +++ b/openml/_api/resources/study.py @@ -1,6 +1,6 @@ from __future__ import annotations -from openml._api.resources.base import ResourceV1API, ResourceV2API, StudyAPI +from .base import ResourceV1API, ResourceV2API, StudyAPI class 
StudyV1API(ResourceV1API, StudyAPI): diff --git a/openml/_api/resources/task.py b/openml/_api/resources/task.py index a97d5f726..a367c9aa1 100644 --- a/openml/_api/resources/task.py +++ b/openml/_api/resources/task.py @@ -1,6 +1,6 @@ from __future__ import annotations -from openml._api.resources.base import ResourceV1API, ResourceV2API, TaskAPI +from .base import ResourceV1API, ResourceV2API, TaskAPI class TaskV1API(ResourceV1API, TaskAPI): diff --git a/openml/_api/setup/__init__.py b/openml/_api/setup/__init__.py index e69de29bb..7f8c65ba3 100644 --- a/openml/_api/setup/__init__.py +++ b/openml/_api/setup/__init__.py @@ -0,0 +1,12 @@ +from .backend import APIBackend +from .builder import APIBackendBuilder +from .config import APIConfig, CacheConfig, Config, ConnectionConfig + +__all__ = [ + "APIBackend", + "APIBackendBuilder", + "APIConfig", + "CacheConfig", + "Config", + "ConnectionConfig", +] diff --git a/openml/_api/setup/_instance.py b/openml/_api/setup/_instance.py index 2d9818a0d..c98ccaf57 100644 --- a/openml/_api/setup/_instance.py +++ b/openml/_api/setup/_instance.py @@ -1,5 +1,5 @@ from __future__ import annotations -from openml._api.setup.backend import APIBackend +from .backend import APIBackend _backend = APIBackend.get_instance() diff --git a/openml/_api/setup/backend.py b/openml/_api/setup/backend.py index 7c300e143..f0faf5165 100644 --- a/openml/_api/setup/backend.py +++ b/openml/_api/setup/backend.py @@ -3,8 +3,8 @@ from copy import deepcopy from typing import Any -from openml._api.setup.builder import APIBackendBuilder -from openml._api.setup.config import Config +from .builder import APIBackendBuilder +from .config import Config class APIBackend: diff --git a/openml/_api/setup/builder.py b/openml/_api/setup/builder.py index 135b18da3..750db431a 100644 --- a/openml/_api/setup/builder.py +++ b/openml/_api/setup/builder.py @@ -5,13 +5,13 @@ from typing import TYPE_CHECKING from openml._api.clients import HTTPCache, HTTPClient, MinIOClient -from 
openml._api.resources import API_REGISTRY, FallbackProxy +from openml._api.resources import API_REGISTRY, FallbackProxy, ResourceAPI if TYPE_CHECKING: - from openml._api.resources.base import ResourceAPI - from openml._api.setup.config import Config from openml.enums import ResourceType + from .config import Config + class APIBackendBuilder: def __init__( diff --git a/openml/_api/setup/config.py b/openml/_api/setup/config.py index 64e790404..ea868262a 100644 --- a/openml/_api/setup/config.py +++ b/openml/_api/setup/config.py @@ -2,9 +2,10 @@ from dataclasses import dataclass, field -from openml._api.setup.utils import _resolve_default_cache_dir from openml.enums import APIVersion, RetryPolicy +from ._utils import _resolve_default_cache_dir + @dataclass class APIConfig: diff --git a/openml/_api/setup/utils.py b/openml/_api/setup/utils.py deleted file mode 100644 index ddcf5b41c..000000000 --- a/openml/_api/setup/utils.py +++ /dev/null @@ -1,49 +0,0 @@ -from __future__ import annotations - -import logging -import os -import platform -from pathlib import Path - -openml_logger = logging.getLogger("openml") - -# Default values (see also https://github.com/openml/OpenML/wiki/Client-API-Standards) -_user_path = Path("~").expanduser().absolute() - - -def _resolve_default_cache_dir() -> Path: - user_defined_cache_dir = os.environ.get("OPENML_CACHE_DIR") - if user_defined_cache_dir is not None: - return Path(user_defined_cache_dir) - - if platform.system().lower() != "linux": - return _user_path / ".openml" - - xdg_cache_home = os.environ.get("XDG_CACHE_HOME") - if xdg_cache_home is None: - return Path("~", ".cache", "openml") - - # This is the proper XDG_CACHE_HOME directory, but - # we unfortunately had a problem where we used XDG_CACHE_HOME/org, - # we check heuristically if this old directory still exists and issue - # a warning if it does. There's too much data to move to do this for the user. 
- - # The new cache directory exists - cache_dir = Path(xdg_cache_home) / "openml" - if cache_dir.exists(): - return cache_dir - - # The old cache directory *does not* exist - heuristic_dir_for_backwards_compat = Path(xdg_cache_home) / "org" / "openml" - if not heuristic_dir_for_backwards_compat.exists(): - return cache_dir - - root_dir_to_delete = Path(xdg_cache_home) / "org" - openml_logger.warning( - "An old cache directory was found at '%s'. This directory is no longer used by " - "OpenML-Python. To silence this warning you would need to delete the old cache " - "directory. The cached files will then be located in '%s'.", - root_dir_to_delete, - cache_dir, - ) - return Path(xdg_cache_home) From d7a37884cc18fee1509cd43fcec696dd0efbf466 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Mon, 2 Feb 2026 12:24:43 +0500 Subject: [PATCH 062/156] module level import for _backend --- openml/__init__.py | 2 +- openml/_api/__init__.py | 2 ++ openml/_api/setup/__init__.py | 2 ++ openml/_api/setup/_utils.py | 49 +++++++++++++++++++++++++++++++++++ 4 files changed, 54 insertions(+), 1 deletion(-) create mode 100644 openml/_api/setup/_utils.py diff --git a/openml/__init__.py b/openml/__init__.py index fdf3b90e4..21dda24ad 100644 --- a/openml/__init__.py +++ b/openml/__init__.py @@ -33,7 +33,7 @@ utils, ) from .__version__ import __version__ -from ._api.setup._instance import _backend +from ._api import _backend from .datasets import OpenMLDataFeature, OpenMLDataset from .evaluations import OpenMLEvaluation from .flows import OpenMLFlow diff --git a/openml/_api/__init__.py b/openml/_api/__init__.py index 25bc2f262..2d4651431 100644 --- a/openml/_api/__init__.py +++ b/openml/_api/__init__.py @@ -33,6 +33,7 @@ CacheConfig, Config, ConnectionConfig, + _backend, ) __all__ = [ @@ -66,4 +67,5 @@ "StudyV2API", "TaskV1API", "TaskV2API", + "_backend", ] diff --git a/openml/_api/setup/__init__.py b/openml/_api/setup/__init__.py index 7f8c65ba3..1c28cfa9e 100644 --- 
a/openml/_api/setup/__init__.py +++ b/openml/_api/setup/__init__.py @@ -1,3 +1,4 @@ +from ._instance import _backend from .backend import APIBackend from .builder import APIBackendBuilder from .config import APIConfig, CacheConfig, Config, ConnectionConfig @@ -9,4 +10,5 @@ "CacheConfig", "Config", "ConnectionConfig", + "_backend", ] diff --git a/openml/_api/setup/_utils.py b/openml/_api/setup/_utils.py new file mode 100644 index 000000000..ddcf5b41c --- /dev/null +++ b/openml/_api/setup/_utils.py @@ -0,0 +1,49 @@ +from __future__ import annotations + +import logging +import os +import platform +from pathlib import Path + +openml_logger = logging.getLogger("openml") + +# Default values (see also https://github.com/openml/OpenML/wiki/Client-API-Standards) +_user_path = Path("~").expanduser().absolute() + + +def _resolve_default_cache_dir() -> Path: + user_defined_cache_dir = os.environ.get("OPENML_CACHE_DIR") + if user_defined_cache_dir is not None: + return Path(user_defined_cache_dir) + + if platform.system().lower() != "linux": + return _user_path / ".openml" + + xdg_cache_home = os.environ.get("XDG_CACHE_HOME") + if xdg_cache_home is None: + return Path("~", ".cache", "openml") + + # This is the proper XDG_CACHE_HOME directory, but + # we unfortunately had a problem where we used XDG_CACHE_HOME/org, + # we check heuristically if this old directory still exists and issue + # a warning if it does. There's too much data to move to do this for the user. + + # The new cache directory exists + cache_dir = Path(xdg_cache_home) / "openml" + if cache_dir.exists(): + return cache_dir + + # The old cache directory *does not* exist + heuristic_dir_for_backwards_compat = Path(xdg_cache_home) / "org" / "openml" + if not heuristic_dir_for_backwards_compat.exists(): + return cache_dir + + root_dir_to_delete = Path(xdg_cache_home) / "org" + openml_logger.warning( + "An old cache directory was found at '%s'. This directory is no longer used by " + "OpenML-Python. 
To silence this warning you would need to delete the old cache " + "directory. The cached files will then be located in '%s'.", + root_dir_to_delete, + cache_dir, + ) + return Path(xdg_cache_home) From b5b9ef60047cff083e30ab7eb6cb66f02baa1ff6 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Mon, 2 Feb 2026 12:29:12 +0500 Subject: [PATCH 063/156] module level import for tests --- openml/_api/__init__.py | 24 ++++++++++++++++++++++++ openml/_api/resources/__init__.py | 29 ++++++++++++++++++++++++++++- openml/testing.py | 2 +- tests/test_api/test_versions.py | 2 +- 4 files changed, 54 insertions(+), 3 deletions(-) diff --git a/openml/_api/__init__.py b/openml/_api/__init__.py index 2d4651431..926fee3d4 100644 --- a/openml/_api/__init__.py +++ b/openml/_api/__init__.py @@ -5,24 +5,35 @@ ) from .resources import ( API_REGISTRY, + DatasetAPI, DatasetV1API, DatasetV2API, + EstimationProcedureAPI, EstimationProcedureV1API, EstimationProcedureV2API, + EvaluationAPI, + EvaluationMeasureAPI, EvaluationMeasureV1API, EvaluationMeasureV2API, EvaluationV1API, EvaluationV2API, FallbackProxy, + FlowAPI, FlowV1API, FlowV2API, ResourceAPI, + ResourceV1API, + ResourceV2API, + RunAPI, RunV1API, RunV2API, + SetupAPI, SetupV1API, SetupV2API, + StudyAPI, StudyV1API, StudyV2API, + TaskAPI, TaskV1API, TaskV2API, ) @@ -44,27 +55,40 @@ "CacheConfig", "Config", "ConnectionConfig", + "DatasetAPI", "DatasetV1API", "DatasetV2API", + "EstimationProcedureAPI", "EstimationProcedureV1API", "EstimationProcedureV2API", + "EvaluationAPI", + "EvaluationMeasureAPI", "EvaluationMeasureV1API", "EvaluationMeasureV2API", "EvaluationV1API", "EvaluationV2API", "FallbackProxy", + "FallbackProxy", + "FlowAPI", "FlowV1API", "FlowV2API", "HTTPCache", "HTTPClient", "MinIOClient", "ResourceAPI", + "ResourceAPI", + "ResourceV1API", + "ResourceV2API", + "RunAPI", "RunV1API", "RunV2API", + "SetupAPI", "SetupV1API", "SetupV2API", + "StudyAPI", "StudyV1API", "StudyV2API", + "TaskAPI", "TaskV1API", "TaskV2API", "_backend", 
diff --git a/openml/_api/resources/__init__.py b/openml/_api/resources/__init__.py index 863ec0f72..1f0b2caa1 100644 --- a/openml/_api/resources/__init__.py +++ b/openml/_api/resources/__init__.py @@ -1,5 +1,19 @@ from ._registry import API_REGISTRY -from .base import FallbackProxy, ResourceAPI +from .base import ( + DatasetAPI, + EstimationProcedureAPI, + EvaluationAPI, + EvaluationMeasureAPI, + FallbackProxy, + FlowAPI, + ResourceAPI, + ResourceV1API, + ResourceV2API, + RunAPI, + SetupAPI, + StudyAPI, + TaskAPI, +) from .dataset import DatasetV1API, DatasetV2API from .estimation_procedure import ( EstimationProcedureV1API, @@ -15,24 +29,37 @@ __all__ = [ "API_REGISTRY", + "DatasetAPI", "DatasetV1API", "DatasetV2API", + "EstimationProcedureAPI", "EstimationProcedureV1API", "EstimationProcedureV2API", + "EvaluationAPI", + "EvaluationMeasureAPI", "EvaluationMeasureV1API", "EvaluationMeasureV2API", "EvaluationV1API", "EvaluationV2API", "FallbackProxy", + "FallbackProxy", + "FlowAPI", "FlowV1API", "FlowV2API", "ResourceAPI", + "ResourceAPI", + "ResourceV1API", + "ResourceV2API", + "RunAPI", "RunV1API", "RunV2API", + "SetupAPI", "SetupV1API", "SetupV2API", + "StudyAPI", "StudyV1API", "StudyV2API", + "TaskAPI", "TaskV1API", "TaskV2API", ] diff --git a/openml/testing.py b/openml/testing.py index 3ca2d1b76..a971aa1c3 100644 --- a/openml/testing.py +++ b/openml/testing.py @@ -16,7 +16,7 @@ import requests import openml -from openml._api.clients import HTTPCache, HTTPClient +from openml._api import HTTPCache, HTTPClient from openml.enums import RetryPolicy from openml.exceptions import OpenMLServerException from openml.tasks import TaskType diff --git a/tests/test_api/test_versions.py b/tests/test_api/test_versions.py index a7451f3ae..2507a3cd5 100644 --- a/tests/test_api/test_versions.py +++ b/tests/test_api/test_versions.py @@ -1,6 +1,6 @@ import pytest from openml.testing import TestAPIBase -from openml._api.resources.base.versions import ResourceV1API +from openml._api 
import ResourceV1API from openml.enums import ResourceType From 567eca4096d1332d1db07f8646a3733c241885f3 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Mon, 2 Feb 2026 13:00:38 +0500 Subject: [PATCH 064/156] add test: test_tag_and_untag --- tests/test_api/test_versions.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tests/test_api/test_versions.py b/tests/test_api/test_versions.py index 2507a3cd5..6a4cad97d 100644 --- a/tests/test_api/test_versions.py +++ b/tests/test_api/test_versions.py @@ -1,3 +1,4 @@ +from time import time import pytest from openml.testing import TestAPIBase from openml._api import ResourceV1API @@ -41,4 +42,12 @@ def test_publish_and_delete(self): @pytest.mark.uses_test_server() def test_tag_and_untag(self): - pass + resource_id = 1 + unique_indicator = str(time()).replace(".", "") + tag = f"TestResourceV1API_test_tag_and_untag_{unique_indicator}" + + tags = self.resource.tag(resource_id, tag) + self.assertIn(tag, tags) + + tags = self.resource.untag(resource_id, tag) + self.assertNotIn(tag, tags) From b2287c32f5637a755f6b2e95c5472308969ef252 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Tue, 3 Feb 2026 10:06:20 +0500 Subject: [PATCH 065/156] implement get/set_config_values --- openml/_api/setup/backend.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/openml/_api/setup/backend.py b/openml/_api/setup/backend.py index f0faf5165..d8cf83f03 100644 --- a/openml/_api/setup/backend.py +++ b/openml/_api/setup/backend.py @@ -38,7 +38,7 @@ def set_config(cls, config: Config) -> None: instance._backend = APIBackendBuilder.build(config) @classmethod - def get_config_value(cls, key: str) -> Config: + def get_config_value(cls, key: str) -> Any: keys = key.split(".") config_value = cls.get_instance()._config for k in keys: @@ -60,3 +60,16 @@ def set_config_value(cls, key: str, value: Any) -> None: else: setattr(parent, keys[-1], value) cls.set_config(config) + + @classmethod + def 
get_config_values(cls, keys: list[str]) -> list[Any]: + values = [] + for key in keys: + value = cls.get_config_value(key) + values.append(value) + return values + + @classmethod + def set_config_values(cls, config_dict: dict[str, Any]) -> None: + for key, value in config_dict.items(): + cls.set_config_value(key, value) From b7e285eaafadabe88b7d4e0f42edc1f72459a2ee Mon Sep 17 00:00:00 2001 From: geetu040 Date: Tue, 3 Feb 2026 12:22:36 +0500 Subject: [PATCH 066/156] improve APIBackend.set_config_values --- openml/_api/setup/backend.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/openml/_api/setup/backend.py b/openml/_api/setup/backend.py index d8cf83f03..4dd0f4390 100644 --- a/openml/_api/setup/backend.py +++ b/openml/_api/setup/backend.py @@ -71,5 +71,16 @@ def get_config_values(cls, keys: list[str]) -> list[Any]: @classmethod def set_config_values(cls, config_dict: dict[str, Any]) -> None: + config = cls.get_instance()._config + for key, value in config_dict.items(): - cls.set_config_value(key, value) + keys = key.split(".") + parent = config + for k in keys[:-1]: + parent = parent[k] if isinstance(parent, dict) else getattr(parent, k) + if isinstance(parent, dict): + parent[keys[-1]] = value + else: + setattr(parent, keys[-1], value) + + cls.set_config(config) From fd43c489523c1a95e84bc2a95bf2caedd44262c2 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Tue, 3 Feb 2026 12:24:24 +0500 Subject: [PATCH 067/156] use LegacyConfig --- openml/__init__.py | 7 +++- openml/_api_calls.py | 19 +++++----- openml/{config.py => _config.py} | 36 +++++++++++++++++++ openml/_legacy_config.py | 19 ++++++++++ openml/base.py | 2 +- openml/cli.py | 14 ++++---- openml/datasets/dataset.py | 6 ++-- openml/datasets/functions.py | 6 ++-- openml/evaluations/evaluation.py | 1 - openml/runs/functions.py | 18 +++++----- openml/setups/functions.py | 5 ++- openml/setups/setup.py | 1 - openml/study/functions.py | 2 +- openml/study/study.py | 4 +-- 
openml/tasks/task.py | 2 +- openml/utils.py | 6 ++-- .../test_evaluations_example.py | 5 ++- tests/test_openml/test_api_calls.py | 1 - tests/test_openml/test_config.py | 2 +- 19 files changed, 106 insertions(+), 50 deletions(-) rename openml/{config.py => _config.py} (95%) create mode 100644 openml/_legacy_config.py diff --git a/openml/__init__.py b/openml/__init__.py index 21dda24ad..30f38f5f0 100644 --- a/openml/__init__.py +++ b/openml/__init__.py @@ -20,7 +20,8 @@ from . import ( _api_calls, - config, + _config, + _legacy_config, datasets, evaluations, exceptions, @@ -50,6 +51,8 @@ OpenMLTask, ) +config = _legacy_config.LegacyConfig + def populate_cache( task_ids: list[int] | None = None, @@ -111,6 +114,8 @@ def populate_cache( "__version__", "_api_calls", "_backend", + "_config", + "_legacy_config", "config", "datasets", "evaluations", diff --git a/openml/_api_calls.py b/openml/_api_calls.py index 9e53bd9fa..21d5c4391 100644 --- a/openml/_api_calls.py +++ b/openml/_api_calls.py @@ -19,7 +19,8 @@ import xmltodict from urllib3 import ProxyManager -from . 
import config +import openml + from .__version__ import __version__ from .exceptions import ( OpenMLHashException, @@ -70,7 +71,7 @@ def resolve_env_proxies(url: str) -> str | None: def _create_url_from_endpoint(endpoint: str) -> str: - url = config.server + url: str = openml.config.server if not url.endswith("/"): url += "/" url += endpoint @@ -171,7 +172,7 @@ def _download_minio_file( bucket_name=bucket, object_name=object_name, file_path=str(destination), - progress=ProgressBar() if config.show_progress else None, + progress=ProgressBar() if openml.config.show_progress else None, request_headers=_HEADERS, ) if destination.is_file() and destination.suffix == ".zip": @@ -300,7 +301,7 @@ def _file_id_to_url(file_id: int, filename: str | None = None) -> str: Presents the URL how to download a given file id filename is optional """ - openml_url = config.server.split("/api/") + openml_url: str = openml.config.server.split("/api/") url = openml_url[0] + f"/data/download/{file_id!s}" if filename is not None: url += "/" + filename @@ -316,7 +317,7 @@ def _read_url_files( and sending file_elements as files """ data = {} if data is None else data - data["api_key"] = config.apikey + data["api_key"] = openml.config.apikey if file_elements is None: file_elements = {} # Using requests.post sets header 'Accept-encoding' automatically to @@ -336,8 +337,8 @@ def __read_url( md5_checksum: str | None = None, ) -> requests.Response: data = {} if data is None else data - if config.apikey: - data["api_key"] = config.apikey + if openml.config.apikey: + data["api_key"] = openml.config.apikey return _send_request( request_method=request_method, url=url, @@ -362,10 +363,10 @@ def _send_request( # noqa: C901, PLR0912 files: FILE_ELEMENTS_TYPE | None = None, md5_checksum: str | None = None, ) -> requests.Response: - n_retries = max(1, config.connection_n_retries) + n_retries = max(1, openml.config.connection_n_retries) response: requests.Response | None = None - delay_method = _human_delay 
if config.retry_policy == "human" else _robot_delay + delay_method = _human_delay if openml.config.retry_policy == "human" else _robot_delay # Error to raise in case of retrying too often. Will be set to the last observed exception. retry_raise_e: Exception | None = None diff --git a/openml/config.py b/openml/_config.py similarity index 95% rename from openml/config.py rename to openml/_config.py index e6104fd7f..c266ae9d9 100644 --- a/openml/config.py +++ b/openml/_config.py @@ -18,6 +18,8 @@ from typing_extensions import TypedDict from urllib.parse import urlparse +from openml.enums import RetryPolicy + logger = logging.getLogger(__name__) openml_logger = logging.getLogger("openml") console_handler: logging.StreamHandler | None = None @@ -206,6 +208,8 @@ def set_retry_policy(value: Literal["human", "robot"], n_retries: int | None = N retry_policy = value connection_n_retries = default_retries_by_policy[value] if n_retries is None else n_retries + _sync_api_config() + class ConfigurationForExamples: """Allows easy switching to and from a test configuration, used for examples.""" @@ -244,6 +248,8 @@ def start_using_configuration_for_example(cls) -> None: stacklevel=2, ) + _sync_api_config() + @classmethod def stop_using_configuration_for_example(cls) -> None: """Return to configuration as it was before `start_use_example_configuration`.""" @@ -262,6 +268,8 @@ def stop_using_configuration_for_example(cls) -> None: apikey = cast("str", cls._last_used_key) cls._start_last_called = False + _sync_api_config() + def _handle_xdg_config_home_backwards_compatibility( xdg_home: str, @@ -374,6 +382,8 @@ def _setup(config: _Config | None = None) -> None: short_cache_dir = Path(config["cachedir"]) _root_cache_directory = short_cache_dir.expanduser().resolve() + _sync_api_config() + try: cache_exists = _root_cache_directory.exists() # create the cache subdirectory @@ -408,6 +418,8 @@ def set_field_in_config_file(field: str, value: Any) -> None: if value is not None: 
fh.write(f"{f} = {value}\n") + _sync_api_config() + def _parse_config(config_file: str | Path) -> _Config: """Parse the config file, set up defaults.""" @@ -495,6 +507,8 @@ def set_root_cache_directory(root_cache_directory: str | Path) -> None: global _root_cache_directory # noqa: PLW0603 _root_cache_directory = Path(root_cache_directory) + _sync_api_config() + start_using_configuration_for_example = ( ConfigurationForExamples.start_using_configuration_for_example @@ -514,6 +528,28 @@ def overwrite_config_context(config: dict[str, Any]) -> Iterator[_Config]: _setup(existing_config) +def _sync_api_config() -> None: + """Sync the new API config with the legacy config in this file.""" + from ._api import APIBackend + + p = urlparse(server) + v1_server = f"{p.scheme}://{p.netloc}/" + v1_base_url = p.path.lstrip("/") + connection_retry_policy = RetryPolicy.HUMAN if retry_policy == "human" else RetryPolicy.ROBOT + cache_dir = str(_root_cache_directory) + + APIBackend.set_config_values( + { + "api_configs.v1.server": v1_server, + "api_configs.v1.base_url": v1_base_url, + "api_configs.v1.api_key": apikey, + "cache.dir": cache_dir, + "connection.retry_policy": connection_retry_policy, + "connection.retries": connection_n_retries, + } + ) + + __all__ = [ "get_cache_directory", "get_config_as_dict", diff --git a/openml/_legacy_config.py b/openml/_legacy_config.py new file mode 100644 index 000000000..b26b13c01 --- /dev/null +++ b/openml/_legacy_config.py @@ -0,0 +1,19 @@ +from __future__ import annotations + +from typing import Any + + +class LegacyConfigMeta(type): + def __getattr__(cls, name: str) -> Any: + import openml + + return getattr(openml._config, name) + + def __setattr__(cls, name: str, value: Any) -> None: + import openml + + setattr(openml._config, name, value) + + +class LegacyConfig(metaclass=LegacyConfigMeta): + pass diff --git a/openml/base.py b/openml/base.py index a282be8eb..f79bc2931 100644 --- a/openml/base.py +++ b/openml/base.py @@ -8,8 +8,8 @@ import 
xmltodict +import openml import openml._api_calls -import openml.config from .utils import _get_rest_api_type_alias, _tag_openml_base diff --git a/openml/cli.py b/openml/cli.py index 0afb089c2..2120449e8 100644 --- a/openml/cli.py +++ b/openml/cli.py @@ -9,7 +9,7 @@ from pathlib import Path from urllib.parse import urlparse -from openml import config +import openml from openml.__version__ import __version__ @@ -59,17 +59,17 @@ def wait_until_valid_input( def print_configuration() -> None: - file = config.determine_config_file_path() + file = openml.config.determine_config_file_path() header = f"File '{file}' contains (or defaults to):" print(header) - max_key_length = max(map(len, config.get_config_as_dict())) - for field, value in config.get_config_as_dict().items(): + max_key_length = max(map(len, openml.config.get_config_as_dict())) + for field, value in openml.config.get_config_as_dict().items(): print(f"{field.ljust(max_key_length)}: {value}") def verbose_set(field: str, value: str) -> None: - config.set_field_in_config_file(field, value) + openml.config.set_field_in_config_file(field, value) print(f"{field} set to '{value}'.") @@ -82,7 +82,7 @@ def check_apikey(apikey: str) -> str: return "" instructions = ( - f"Your current API key is set to: '{config.apikey}'. " + f"Your current API key is set to: '{openml.config.apikey}'. " "You can get an API key at https://new.openml.org. " "You must create an account if you don't have one yet:\n" " 1. 
Log in with the account.\n" @@ -347,7 +347,7 @@ def main() -> None: "'https://openml.github.io/openml-python/main/usage.html#configuration'.", ) - configurable_fields = [f for f in config._defaults if f not in ["max_retries"]] + configurable_fields = [f for f in openml.config._defaults if f not in ["max_retries"]] parser_configure.add_argument( "field", diff --git a/openml/datasets/dataset.py b/openml/datasets/dataset.py index d9eee278d..59d6205ba 100644 --- a/openml/datasets/dataset.py +++ b/openml/datasets/dataset.py @@ -17,8 +17,8 @@ import scipy.sparse import xmltodict +import openml from openml.base import OpenMLBase -from openml.config import OPENML_SKIP_PARQUET_ENV_VAR from .data_feature import OpenMLDataFeature @@ -375,7 +375,9 @@ def _download_data(self) -> None: # import required here to avoid circular import. from .functions import _get_dataset_arff, _get_dataset_parquet - skip_parquet = os.environ.get(OPENML_SKIP_PARQUET_ENV_VAR, "false").casefold() == "true" + skip_parquet = ( + os.environ.get(openml.config.OPENML_SKIP_PARQUET_ENV_VAR, "false").casefold() == "true" + ) if self._parquet_url is not None and not skip_parquet: parquet_file = _get_dataset_parquet(self) self.parquet_file = None if parquet_file is None else str(parquet_file) diff --git a/openml/datasets/functions.py b/openml/datasets/functions.py index 3ac657ea0..432938520 100644 --- a/openml/datasets/functions.py +++ b/openml/datasets/functions.py @@ -19,9 +19,9 @@ import xmltodict from scipy.sparse import coo_matrix +import openml import openml._api_calls import openml.utils -from openml.config import OPENML_SKIP_PARQUET_ENV_VAR from openml.exceptions import ( OpenMLHashException, OpenMLPrivateDatasetError, @@ -492,7 +492,9 @@ def get_dataset( # noqa: C901, PLR0912 qualities_file = _get_dataset_qualities_file(did_cache_dir, dataset_id) parquet_file = None - skip_parquet = os.environ.get(OPENML_SKIP_PARQUET_ENV_VAR, "false").casefold() == "true" + skip_parquet = ( + 
os.environ.get(openml.config.OPENML_SKIP_PARQUET_ENV_VAR, "false").casefold() == "true" + ) download_parquet = "oml:parquet_url" in description and not skip_parquet if download_parquet and (download_data or download_all_files): try: diff --git a/openml/evaluations/evaluation.py b/openml/evaluations/evaluation.py index 5db087024..87df8454a 100644 --- a/openml/evaluations/evaluation.py +++ b/openml/evaluations/evaluation.py @@ -3,7 +3,6 @@ from dataclasses import asdict, dataclass -import openml.config import openml.datasets import openml.flows import openml.runs diff --git a/openml/runs/functions.py b/openml/runs/functions.py index 503788dbd..914a3b46b 100644 --- a/openml/runs/functions.py +++ b/openml/runs/functions.py @@ -18,7 +18,6 @@ import openml import openml._api_calls import openml.utils -from openml import config from openml.exceptions import ( OpenMLCacheException, OpenMLRunsExistError, @@ -45,7 +44,6 @@ # Avoid import cycles: https://mypy.readthedocs.io/en/latest/common_issues.html#import-cycles if TYPE_CHECKING: - from openml.config import _Config from openml.extensions.extension_interface import Extension # get_dict is in run.py to avoid circular imports @@ -107,7 +105,7 @@ def run_model_on_task( # noqa: PLR0913 """ if avoid_duplicate_runs is None: avoid_duplicate_runs = openml.config.avoid_duplicate_runs - if avoid_duplicate_runs and not config.apikey: + if avoid_duplicate_runs and not openml.config.apikey: warnings.warn( "avoid_duplicate_runs is set to True, but no API key is set. " "Please set your API key in the OpenML configuration file, see" @@ -336,7 +334,7 @@ def run_flow_on_task( # noqa: C901, PLR0912, PLR0915, PLR0913 message = f"Executed Task {task.task_id} with Flow id:{run.flow_id}" else: message = f"Executed Task {task.task_id} on local Flow with name {flow.name}." 
- config.logger.info(message) + openml.config.logger.info(message) return run @@ -528,7 +526,7 @@ def _run_task_get_arffcontent( # noqa: PLR0915, PLR0912, C901 # The forked child process may not copy the configuration state of OpenML from the parent. # Current configuration setup needs to be copied and passed to the child processes. - _config = config.get_config_as_dict() + _config = openml.config.get_config_as_dict() # Execute runs in parallel # assuming the same number of tasks as workers (n_jobs), the total compute time for this # statement will be similar to the slowest run @@ -551,7 +549,7 @@ def _run_task_get_arffcontent( # noqa: PLR0915, PLR0912, C901 rep_no=rep_no, sample_no=sample_no, task=task, - configuration=_config, + configuration=openml.config._Config, ) for _n_fit, rep_no, fold_no, sample_no in jobs ) # job_rvals contain the output of all the runs with one-to-one correspondence with `jobs` @@ -694,7 +692,7 @@ def _run_task_get_arffcontent_parallel_helper( # noqa: PLR0913 rep_no: int, sample_no: int, task: OpenMLTask, - configuration: _Config | None = None, + configuration: openml.config._Config | None = None, # type: ignore[name-defined] ) -> tuple[ np.ndarray, pd.DataFrame | None, @@ -719,7 +717,7 @@ def _run_task_get_arffcontent_parallel_helper( # noqa: PLR0913 Sample number to be run. task : OpenMLTask The task object from OpenML. - configuration : _Config + configuration : openml.config._Config Hyperparameters to configure the model. 
Returns @@ -733,7 +731,7 @@ def _run_task_get_arffcontent_parallel_helper( # noqa: PLR0913 """ # Sets up the OpenML instantiated in the child process to match that of the parent's # if configuration=None, loads the default - config._setup(configuration) + openml.config._setup(configuration) train_indices, test_indices = task.get_train_test_split_indices( repeat=rep_no, @@ -762,7 +760,7 @@ def _run_task_get_arffcontent_parallel_helper( # noqa: PLR0913 f"task_class={task.__class__.__name__}" ) - config.logger.info( + openml.config.logger.info( f"Going to run model {model!s} on " f"dataset {openml.datasets.get_dataset(task.dataset_id).name} " f"for repeat {rep_no} fold {fold_no} sample {sample_no}" diff --git a/openml/setups/functions.py b/openml/setups/functions.py index 4bf279ed1..a24d3a456 100644 --- a/openml/setups/functions.py +++ b/openml/setups/functions.py @@ -14,7 +14,6 @@ import openml import openml.exceptions import openml.utils -from openml import config from openml.flows import OpenMLFlow, flow_exists from .setup import OpenMLParameter, OpenMLSetup @@ -84,7 +83,7 @@ def _get_cached_setup(setup_id: int) -> OpenMLSetup: OpenMLCacheException If the setup file for the given setup ID is not cached. 
""" - cache_dir = Path(config.get_cache_directory()) + cache_dir = Path(openml.config.get_cache_directory()) setup_cache_dir = cache_dir / "setups" / str(setup_id) try: setup_file = setup_cache_dir / "description.xml" @@ -112,7 +111,7 @@ def get_setup(setup_id: int) -> OpenMLSetup: ------- OpenMLSetup (an initialized openml setup object) """ - setup_dir = Path(config.get_cache_directory()) / "setups" / str(setup_id) + setup_dir = Path(openml.config.get_cache_directory()) / "setups" / str(setup_id) setup_dir.mkdir(exist_ok=True, parents=True) setup_file = setup_dir / "description.xml" diff --git a/openml/setups/setup.py b/openml/setups/setup.py index 0960ad4c1..6c63b88ef 100644 --- a/openml/setups/setup.py +++ b/openml/setups/setup.py @@ -3,7 +3,6 @@ from typing import Any -import openml.config import openml.flows diff --git a/openml/study/functions.py b/openml/study/functions.py index bb24ddcff..367537773 100644 --- a/openml/study/functions.py +++ b/openml/study/functions.py @@ -8,8 +8,8 @@ import pandas as pd import xmltodict +import openml import openml._api_calls -import openml.config import openml.utils from openml.study.study import OpenMLBenchmarkSuite, OpenMLStudy diff --git a/openml/study/study.py b/openml/study/study.py index 7a9c80bbe..803c6455b 100644 --- a/openml/study/study.py +++ b/openml/study/study.py @@ -5,8 +5,8 @@ from collections.abc import Sequence from typing import Any +import openml from openml.base import OpenMLBase -from openml.config import get_server_base_url class BaseStudy(OpenMLBase): @@ -111,7 +111,7 @@ def _get_repr_body_fields(self) -> Sequence[tuple[str, str | int | list[str]]]: fields["ID"] = self.study_id fields["Study URL"] = self.openml_url if self.creator is not None: - fields["Creator"] = f"{get_server_base_url()}/u/{self.creator}" + fields["Creator"] = f"{openml.config.get_server_base_url()}/u/{self.creator}" if self.creation_date is not None: fields["Upload Time"] = self.creation_date.replace("T", " ") if self.data is not 
None: diff --git a/openml/tasks/task.py b/openml/tasks/task.py index b297a105c..202abac32 100644 --- a/openml/tasks/task.py +++ b/openml/tasks/task.py @@ -11,8 +11,8 @@ from typing import TYPE_CHECKING, Any from typing_extensions import TypedDict +import openml import openml._api_calls -import openml.config from openml import datasets from openml.base import OpenMLBase from openml.utils import _create_cache_directory_for_id diff --git a/openml/utils.py b/openml/utils.py index 3680bc0ff..daa86ab50 100644 --- a/openml/utils.py +++ b/openml/utils.py @@ -19,8 +19,6 @@ import openml._api_calls import openml.exceptions -from . import config - # Avoid import cycles: https://mypy.readthedocs.io/en/latest/common_issues.html#import-cycles if TYPE_CHECKING: from openml.base import OpenMLBase @@ -329,7 +327,7 @@ def _list_all( # noqa: C901 def _get_cache_dir_for_key(key: str) -> Path: - return Path(config.get_cache_directory()) / key + return Path(openml.config.get_cache_directory()) / key def _create_cache_directory(key: str) -> Path: @@ -429,7 +427,7 @@ def safe_func(*args: P.args, **kwargs: P.kwargs) -> R: def _create_lockfiles_dir() -> Path: - path = Path(config.get_cache_directory()) / "locks" + path = Path(openml.config.get_cache_directory()) / "locks" # TODO(eddiebergman): Not sure why this is allowed to error and ignore??? 
with contextlib.suppress(OSError): path.mkdir(exist_ok=True, parents=True) diff --git a/tests/test_evaluations/test_evaluations_example.py b/tests/test_evaluations/test_evaluations_example.py index a9ad7e8c1..7ea25e55c 100644 --- a/tests/test_evaluations/test_evaluations_example.py +++ b/tests/test_evaluations/test_evaluations_example.py @@ -2,15 +2,14 @@ from __future__ import annotations import unittest - -from openml.config import overwrite_config_context +import openml class TestEvaluationsExample(unittest.TestCase): def test_example_python_paper(self): # Example script which will appear in the upcoming OpenML-Python paper # This test ensures that the example will keep running! - with overwrite_config_context( + with openml.config.overwrite_config_context( { "server": "https://www.openml.org/api/v1/xml", "apikey": None, diff --git a/tests/test_openml/test_api_calls.py b/tests/test_openml/test_api_calls.py index a295259ef..6b1cc64b1 100644 --- a/tests/test_openml/test_api_calls.py +++ b/tests/test_openml/test_api_calls.py @@ -9,7 +9,6 @@ import pytest import openml -from openml.config import ConfigurationForExamples import openml.testing from openml._api_calls import _download_minio_bucket, API_TOKEN_HELP_LINK diff --git a/tests/test_openml/test_config.py b/tests/test_openml/test_config.py index 7ef223504..bcb37dcec 100644 --- a/tests/test_openml/test_config.py +++ b/tests/test_openml/test_config.py @@ -12,7 +12,7 @@ import pytest -import openml.config +import openml import openml.testing from openml.testing import TestBase From f4aab6bc2191a94ed37aed2dea0e837630baba11 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Tue, 3 Feb 2026 12:24:43 +0500 Subject: [PATCH 068/156] Revert "use LegacyConfig" This reverts commit fd43c489523c1a95e84bc2a95bf2caedd44262c2. 
--- openml/__init__.py | 7 +--- openml/_api_calls.py | 19 +++++----- openml/_legacy_config.py | 19 ---------- openml/base.py | 2 +- openml/cli.py | 14 ++++---- openml/{_config.py => config.py} | 36 ------------------- openml/datasets/dataset.py | 6 ++-- openml/datasets/functions.py | 6 ++-- openml/evaluations/evaluation.py | 1 + openml/runs/functions.py | 18 +++++----- openml/setups/functions.py | 5 +-- openml/setups/setup.py | 1 + openml/study/functions.py | 2 +- openml/study/study.py | 4 +-- openml/tasks/task.py | 2 +- openml/utils.py | 6 ++-- .../test_evaluations_example.py | 5 +-- tests/test_openml/test_api_calls.py | 1 + tests/test_openml/test_config.py | 2 +- 19 files changed, 50 insertions(+), 106 deletions(-) delete mode 100644 openml/_legacy_config.py rename openml/{_config.py => config.py} (95%) diff --git a/openml/__init__.py b/openml/__init__.py index 30f38f5f0..21dda24ad 100644 --- a/openml/__init__.py +++ b/openml/__init__.py @@ -20,8 +20,7 @@ from . import ( _api_calls, - _config, - _legacy_config, + config, datasets, evaluations, exceptions, @@ -51,8 +50,6 @@ OpenMLTask, ) -config = _legacy_config.LegacyConfig - def populate_cache( task_ids: list[int] | None = None, @@ -114,8 +111,6 @@ def populate_cache( "__version__", "_api_calls", "_backend", - "_config", - "_legacy_config", "config", "datasets", "evaluations", diff --git a/openml/_api_calls.py b/openml/_api_calls.py index 21d5c4391..9e53bd9fa 100644 --- a/openml/_api_calls.py +++ b/openml/_api_calls.py @@ -19,8 +19,7 @@ import xmltodict from urllib3 import ProxyManager -import openml - +from . 
import config from .__version__ import __version__ from .exceptions import ( OpenMLHashException, @@ -71,7 +70,7 @@ def resolve_env_proxies(url: str) -> str | None: def _create_url_from_endpoint(endpoint: str) -> str: - url: str = openml.config.server + url = config.server if not url.endswith("/"): url += "/" url += endpoint @@ -172,7 +171,7 @@ def _download_minio_file( bucket_name=bucket, object_name=object_name, file_path=str(destination), - progress=ProgressBar() if openml.config.show_progress else None, + progress=ProgressBar() if config.show_progress else None, request_headers=_HEADERS, ) if destination.is_file() and destination.suffix == ".zip": @@ -301,7 +300,7 @@ def _file_id_to_url(file_id: int, filename: str | None = None) -> str: Presents the URL how to download a given file id filename is optional """ - openml_url: str = openml.config.server.split("/api/") + openml_url = config.server.split("/api/") url = openml_url[0] + f"/data/download/{file_id!s}" if filename is not None: url += "/" + filename @@ -317,7 +316,7 @@ def _read_url_files( and sending file_elements as files """ data = {} if data is None else data - data["api_key"] = openml.config.apikey + data["api_key"] = config.apikey if file_elements is None: file_elements = {} # Using requests.post sets header 'Accept-encoding' automatically to @@ -337,8 +336,8 @@ def __read_url( md5_checksum: str | None = None, ) -> requests.Response: data = {} if data is None else data - if openml.config.apikey: - data["api_key"] = openml.config.apikey + if config.apikey: + data["api_key"] = config.apikey return _send_request( request_method=request_method, url=url, @@ -363,10 +362,10 @@ def _send_request( # noqa: C901, PLR0912 files: FILE_ELEMENTS_TYPE | None = None, md5_checksum: str | None = None, ) -> requests.Response: - n_retries = max(1, openml.config.connection_n_retries) + n_retries = max(1, config.connection_n_retries) response: requests.Response | None = None - delay_method = _human_delay if 
openml.config.retry_policy == "human" else _robot_delay + delay_method = _human_delay if config.retry_policy == "human" else _robot_delay # Error to raise in case of retrying too often. Will be set to the last observed exception. retry_raise_e: Exception | None = None diff --git a/openml/_legacy_config.py b/openml/_legacy_config.py deleted file mode 100644 index b26b13c01..000000000 --- a/openml/_legacy_config.py +++ /dev/null @@ -1,19 +0,0 @@ -from __future__ import annotations - -from typing import Any - - -class LegacyConfigMeta(type): - def __getattr__(cls, name: str) -> Any: - import openml - - return getattr(openml._config, name) - - def __setattr__(cls, name: str, value: Any) -> None: - import openml - - setattr(openml._config, name, value) - - -class LegacyConfig(metaclass=LegacyConfigMeta): - pass diff --git a/openml/base.py b/openml/base.py index f79bc2931..a282be8eb 100644 --- a/openml/base.py +++ b/openml/base.py @@ -8,8 +8,8 @@ import xmltodict -import openml import openml._api_calls +import openml.config from .utils import _get_rest_api_type_alias, _tag_openml_base diff --git a/openml/cli.py b/openml/cli.py index 2120449e8..0afb089c2 100644 --- a/openml/cli.py +++ b/openml/cli.py @@ -9,7 +9,7 @@ from pathlib import Path from urllib.parse import urlparse -import openml +from openml import config from openml.__version__ import __version__ @@ -59,17 +59,17 @@ def wait_until_valid_input( def print_configuration() -> None: - file = openml.config.determine_config_file_path() + file = config.determine_config_file_path() header = f"File '{file}' contains (or defaults to):" print(header) - max_key_length = max(map(len, openml.config.get_config_as_dict())) - for field, value in openml.config.get_config_as_dict().items(): + max_key_length = max(map(len, config.get_config_as_dict())) + for field, value in config.get_config_as_dict().items(): print(f"{field.ljust(max_key_length)}: {value}") def verbose_set(field: str, value: str) -> None: - 
openml.config.set_field_in_config_file(field, value) + config.set_field_in_config_file(field, value) print(f"{field} set to '{value}'.") @@ -82,7 +82,7 @@ def check_apikey(apikey: str) -> str: return "" instructions = ( - f"Your current API key is set to: '{openml.config.apikey}'. " + f"Your current API key is set to: '{config.apikey}'. " "You can get an API key at https://new.openml.org. " "You must create an account if you don't have one yet:\n" " 1. Log in with the account.\n" @@ -347,7 +347,7 @@ def main() -> None: "'https://openml.github.io/openml-python/main/usage.html#configuration'.", ) - configurable_fields = [f for f in openml.config._defaults if f not in ["max_retries"]] + configurable_fields = [f for f in config._defaults if f not in ["max_retries"]] parser_configure.add_argument( "field", diff --git a/openml/_config.py b/openml/config.py similarity index 95% rename from openml/_config.py rename to openml/config.py index c266ae9d9..e6104fd7f 100644 --- a/openml/_config.py +++ b/openml/config.py @@ -18,8 +18,6 @@ from typing_extensions import TypedDict from urllib.parse import urlparse -from openml.enums import RetryPolicy - logger = logging.getLogger(__name__) openml_logger = logging.getLogger("openml") console_handler: logging.StreamHandler | None = None @@ -208,8 +206,6 @@ def set_retry_policy(value: Literal["human", "robot"], n_retries: int | None = N retry_policy = value connection_n_retries = default_retries_by_policy[value] if n_retries is None else n_retries - _sync_api_config() - class ConfigurationForExamples: """Allows easy switching to and from a test configuration, used for examples.""" @@ -248,8 +244,6 @@ def start_using_configuration_for_example(cls) -> None: stacklevel=2, ) - _sync_api_config() - @classmethod def stop_using_configuration_for_example(cls) -> None: """Return to configuration as it was before `start_use_example_configuration`.""" @@ -268,8 +262,6 @@ def stop_using_configuration_for_example(cls) -> None: apikey = cast("str", 
cls._last_used_key) cls._start_last_called = False - _sync_api_config() - def _handle_xdg_config_home_backwards_compatibility( xdg_home: str, @@ -382,8 +374,6 @@ def _setup(config: _Config | None = None) -> None: short_cache_dir = Path(config["cachedir"]) _root_cache_directory = short_cache_dir.expanduser().resolve() - _sync_api_config() - try: cache_exists = _root_cache_directory.exists() # create the cache subdirectory @@ -418,8 +408,6 @@ def set_field_in_config_file(field: str, value: Any) -> None: if value is not None: fh.write(f"{f} = {value}\n") - _sync_api_config() - def _parse_config(config_file: str | Path) -> _Config: """Parse the config file, set up defaults.""" @@ -507,8 +495,6 @@ def set_root_cache_directory(root_cache_directory: str | Path) -> None: global _root_cache_directory # noqa: PLW0603 _root_cache_directory = Path(root_cache_directory) - _sync_api_config() - start_using_configuration_for_example = ( ConfigurationForExamples.start_using_configuration_for_example @@ -528,28 +514,6 @@ def overwrite_config_context(config: dict[str, Any]) -> Iterator[_Config]: _setup(existing_config) -def _sync_api_config() -> None: - """Sync the new API config with the legacy config in this file.""" - from ._api import APIBackend - - p = urlparse(server) - v1_server = f"{p.scheme}://{p.netloc}/" - v1_base_url = p.path.lstrip("/") - connection_retry_policy = RetryPolicy.HUMAN if retry_policy == "human" else RetryPolicy.ROBOT - cache_dir = str(_root_cache_directory) - - APIBackend.set_config_values( - { - "api_configs.v1.server": v1_server, - "api_configs.v1.base_url": v1_base_url, - "api_configs.v1.api_key": apikey, - "cache.dir": cache_dir, - "connection.retry_policy": connection_retry_policy, - "connection.retries": connection_n_retries, - } - ) - - __all__ = [ "get_cache_directory", "get_config_as_dict", diff --git a/openml/datasets/dataset.py b/openml/datasets/dataset.py index 59d6205ba..d9eee278d 100644 --- a/openml/datasets/dataset.py +++ 
b/openml/datasets/dataset.py @@ -17,8 +17,8 @@ import scipy.sparse import xmltodict -import openml from openml.base import OpenMLBase +from openml.config import OPENML_SKIP_PARQUET_ENV_VAR from .data_feature import OpenMLDataFeature @@ -375,9 +375,7 @@ def _download_data(self) -> None: # import required here to avoid circular import. from .functions import _get_dataset_arff, _get_dataset_parquet - skip_parquet = ( - os.environ.get(openml.config.OPENML_SKIP_PARQUET_ENV_VAR, "false").casefold() == "true" - ) + skip_parquet = os.environ.get(OPENML_SKIP_PARQUET_ENV_VAR, "false").casefold() == "true" if self._parquet_url is not None and not skip_parquet: parquet_file = _get_dataset_parquet(self) self.parquet_file = None if parquet_file is None else str(parquet_file) diff --git a/openml/datasets/functions.py b/openml/datasets/functions.py index 432938520..3ac657ea0 100644 --- a/openml/datasets/functions.py +++ b/openml/datasets/functions.py @@ -19,9 +19,9 @@ import xmltodict from scipy.sparse import coo_matrix -import openml import openml._api_calls import openml.utils +from openml.config import OPENML_SKIP_PARQUET_ENV_VAR from openml.exceptions import ( OpenMLHashException, OpenMLPrivateDatasetError, @@ -492,9 +492,7 @@ def get_dataset( # noqa: C901, PLR0912 qualities_file = _get_dataset_qualities_file(did_cache_dir, dataset_id) parquet_file = None - skip_parquet = ( - os.environ.get(openml.config.OPENML_SKIP_PARQUET_ENV_VAR, "false").casefold() == "true" - ) + skip_parquet = os.environ.get(OPENML_SKIP_PARQUET_ENV_VAR, "false").casefold() == "true" download_parquet = "oml:parquet_url" in description and not skip_parquet if download_parquet and (download_data or download_all_files): try: diff --git a/openml/evaluations/evaluation.py b/openml/evaluations/evaluation.py index 87df8454a..5db087024 100644 --- a/openml/evaluations/evaluation.py +++ b/openml/evaluations/evaluation.py @@ -3,6 +3,7 @@ from dataclasses import asdict, dataclass +import openml.config import 
openml.datasets import openml.flows import openml.runs diff --git a/openml/runs/functions.py b/openml/runs/functions.py index 914a3b46b..503788dbd 100644 --- a/openml/runs/functions.py +++ b/openml/runs/functions.py @@ -18,6 +18,7 @@ import openml import openml._api_calls import openml.utils +from openml import config from openml.exceptions import ( OpenMLCacheException, OpenMLRunsExistError, @@ -44,6 +45,7 @@ # Avoid import cycles: https://mypy.readthedocs.io/en/latest/common_issues.html#import-cycles if TYPE_CHECKING: + from openml.config import _Config from openml.extensions.extension_interface import Extension # get_dict is in run.py to avoid circular imports @@ -105,7 +107,7 @@ def run_model_on_task( # noqa: PLR0913 """ if avoid_duplicate_runs is None: avoid_duplicate_runs = openml.config.avoid_duplicate_runs - if avoid_duplicate_runs and not openml.config.apikey: + if avoid_duplicate_runs and not config.apikey: warnings.warn( "avoid_duplicate_runs is set to True, but no API key is set. " "Please set your API key in the OpenML configuration file, see" @@ -334,7 +336,7 @@ def run_flow_on_task( # noqa: C901, PLR0912, PLR0915, PLR0913 message = f"Executed Task {task.task_id} with Flow id:{run.flow_id}" else: message = f"Executed Task {task.task_id} on local Flow with name {flow.name}." - openml.config.logger.info(message) + config.logger.info(message) return run @@ -526,7 +528,7 @@ def _run_task_get_arffcontent( # noqa: PLR0915, PLR0912, C901 # The forked child process may not copy the configuration state of OpenML from the parent. # Current configuration setup needs to be copied and passed to the child processes. 
- _config = openml.config.get_config_as_dict() + _config = config.get_config_as_dict() # Execute runs in parallel # assuming the same number of tasks as workers (n_jobs), the total compute time for this # statement will be similar to the slowest run @@ -549,7 +551,7 @@ def _run_task_get_arffcontent( # noqa: PLR0915, PLR0912, C901 rep_no=rep_no, sample_no=sample_no, task=task, - configuration=openml.config._Config, + configuration=_config, ) for _n_fit, rep_no, fold_no, sample_no in jobs ) # job_rvals contain the output of all the runs with one-to-one correspondence with `jobs` @@ -692,7 +694,7 @@ def _run_task_get_arffcontent_parallel_helper( # noqa: PLR0913 rep_no: int, sample_no: int, task: OpenMLTask, - configuration: openml.config._Config | None = None, # type: ignore[name-defined] + configuration: _Config | None = None, ) -> tuple[ np.ndarray, pd.DataFrame | None, @@ -717,7 +719,7 @@ def _run_task_get_arffcontent_parallel_helper( # noqa: PLR0913 Sample number to be run. task : OpenMLTask The task object from OpenML. - configuration : openml.config._Config + configuration : _Config Hyperparameters to configure the model. 
Returns @@ -731,7 +733,7 @@ def _run_task_get_arffcontent_parallel_helper( # noqa: PLR0913 """ # Sets up the OpenML instantiated in the child process to match that of the parent's # if configuration=None, loads the default - openml.config._setup(configuration) + config._setup(configuration) train_indices, test_indices = task.get_train_test_split_indices( repeat=rep_no, @@ -760,7 +762,7 @@ def _run_task_get_arffcontent_parallel_helper( # noqa: PLR0913 f"task_class={task.__class__.__name__}" ) - openml.config.logger.info( + config.logger.info( f"Going to run model {model!s} on " f"dataset {openml.datasets.get_dataset(task.dataset_id).name} " f"for repeat {rep_no} fold {fold_no} sample {sample_no}" diff --git a/openml/setups/functions.py b/openml/setups/functions.py index a24d3a456..4bf279ed1 100644 --- a/openml/setups/functions.py +++ b/openml/setups/functions.py @@ -14,6 +14,7 @@ import openml import openml.exceptions import openml.utils +from openml import config from openml.flows import OpenMLFlow, flow_exists from .setup import OpenMLParameter, OpenMLSetup @@ -83,7 +84,7 @@ def _get_cached_setup(setup_id: int) -> OpenMLSetup: OpenMLCacheException If the setup file for the given setup ID is not cached. 
""" - cache_dir = Path(openml.config.get_cache_directory()) + cache_dir = Path(config.get_cache_directory()) setup_cache_dir = cache_dir / "setups" / str(setup_id) try: setup_file = setup_cache_dir / "description.xml" @@ -111,7 +112,7 @@ def get_setup(setup_id: int) -> OpenMLSetup: ------- OpenMLSetup (an initialized openml setup object) """ - setup_dir = Path(openml.config.get_cache_directory()) / "setups" / str(setup_id) + setup_dir = Path(config.get_cache_directory()) / "setups" / str(setup_id) setup_dir.mkdir(exist_ok=True, parents=True) setup_file = setup_dir / "description.xml" diff --git a/openml/setups/setup.py b/openml/setups/setup.py index 6c63b88ef..0960ad4c1 100644 --- a/openml/setups/setup.py +++ b/openml/setups/setup.py @@ -3,6 +3,7 @@ from typing import Any +import openml.config import openml.flows diff --git a/openml/study/functions.py b/openml/study/functions.py index 367537773..bb24ddcff 100644 --- a/openml/study/functions.py +++ b/openml/study/functions.py @@ -8,8 +8,8 @@ import pandas as pd import xmltodict -import openml import openml._api_calls +import openml.config import openml.utils from openml.study.study import OpenMLBenchmarkSuite, OpenMLStudy diff --git a/openml/study/study.py b/openml/study/study.py index 803c6455b..7a9c80bbe 100644 --- a/openml/study/study.py +++ b/openml/study/study.py @@ -5,8 +5,8 @@ from collections.abc import Sequence from typing import Any -import openml from openml.base import OpenMLBase +from openml.config import get_server_base_url class BaseStudy(OpenMLBase): @@ -111,7 +111,7 @@ def _get_repr_body_fields(self) -> Sequence[tuple[str, str | int | list[str]]]: fields["ID"] = self.study_id fields["Study URL"] = self.openml_url if self.creator is not None: - fields["Creator"] = f"{openml.config.get_server_base_url()}/u/{self.creator}" + fields["Creator"] = f"{get_server_base_url()}/u/{self.creator}" if self.creation_date is not None: fields["Upload Time"] = self.creation_date.replace("T", " ") if self.data is not 
None: diff --git a/openml/tasks/task.py b/openml/tasks/task.py index 202abac32..b297a105c 100644 --- a/openml/tasks/task.py +++ b/openml/tasks/task.py @@ -11,8 +11,8 @@ from typing import TYPE_CHECKING, Any from typing_extensions import TypedDict -import openml import openml._api_calls +import openml.config from openml import datasets from openml.base import OpenMLBase from openml.utils import _create_cache_directory_for_id diff --git a/openml/utils.py b/openml/utils.py index daa86ab50..3680bc0ff 100644 --- a/openml/utils.py +++ b/openml/utils.py @@ -19,6 +19,8 @@ import openml._api_calls import openml.exceptions +from . import config + # Avoid import cycles: https://mypy.readthedocs.io/en/latest/common_issues.html#import-cycles if TYPE_CHECKING: from openml.base import OpenMLBase @@ -327,7 +329,7 @@ def _list_all( # noqa: C901 def _get_cache_dir_for_key(key: str) -> Path: - return Path(openml.config.get_cache_directory()) / key + return Path(config.get_cache_directory()) / key def _create_cache_directory(key: str) -> Path: @@ -427,7 +429,7 @@ def safe_func(*args: P.args, **kwargs: P.kwargs) -> R: def _create_lockfiles_dir() -> Path: - path = Path(openml.config.get_cache_directory()) / "locks" + path = Path(config.get_cache_directory()) / "locks" # TODO(eddiebergman): Not sure why this is allowed to error and ignore??? 
with contextlib.suppress(OSError): path.mkdir(exist_ok=True, parents=True) diff --git a/tests/test_evaluations/test_evaluations_example.py b/tests/test_evaluations/test_evaluations_example.py index 7ea25e55c..a9ad7e8c1 100644 --- a/tests/test_evaluations/test_evaluations_example.py +++ b/tests/test_evaluations/test_evaluations_example.py @@ -2,14 +2,15 @@ from __future__ import annotations import unittest -import openml + +from openml.config import overwrite_config_context class TestEvaluationsExample(unittest.TestCase): def test_example_python_paper(self): # Example script which will appear in the upcoming OpenML-Python paper # This test ensures that the example will keep running! - with openml.config.overwrite_config_context( + with overwrite_config_context( { "server": "https://www.openml.org/api/v1/xml", "apikey": None, diff --git a/tests/test_openml/test_api_calls.py b/tests/test_openml/test_api_calls.py index 6b1cc64b1..a295259ef 100644 --- a/tests/test_openml/test_api_calls.py +++ b/tests/test_openml/test_api_calls.py @@ -9,6 +9,7 @@ import pytest import openml +from openml.config import ConfigurationForExamples import openml.testing from openml._api_calls import _download_minio_bucket, API_TOKEN_HELP_LINK diff --git a/tests/test_openml/test_config.py b/tests/test_openml/test_config.py index bcb37dcec..7ef223504 100644 --- a/tests/test_openml/test_config.py +++ b/tests/test_openml/test_config.py @@ -12,7 +12,7 @@ import pytest -import openml +import openml.config import openml.testing from openml.testing import TestBase From d43cf86f3869392976d70fdbeba0d140ac1e04f3 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Tue, 3 Feb 2026 12:35:57 +0500 Subject: [PATCH 069/156] implement _sync_api_config --- openml/config.py | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/openml/config.py b/openml/config.py index e6104fd7f..c266ae9d9 100644 --- a/openml/config.py +++ b/openml/config.py @@ -18,6 +18,8 @@ from typing_extensions 
import TypedDict from urllib.parse import urlparse +from openml.enums import RetryPolicy + logger = logging.getLogger(__name__) openml_logger = logging.getLogger("openml") console_handler: logging.StreamHandler | None = None @@ -206,6 +208,8 @@ def set_retry_policy(value: Literal["human", "robot"], n_retries: int | None = N retry_policy = value connection_n_retries = default_retries_by_policy[value] if n_retries is None else n_retries + _sync_api_config() + class ConfigurationForExamples: """Allows easy switching to and from a test configuration, used for examples.""" @@ -244,6 +248,8 @@ def start_using_configuration_for_example(cls) -> None: stacklevel=2, ) + _sync_api_config() + @classmethod def stop_using_configuration_for_example(cls) -> None: """Return to configuration as it was before `start_use_example_configuration`.""" @@ -262,6 +268,8 @@ def stop_using_configuration_for_example(cls) -> None: apikey = cast("str", cls._last_used_key) cls._start_last_called = False + _sync_api_config() + def _handle_xdg_config_home_backwards_compatibility( xdg_home: str, @@ -374,6 +382,8 @@ def _setup(config: _Config | None = None) -> None: short_cache_dir = Path(config["cachedir"]) _root_cache_directory = short_cache_dir.expanduser().resolve() + _sync_api_config() + try: cache_exists = _root_cache_directory.exists() # create the cache subdirectory @@ -408,6 +418,8 @@ def set_field_in_config_file(field: str, value: Any) -> None: if value is not None: fh.write(f"{f} = {value}\n") + _sync_api_config() + def _parse_config(config_file: str | Path) -> _Config: """Parse the config file, set up defaults.""" @@ -495,6 +507,8 @@ def set_root_cache_directory(root_cache_directory: str | Path) -> None: global _root_cache_directory # noqa: PLW0603 _root_cache_directory = Path(root_cache_directory) + _sync_api_config() + start_using_configuration_for_example = ( ConfigurationForExamples.start_using_configuration_for_example @@ -514,6 +528,28 @@ def overwrite_config_context(config: 
dict[str, Any]) -> Iterator[_Config]: _setup(existing_config) +def _sync_api_config() -> None: + """Sync the new API config with the legacy config in this file.""" + from ._api import APIBackend + + p = urlparse(server) + v1_server = f"{p.scheme}://{p.netloc}/" + v1_base_url = p.path.lstrip("/") + connection_retry_policy = RetryPolicy.HUMAN if retry_policy == "human" else RetryPolicy.ROBOT + cache_dir = str(_root_cache_directory) + + APIBackend.set_config_values( + { + "api_configs.v1.server": v1_server, + "api_configs.v1.base_url": v1_base_url, + "api_configs.v1.api_key": apikey, + "cache.dir": cache_dir, + "connection.retry_policy": connection_retry_policy, + "connection.retries": connection_n_retries, + } + ) + + __all__ = [ "get_cache_directory", "get_config_as_dict", From 3e323edff1787e01f8f9aa74e419f3f27fc9400b Mon Sep 17 00:00:00 2001 From: geetu040 Date: Tue, 3 Feb 2026 12:36:18 +0500 Subject: [PATCH 070/156] update tests with _sync_api_config --- openml/testing.py | 3 +++ tests/conftest.py | 3 +++ tests/test_datasets/test_dataset_functions.py | 6 ++++++ 3 files changed, 12 insertions(+) diff --git a/openml/testing.py b/openml/testing.py index a971aa1c3..a3d137916 100644 --- a/openml/testing.py +++ b/openml/testing.py @@ -110,6 +110,7 @@ def setUp(self, n_levels: int = 1, tmpdir_suffix: str = "") -> None: self.retry_policy = openml.config.retry_policy self.connection_n_retries = openml.config.connection_n_retries openml.config.set_retry_policy("robot", n_retries=20) + openml.config._sync_api_config() def use_production_server(self) -> None: """ @@ -119,6 +120,7 @@ def use_production_server(self) -> None: """ openml.config.server = self.production_server openml.config.apikey = "" + openml.config._sync_api_config() def tearDown(self) -> None: """Tear down the test""" @@ -132,6 +134,7 @@ def tearDown(self) -> None: openml.config.connection_n_retries = self.connection_n_retries openml.config.retry_policy = self.retry_policy + openml.config._sync_api_config() 
@classmethod def _mark_entity_for_removal( diff --git a/tests/conftest.py b/tests/conftest.py index bd974f3f3..bcf93bd72 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -99,6 +99,7 @@ def delete_remote_files(tracker, flow_names) -> None: """ openml.config.server = TestBase.test_server openml.config.apikey = TestBase.user_key + openml.config._sync_api_config() # reordering to delete sub flows at the end of flows # sub-flows have shorter names, hence, sorting by descending order of flow name length @@ -275,10 +276,12 @@ def with_server(request): if "production" in request.keywords: openml.config.server = "https://www.openml.org/api/v1/xml" openml.config.apikey = None + openml.config._sync_api_config() yield return openml.config.server = "https://test.openml.org/api/v1/xml" openml.config.apikey = TestBase.user_key + openml.config._sync_api_config() yield diff --git a/tests/test_datasets/test_dataset_functions.py b/tests/test_datasets/test_dataset_functions.py index c41664ba7..39a6c9cae 100644 --- a/tests/test_datasets/test_dataset_functions.py +++ b/tests/test_datasets/test_dataset_functions.py @@ -158,6 +158,7 @@ def test_check_datasets_active(self): [79], ) openml.config.server = self.test_server + openml.config._sync_api_config() @pytest.mark.uses_test_server() def test_illegal_character_tag(self): @@ -186,6 +187,7 @@ def test__name_to_id_with_deactivated(self): # /d/1 was deactivated assert openml.datasets.functions._name_to_id("anneal") == 2 openml.config.server = self.test_server + openml.config._sync_api_config() @pytest.mark.production() def test__name_to_id_with_multiple_active(self): @@ -438,6 +440,7 @@ def test__getarff_md5_issue(self): } n = openml.config.connection_n_retries openml.config.connection_n_retries = 1 + openml.config._sync_api_config() self.assertRaisesRegex( OpenMLHashException, @@ -448,6 +451,7 @@ def test__getarff_md5_issue(self): ) openml.config.connection_n_retries = n + openml.config._sync_api_config() 
@pytest.mark.uses_test_server() def test__get_dataset_features(self): @@ -617,6 +621,7 @@ def test_data_status(self): # admin key for test server (only admins can activate datasets. # all users can deactivate their own datasets) openml.config.apikey = TestBase.admin_key + openml.config._sync_api_config() openml.datasets.status_update(did, "active") self._assert_status_of_dataset(did=did, status="active") @@ -1555,6 +1560,7 @@ def test_list_datasets_with_high_size_parameter(self): # Reverting to test server openml.config.server = self.test_server + openml.config._sync_api_config() assert len(datasets_a) == len(datasets_b) From 9195fa6ea6de253141fe68e922fd414c85b1d806 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Tue, 3 Feb 2026 12:51:44 +0500 Subject: [PATCH 071/156] rename config: timeout -> timeout_seconds --- openml/_api/clients/http.py | 6 +++--- openml/_api/setup/builder.py | 4 ++-- openml/_api/setup/config.py | 4 ++-- openml/testing.py | 10 +++++----- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/openml/_api/clients/http.py b/openml/_api/clients/http.py index 353cd5e9e..2c1e52d19 100644 --- a/openml/_api/clients/http.py +++ b/openml/_api/clients/http.py @@ -116,7 +116,7 @@ def __init__( # noqa: PLR0913 server: str, base_url: str, api_key: str, - timeout: int, + timeout_seconds: int, retries: int, retry_policy: RetryPolicy, cache: HTTPCache | None = None, @@ -124,7 +124,7 @@ def __init__( # noqa: PLR0913 self.server = server self.base_url = base_url self.api_key = api_key - self.timeout = timeout + self.timeout_seconds = timeout_seconds self.retries = retries self.retry_policy = retry_policy self.cache = cache @@ -343,7 +343,7 @@ def request( headers = request_kwargs.pop("headers", {}).copy() headers.update(self.headers) - timeout = request_kwargs.pop("timeout", self.timeout) + timeout = request_kwargs.pop("timeout", self.timeout_seconds) files = request_kwargs.pop("files", None) if use_cache and not reset_cache and self.cache is not None: 
diff --git a/openml/_api/setup/builder.py b/openml/_api/setup/builder.py index 750db431a..d411189ee 100644 --- a/openml/_api/setup/builder.py +++ b/openml/_api/setup/builder.py @@ -33,7 +33,7 @@ def build(cls, config: Config) -> APIBackendBuilder: server=primary_api_config.server, base_url=primary_api_config.base_url, api_key=primary_api_config.api_key, - timeout=config.connection.timeout, + timeout_seconds=config.connection.timeout_seconds, retries=config.connection.retries, retry_policy=config.connection.retry_policy, cache=http_cache, @@ -51,7 +51,7 @@ def build(cls, config: Config) -> APIBackendBuilder: server=fallback_api_config.server, base_url=fallback_api_config.base_url, api_key=fallback_api_config.api_key, - timeout=config.connection.timeout, + timeout_seconds=config.connection.timeout_seconds, retries=config.connection.retries, retry_policy=config.connection.retry_policy, cache=http_cache, diff --git a/openml/_api/setup/config.py b/openml/_api/setup/config.py index ea868262a..8e8fc1f5d 100644 --- a/openml/_api/setup/config.py +++ b/openml/_api/setup/config.py @@ -18,7 +18,7 @@ class APIConfig: class ConnectionConfig: retries: int retry_policy: RetryPolicy - timeout: int + timeout_seconds: int @dataclass @@ -51,7 +51,7 @@ class Config: default_factory=lambda: ConnectionConfig( retries=5, retry_policy=RetryPolicy.HUMAN, - timeout=10, + timeout_seconds=10, ) ) diff --git a/openml/testing.py b/openml/testing.py index a3d137916..2087283d3 100644 --- a/openml/testing.py +++ b/openml/testing.py @@ -286,7 +286,7 @@ class TestAPIBase(unittest.TestCase): server: str base_url: str api_key: str - timeout: int + timeout_seconds: int retries: int retry_policy: RetryPolicy dir: str @@ -298,7 +298,7 @@ def setUp(self) -> None: self.server = "https://test.openml.org/" self.base_url = "api/v1/xml" self.api_key = "normaluser" - self.timeout = 10 + self.timeout_seconds = 10 self.retries = 3 self.retry_policy = RetryPolicy.HUMAN self.dir = "test_cache" @@ -312,7 +312,7 @@ 
def setUp(self) -> None: server=self.server, base_url=self.base_url, api_key=self.api_key, - timeout=self.timeout, + timeout_seconds=self.timeout_seconds, retries=self.retries, retry_policy=self.retry_policy, cache=self.cache, @@ -340,7 +340,7 @@ def _get_http_client( # noqa: PLR0913 server: str, base_url: str, api_key: str, - timeout: int, + timeout_seconds: int, retries: int, retry_policy: RetryPolicy, cache: HTTPCache | None = None, @@ -349,7 +349,7 @@ def _get_http_client( # noqa: PLR0913 server=server, base_url=base_url, api_key=api_key, - timeout=timeout, + timeout_seconds=timeout_seconds, retries=retries, retry_policy=retry_policy, cache=cache, From 5342eec3716e1c50ee020156702bb658d7e37cba Mon Sep 17 00:00:00 2001 From: geetu040 Date: Tue, 3 Feb 2026 12:57:07 +0500 Subject: [PATCH 072/156] use timedelta for default ttl value --- openml/_api/setup/config.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/openml/_api/setup/config.py b/openml/_api/setup/config.py index 8e8fc1f5d..9b87ffbaf 100644 --- a/openml/_api/setup/config.py +++ b/openml/_api/setup/config.py @@ -1,6 +1,7 @@ from __future__ import annotations from dataclasses import dataclass, field +from datetime import timedelta from openml.enums import APIVersion, RetryPolicy @@ -58,6 +59,6 @@ class Config: cache: CacheConfig = field( default_factory=lambda: CacheConfig( dir=str(_resolve_default_cache_dir()), - ttl=60 * 60 * 24 * 7, + ttl=int(timedelta(weeks=1).total_seconds()), ) ) From adc0e7498469154d32fa5a16f637b5792964dd49 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Tue, 3 Feb 2026 13:35:04 +0500 Subject: [PATCH 073/156] update tests, adds v2/fallback --- tests/test_api/test_versions.py | 56 ++++++++++++++++++++++++++++++++- 1 file changed, 55 insertions(+), 1 deletion(-) diff --git a/tests/test_api/test_versions.py b/tests/test_api/test_versions.py index 6a4cad97d..4906cf9f4 100644 --- a/tests/test_api/test_versions.py +++ b/tests/test_api/test_versions.py @@ -1,8 +1,9 @@ from 
time import time import pytest from openml.testing import TestAPIBase -from openml._api import ResourceV1API +from openml._api import ResourceV1API, ResourceV2API, FallbackProxy from openml.enums import ResourceType +from openml.exceptions import OpenMLNotSupportedError class TestResourceV1API(TestAPIBase): @@ -51,3 +52,56 @@ def test_tag_and_untag(self): tags = self.resource.untag(resource_id, tag) self.assertNotIn(tag, tags) + + +class TestResourceV2API(TestResourceV1API): + def setUp(self): + super().setUp() + + self.server = "" + self.base_url = "" + self.api_key = "" + self.http_client = self._get_http_client( + server=self.server, + base_url=self.base_url, + api_key=self.api_key, + timeout_seconds=self.timeout_seconds, + retries=self.retries, + retry_policy=self.retry_policy, + cache=self.cache, + ) + + self.resource = ResourceV2API(self.http_client) + self.resource.resource_type = ResourceType.TASK + + @pytest.mark.xfail(raises=OpenMLNotSupportedError) + def test_publish_and_delete(self): + super().test_tag_and_untag() + + + @pytest.mark.xfail(raises=OpenMLNotSupportedError) + def test_tag_and_untag(self): + super().test_tag_and_untag() + + +class TestResourceFallbackAPI(TestResourceV1API): + def setUp(self): + super().setUp() + + self.http_client_v2 = self._get_http_client( + server="", + base_url="", + api_key="", + timeout_seconds=self.timeout_seconds, + retries=self.retries, + retry_policy=self.retry_policy, + cache=self.cache, + ) + + resource_v1 = ResourceV1API(self.http_client) + resource_v1.resource_type = ResourceType.TASK + + resource_v2 = ResourceV2API(self.http_client_v2) + resource_v2.resource_type = ResourceType.TASK + + self.resource = FallbackProxy(resource_v2, resource_v1) From bfb2d3e18a83982391f6653ec12fd710bbb92412 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Tue, 3 Feb 2026 13:39:42 +0500 Subject: [PATCH 074/156] add MinIOClient in TestBase --- openml/testing.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git 
a/openml/testing.py b/openml/testing.py index 2087283d3..5f0697f87 100644 --- a/openml/testing.py +++ b/openml/testing.py @@ -16,7 +16,7 @@ import requests import openml -from openml._api import HTTPCache, HTTPClient +from openml._api import HTTPCache, HTTPClient, MinIOClient from openml.enums import RetryPolicy from openml.exceptions import OpenMLServerException from openml.tasks import TaskType @@ -317,6 +317,7 @@ def setUp(self) -> None: retry_policy=self.retry_policy, cache=self.cache, ) + self.minio_client = self._get_minio_client(path=Path(self.dir)) if self.cache.path.exists(): shutil.rmtree(self.cache.path) @@ -355,6 +356,12 @@ def _get_http_client( # noqa: PLR0913 cache=cache, ) + def _get_minio_client( + self, + path: Path | None = None, + ) -> MinIOClient: + return MinIOClient(path=path) + def _get_url( self, server: str | None = None, From cabaecf27704d0797bcb8d4c855c6e5280b03945 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Tue, 3 Feb 2026 18:43:37 +0500 Subject: [PATCH 075/156] fix linting for builder --- openml/_api/setup/backend.py | 56 +++++++++++++++++++++++++++++++----- openml/_api/setup/builder.py | 14 ++++++--- 2 files changed, 59 insertions(+), 11 deletions(-) diff --git a/openml/_api/setup/backend.py b/openml/_api/setup/backend.py index 4dd0f4390..c29d1dbad 100644 --- a/openml/_api/setup/backend.py +++ b/openml/_api/setup/backend.py @@ -1,11 +1,24 @@ from __future__ import annotations from copy import deepcopy -from typing import Any +from typing import TYPE_CHECKING, Any, cast from .builder import APIBackendBuilder from .config import Config +if TYPE_CHECKING: + from openml._api.resources import ( + DatasetAPI, + EstimationProcedureAPI, + EvaluationAPI, + EvaluationMeasureAPI, + FlowAPI, + RunAPI, + SetupAPI, + StudyAPI, + TaskAPI, + ) + class APIBackend: _instance: APIBackend | None = None @@ -14,12 +27,41 @@ def __init__(self, config: Config | None = None): self._config: Config = config or Config() self._backend = 
APIBackendBuilder.build(self._config) - def __getattr__(self, name: str) -> Any: - """ - Delegate attribute access to the underlying backend. - Called only if attribute is not found on RuntimeBackend. - """ - return getattr(self._backend, name) + @property + def dataset(self) -> DatasetAPI: + return cast("DatasetAPI", self._backend.dataset) + + @property + def task(self) -> TaskAPI: + return cast("TaskAPI", self._backend.task) + + @property + def evaluation_measure(self) -> EvaluationMeasureAPI: + return cast("EvaluationMeasureAPI", self._backend.evaluation_measure) + + @property + def estimation_procedure(self) -> EstimationProcedureAPI: + return cast("EstimationProcedureAPI", self._backend.estimation_procedure) + + @property + def evaluation(self) -> EvaluationAPI: + return cast("EvaluationAPI", self._backend.evaluation) + + @property + def flow(self) -> FlowAPI: + return cast("FlowAPI", self._backend.flow) + + @property + def study(self) -> StudyAPI: + return cast("StudyAPI", self._backend.study) + + @property + def run(self) -> RunAPI: + return cast("RunAPI", self._backend.run) + + @property + def setup(self) -> SetupAPI: + return cast("SetupAPI", self._backend.setup) @classmethod def get_instance(cls) -> APIBackend: diff --git a/openml/_api/setup/builder.py b/openml/_api/setup/builder.py index d411189ee..5518a2a13 100644 --- a/openml/_api/setup/builder.py +++ b/openml/_api/setup/builder.py @@ -6,10 +6,9 @@ from openml._api.clients import HTTPCache, HTTPClient, MinIOClient from openml._api.resources import API_REGISTRY, FallbackProxy, ResourceAPI +from openml.enums import ResourceType if TYPE_CHECKING: - from openml.enums import ResourceType - from .config import Config @@ -18,8 +17,15 @@ def __init__( self, resource_apis: Mapping[ResourceType, ResourceAPI | FallbackProxy], ): - for resource_type, resource_api in resource_apis.items(): - setattr(self, resource_type.value, resource_api) + self.dataset = resource_apis[ResourceType.DATASET] + self.task = 
resource_apis[ResourceType.TASK] + self.evaluation_measure = resource_apis[ResourceType.EVALUATION_MEASURE] + self.estimation_procedure = resource_apis[ResourceType.ESTIMATION_PROCEDURE] + self.evaluation = resource_apis[ResourceType.EVALUATION] + self.flow = resource_apis[ResourceType.FLOW] + self.study = resource_apis[ResourceType.STUDY] + self.run = resource_apis[ResourceType.RUN] + self.setup = resource_apis[ResourceType.SETUP] @classmethod def build(cls, config: Config) -> APIBackendBuilder: From 85c11139928fc3de67e2c8e1527a77db07d95887 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Wed, 4 Feb 2026 13:57:00 +0500 Subject: [PATCH 076/156] fix unbound variables: "code", "message" source: https://github.com/openml/openml-python/pull/1606#issuecomment-3844025047 --- openml/_api/clients/http.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/openml/_api/clients/http.py b/openml/_api/clients/http.py index 2c1e52d19..323da8793 100644 --- a/openml/_api/clients/http.py +++ b/openml/_api/clients/http.py @@ -238,6 +238,8 @@ def _validate_response( raise OpenMLServerError(f"URI too long! 
({url})") retry_raise_e: Exception | None = None + code: int | None = None + message: str = "" try: code, message = self._parse_exception_response(response) From 39bf86a3a62bff24ffc41f10feef93eb62687b8a Mon Sep 17 00:00:00 2001 From: geetu040 Date: Wed, 4 Feb 2026 14:19:02 +0500 Subject: [PATCH 077/156] use requests.Session() --- openml/_api/clients/http.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/openml/_api/clients/http.py b/openml/_api/clients/http.py index 323da8793..98b19a937 100644 --- a/openml/_api/clients/http.py +++ b/openml/_api/clients/http.py @@ -278,6 +278,7 @@ def _validate_response( def _request( # noqa: PLR0913 self, + session: requests.Session, method: str, url: str, params: Mapping[str, Any], @@ -291,7 +292,7 @@ def _request( # noqa: PLR0913 response: Response | None = None try: - response = requests.request( + response = session.request( method=method, url=url, params=params, @@ -357,8 +358,10 @@ def request( except Exception: raise # propagate unexpected cache errors + session = requests.Session() for retry_counter in range(1, retries + 1): response, retry_raise_e = self._request( + session=session, method=method, url=url, params=params, @@ -379,6 +382,8 @@ def request( delay = self.retry_func(retry_counter) time.sleep(delay) + session.close() + assert response is not None if use_cache and self.cache is not None: From 7b66677988e73a5b67a599d8a64aac97f1dee2d8 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Wed, 4 Feb 2026 14:20:44 +0500 Subject: [PATCH 078/156] remove "timeout_seconds" entirely - removing this since it was not part of the sdk previously - some tests fail because of the timeout in stacked PRs - this option can easily be added if needed in future --- openml/_api/clients/http.py | 6 ------ openml/_api/setup/builder.py | 2 -- openml/_api/setup/config.py | 2 -- openml/testing.py | 5 ----- tests/test_api/test_versions.py | 2 -- 5 files changed, 17 deletions(-) diff --git a/openml/_api/clients/http.py 
b/openml/_api/clients/http.py index 98b19a937..db782cca7 100644 --- a/openml/_api/clients/http.py +++ b/openml/_api/clients/http.py @@ -116,7 +116,6 @@ def __init__( # noqa: PLR0913 server: str, base_url: str, api_key: str, - timeout_seconds: int, retries: int, retry_policy: RetryPolicy, cache: HTTPCache | None = None, @@ -124,7 +123,6 @@ def __init__( # noqa: PLR0913 self.server = server self.base_url = base_url self.api_key = api_key - self.timeout_seconds = timeout_seconds self.retries = retries self.retry_policy = retry_policy self.cache = cache @@ -284,7 +282,6 @@ def _request( # noqa: PLR0913 params: Mapping[str, Any], data: Mapping[str, Any], headers: Mapping[str, str], - timeout: float | int, files: Mapping[str, Any] | None, **request_kwargs: Any, ) -> tuple[Response | None, Exception | None]: @@ -298,7 +295,6 @@ def _request( # noqa: PLR0913 params=params, data=data, headers=headers, - timeout=timeout, files=files, **request_kwargs, ) @@ -346,7 +342,6 @@ def request( headers = request_kwargs.pop("headers", {}).copy() headers.update(self.headers) - timeout = request_kwargs.pop("timeout", self.timeout_seconds) files = request_kwargs.pop("files", None) if use_cache and not reset_cache and self.cache is not None: @@ -367,7 +362,6 @@ def request( params=params, data=data, headers=headers, - timeout=timeout, files=files, **request_kwargs, ) diff --git a/openml/_api/setup/builder.py b/openml/_api/setup/builder.py index 5518a2a13..f801fe525 100644 --- a/openml/_api/setup/builder.py +++ b/openml/_api/setup/builder.py @@ -39,7 +39,6 @@ def build(cls, config: Config) -> APIBackendBuilder: server=primary_api_config.server, base_url=primary_api_config.base_url, api_key=primary_api_config.api_key, - timeout_seconds=config.connection.timeout_seconds, retries=config.connection.retries, retry_policy=config.connection.retry_policy, cache=http_cache, @@ -57,7 +56,6 @@ def build(cls, config: Config) -> APIBackendBuilder: server=fallback_api_config.server, 
base_url=fallback_api_config.base_url, api_key=fallback_api_config.api_key, - timeout_seconds=config.connection.timeout_seconds, retries=config.connection.retries, retry_policy=config.connection.retry_policy, cache=http_cache, diff --git a/openml/_api/setup/config.py b/openml/_api/setup/config.py index 9b87ffbaf..4108227aa 100644 --- a/openml/_api/setup/config.py +++ b/openml/_api/setup/config.py @@ -19,7 +19,6 @@ class APIConfig: class ConnectionConfig: retries: int retry_policy: RetryPolicy - timeout_seconds: int @dataclass @@ -52,7 +51,6 @@ class Config: default_factory=lambda: ConnectionConfig( retries=5, retry_policy=RetryPolicy.HUMAN, - timeout_seconds=10, ) ) diff --git a/openml/testing.py b/openml/testing.py index 5f0697f87..d254b7bcb 100644 --- a/openml/testing.py +++ b/openml/testing.py @@ -286,7 +286,6 @@ class TestAPIBase(unittest.TestCase): server: str base_url: str api_key: str - timeout_seconds: int retries: int retry_policy: RetryPolicy dir: str @@ -298,7 +297,6 @@ def setUp(self) -> None: self.server = "https://test.openml.org/" self.base_url = "api/v1/xml" self.api_key = "normaluser" - self.timeout_seconds = 10 self.retries = 3 self.retry_policy = RetryPolicy.HUMAN self.dir = "test_cache" @@ -312,7 +310,6 @@ def setUp(self) -> None: server=self.server, base_url=self.base_url, api_key=self.api_key, - timeout_seconds=self.timeout_seconds, retries=self.retries, retry_policy=self.retry_policy, cache=self.cache, @@ -341,7 +338,6 @@ def _get_http_client( # noqa: PLR0913 server: str, base_url: str, api_key: str, - timeout_seconds: int, retries: int, retry_policy: RetryPolicy, cache: HTTPCache | None = None, @@ -350,7 +346,6 @@ def _get_http_client( # noqa: PLR0913 server=server, base_url=base_url, api_key=api_key, - timeout_seconds=timeout_seconds, retries=retries, retry_policy=retry_policy, cache=cache, diff --git a/tests/test_api/test_versions.py b/tests/test_api/test_versions.py index 4906cf9f4..9f9e61ba6 100644 --- a/tests/test_api/test_versions.py 
+++ b/tests/test_api/test_versions.py @@ -65,7 +65,6 @@ def setUp(self): server=self.server, base_url=self.base_url, api_key=self.api_key, - timeout_seconds=self.timeout_seconds, retries=self.retries, retry_policy=self.retry_policy, cache=self.cache, @@ -92,7 +91,6 @@ def setUp(self): server="", base_url="", api_key="", - timeout_seconds=self.timeout_seconds, retries=self.retries, retry_policy=self.retry_policy, cache=self.cache, From d2224c462b7bc46b129dfab5b7887f700c1fda69 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Wed, 4 Feb 2026 22:42:25 +0500 Subject: [PATCH 079/156] update/refactor tests --- openml/testing.py | 114 +++++++++++--------------------- tests/test_api/test_http.py | 20 ++++-- tests/test_api/test_versions.py | 103 ++++++++++++----------------- 3 files changed, 97 insertions(+), 140 deletions(-) diff --git a/openml/testing.py b/openml/testing.py index d254b7bcb..d73e15a2d 100644 --- a/openml/testing.py +++ b/openml/testing.py @@ -11,13 +11,12 @@ import unittest from pathlib import Path from typing import ClassVar -from urllib.parse import urljoin import requests import openml from openml._api import HTTPCache, HTTPClient, MinIOClient -from openml.enums import RetryPolicy +from openml.enums import APIVersion, RetryPolicy from openml.exceptions import OpenMLServerException from openml.tasks import TaskType @@ -283,90 +282,53 @@ def _check_fold_timing_evaluations( # noqa: PLR0913 class TestAPIBase(unittest.TestCase): - server: str - base_url: str - api_key: str retries: int retry_policy: RetryPolicy - dir: str ttl: int + cache_dir: Path cache: HTTPCache - http_client: HTTPClient + http_clients: dict[APIVersion, HTTPClient] + minio_client: MinIOClient + current_api_version: APIVersion | None def setUp(self) -> None: - self.server = "https://test.openml.org/" - self.base_url = "api/v1/xml" - self.api_key = "normaluser" - self.retries = 3 - self.retry_policy = RetryPolicy.HUMAN - self.dir = "test_cache" - self.ttl = 60 * 60 * 24 * 7 - - self.cache = 
self._get_http_cache( - path=Path(self.dir), - ttl=self.ttl, - ) - self.http_client = self._get_http_client( - server=self.server, - base_url=self.base_url, - api_key=self.api_key, - retries=self.retries, - retry_policy=self.retry_policy, - cache=self.cache, - ) - self.minio_client = self._get_minio_client(path=Path(self.dir)) + config = openml._backend.get_config() - if self.cache.path.exists(): - shutil.rmtree(self.cache.path) - - def tearDown(self) -> None: - if self.cache.path.exists(): - shutil.rmtree(self.cache.path) + self.retries = config.connection.retries + self.retry_policy = config.connection.retry_policy + self.ttl = config.cache.ttl + self.current_api_version = None - def _get_http_cache( - self, - path: Path, - ttl: int, - ) -> HTTPCache: - return HTTPCache( - path=path, - ttl=ttl, - ) + abspath_this_file = Path(inspect.getfile(self.__class__)).absolute() + self.cache_dir = abspath_this_file.parent.parent / "files" + if not self.cache_dir.is_dir(): + raise ValueError( + f"Cannot find test cache dir, expected it to be {self.cache_dir}!", + ) - def _get_http_client( # noqa: PLR0913 - self, - server: str, - base_url: str, - api_key: str, - retries: int, - retry_policy: RetryPolicy, - cache: HTTPCache | None = None, - ) -> HTTPClient: - return HTTPClient( - server=server, - base_url=base_url, - api_key=api_key, - retries=retries, - retry_policy=retry_policy, - cache=cache, + self.cache = HTTPCache( + path=self.cache_dir, + ttl=self.ttl, ) - - def _get_minio_client( - self, - path: Path | None = None, - ) -> MinIOClient: - return MinIOClient(path=path) - - def _get_url( - self, - server: str | None = None, - base_url: str | None = None, - path: str | None = None, - ) -> str: - server = server if server else self.server - base_url = base_url if base_url else self.base_url - path = path if path else "" - return urljoin(self.server, urljoin(self.base_url, path)) + self.http_clients = { + APIVersion.V1: HTTPClient( + server="https://test.openml.org/", + 
base_url="api/v1/xml/", + api_key="normaluser", + retries=self.retries, + retry_policy=self.retry_policy, + cache=self.cache, + ), + APIVersion.V2: HTTPClient( + server="http://localhost:8002/", + base_url="", + api_key="", + retries=self.retries, + retry_policy=self.retry_policy, + cache=self.cache, + ), + } + self.minio_client = MinIOClient(path=self.cache_dir) def check_task_existence( diff --git a/tests/test_api/test_http.py b/tests/test_api/test_http.py index efaeaeeef..3c35ea5e1 100644 --- a/tests/test_api/test_http.py +++ b/tests/test_api/test_http.py @@ -4,11 +4,22 @@ import pytest from openml.testing import TestAPIBase import os +from urllib.parse import urljoin +from openml.enums import APIVersion class TestHTTPClient(TestAPIBase): + def setUp(self): + super().setUp() + self.http_client = self.http_clients[APIVersion.V1] + + def _prepare_url(self, path: str | None = None) -> str: + server = self.http_client.server + base_url = self.http_client.base_url + return urljoin(server, urljoin(base_url, path)) + def test_cache(self): - url = self._get_url(path="task/31") + url = self._prepare_url(path="task/31") params = {"param1": "value1", "param2": "value2"} key = self.cache.get_key(url, params) @@ -18,6 +29,7 @@ def test_cache(self): "test", "api", "v1", + "xml", "task", "31", "param1=value1&param2=value2", @@ -68,7 +80,7 @@ def test_get_with_cache_creates_cache(self): # verify cache directory structure exists cache_key = self.cache.get_key( - self._get_url(path="task/1"), + self._prepare_url(path="task/1"), {}, ) cache_path = self.cache._key_to_path(cache_key) @@ -94,7 +106,7 @@ def test_get_cache_expires(self): self.cache.ttl = 1 path = "task/1" - url = self._get_url(path=path) + url = self._prepare_url(path=path) key = self.cache.get_key(url, {}) cache_path = self.cache._key_to_path(key) / "meta.json" @@ -115,7 +127,7 @@ def test_get_cache_expires(self): def test_get_reset_cache(self): path = "task/1" - url = 
self._prepare_url(path=path) key = self.cache.get_key(url, {}) cache_path = self.cache._key_to_path(key) / "meta.json" diff --git a/tests/test_api/test_versions.py b/tests/test_api/test_versions.py index 9f9e61ba6..5fa9d624d 100644 --- a/tests/test_api/test_versions.py +++ b/tests/test_api/test_versions.py @@ -2,18 +2,13 @@ import pytest from openml.testing import TestAPIBase from openml._api import ResourceV1API, ResourceV2API, FallbackProxy -from openml.enums import ResourceType +from openml.enums import ResourceType, APIVersion from openml.exceptions import OpenMLNotSupportedError -class TestResourceV1API(TestAPIBase): - def setUp(self): - super().setUp() - self.resource = ResourceV1API(self.http_client) - self.resource.resource_type = ResourceType.TASK - - @pytest.mark.uses_test_server() - def test_publish_and_delete(self): +@pytest.mark.uses_test_server() +class TestResourceAPIBase(TestAPIBase): + def _publish_and_delete(self): task_xml = """ 5 @@ -22,30 +17,19 @@ def test_publish_and_delete(self): """ - task_id = None - try: - # Publish the task - task_id = self.resource.publish( - "task", - files={"description": task_xml}, - ) - - # Get the task to verify it exists - get_response = self.http_client.get(f"task/{task_id}") - self.assertEqual(get_response.status_code, 200) - - finally: - # delete the task if it was created - if task_id is not None: - success = self.resource.delete(task_id) - self.assertTrue(success) + task_id = self.resource.publish( + "task", + files={"description": task_xml}, + ) + self.assertIsNotNone(task_id) + success = self.resource.delete(task_id) + self.assertTrue(success) - @pytest.mark.uses_test_server() - def test_tag_and_untag(self): + def _tag_and_untag(self): resource_id = 1 unique_indicator = str(time()).replace(".", "") - tag = f"TestResourceV1API_test_tag_and_untag_{unique_indicator}" + tag = f"{self.__class__.__name__}_test_tag_and_untag_{unique_indicator}" tags = self.resource.tag(resource_id, tag) self.assertIn(tag, tags) @@ 
-54,52 +38,51 @@ def test_tag_and_untag(self): self.assertNotIn(tag, tags) -class TestResourceV2API(TestResourceV1API): +class TestResourceV1API(TestResourceAPIBase): def setUp(self): super().setUp() - - self.server = "" - self.base_url = "" - self.api_key = "" - self.http_client = self._get_http_client( - server=self.server, - base_url=self.base_url, - api_key=self.api_key, - retries=self.retries, - retry_policy=self.retry_policy, - cache=self.cache, - ) - - self.resource = ResourceV2API(self.http_client) + http_client = self.http_clients[APIVersion.V1] + self.resource = ResourceV1API(http_client) self.resource.resource_type = ResourceType.TASK - @pytest.mark.xfail(raises=OpenMLNotSupportedError) def test_publish_and_delete(self): - super().test_tag_and_untag() - + self._publish_and_delete() - @pytest.mark.xfail(raises=OpenMLNotSupportedError) def test_tag_and_untag(self): - super().test_tag_and_untag() + self._tag_and_untag() -class TestResourceFallbackAPI(TestResourceV1API): +class TestResourceV2API(TestResourceAPIBase): def setUp(self): super().setUp() + http_client = self.http_clients[APIVersion.V2] + self.resource = ResourceV2API(http_client) + self.resource.resource_type = ResourceType.TASK + + def test_publish_and_delete(self): + with pytest.raises(OpenMLNotSupportedError): + self._tag_and_untag() + + def test_tag_and_untag(self): + with pytest.raises(OpenMLNotSupportedError): + self._tag_and_untag() - self.http_client_v2 = self._get_http_client( - server="", - base_url="", - api_key="", - retries=self.retries, - retry_policy=self.retry_policy, - cache=self.cache, - ) - resource_v1 = ResourceV1API(self.http_client) +class TestResourceFallbackAPI(TestResourceAPIBase): + def setUp(self): + super().setUp() + http_client_v1 = self.http_clients[APIVersion.V1] + resource_v1 = ResourceV1API(http_client_v1) resource_v1.resource_type = ResourceType.TASK - resource_v2 = ResourceV2API(self.http_client_v2) + http_client_v2 = self.http_clients[APIVersion.V2] + 
resource_v2 = ResourceV2API(http_client_v2) resource_v2.resource_type = ResourceType.TASK self.resource = FallbackProxy(resource_v2, resource_v1) + + def test_publish_and_delete(self): + self._publish_and_delete() + + def test_tag_and_untag(self): + self._tag_and_untag() From 9608c3652cfc74642c8bb71253af8dc31765d0a8 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Thu, 5 Feb 2026 15:27:51 +0500 Subject: [PATCH 080/156] remove unused current_api_version from TestAPIBase --- openml/testing.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/openml/testing.py b/openml/testing.py index d73e15a2d..63a93a0b8 100644 --- a/openml/testing.py +++ b/openml/testing.py @@ -289,7 +289,6 @@ class TestAPIBase(unittest.TestCase): cache: HTTPCache http_clients: dict[APIVersion, HTTPClient] minio_client: MinIOClient - current_api_version: APIVersion | None def setUp(self) -> None: config = openml._backend.get_config() @@ -297,7 +296,6 @@ def setUp(self) -> None: self.retries = config.connection.retries self.retry_policy = config.connection.retry_policy self.ttl = config.cache.ttl - self.current_api_version = None abspath_this_file = Path(inspect.getfile(self.__class__)).absolute() self.cache_dir = abspath_this_file.parent.parent / "files" From f6bc7f70707e422f727e38b9da7aaba4d4b6c322 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Thu, 5 Feb 2026 15:39:12 +0500 Subject: [PATCH 081/156] make TestAPIBase inherit TestBase --- openml/testing.py | 38 ++++++++++++++------------------------ 1 file changed, 14 insertions(+), 24 deletions(-) diff --git a/openml/testing.py b/openml/testing.py index 63a93a0b8..5a1a4d10f 100644 --- a/openml/testing.py +++ b/openml/testing.py @@ -281,52 +281,42 @@ def _check_fold_timing_evaluations( # noqa: PLR0913 assert evaluation <= max_val -class TestAPIBase(unittest.TestCase): - retries: int - retry_policy: RetryPolicy - ttl: int - cache_dir: Path +class TestAPIBase(TestBase): cache: HTTPCache http_clients: dict[APIVersion, HTTPClient] minio_client: MinIOClient 
- def setUp(self) -> None: - config = openml._backend.get_config() - - self.retries = config.connection.retries - self.retry_policy = config.connection.retry_policy - self.ttl = config.cache.ttl + def setUp(self, n_levels: int = 1, tmpdir_suffix: str = "") -> None: + super().setUp(n_levels=n_levels, tmpdir_suffix=tmpdir_suffix) - abspath_this_file = Path(inspect.getfile(self.__class__)).absolute() - self.cache_dir = abspath_this_file.parent.parent / "files" - if not self.cache_dir.is_dir(): - raise ValueError( - f"Cannot find test cache dir, expected it to be {self.cache_dir}!", - ) + retries = self.connection_n_retries + retry_policy = RetryPolicy.HUMAN if self.retry_policy == "human" else RetryPolicy.ROBOT + ttl = openml._backend.get_config_value("cache.ttl") + cache_dir = self.static_cache_dir self.cache = HTTPCache( - path=self.cache_dir, - ttl=self.ttl, + path=cache_dir, + ttl=ttl, ) self.http_clients = { APIVersion.V1: HTTPClient( server="https://test.openml.org/", base_url="api/v1/xml/", api_key="normaluser", - retries=self.retries, - retry_policy=self.retry_policy, + retries=retries, + retry_policy=retry_policy, cache=self.cache, ), APIVersion.V2: HTTPClient( server="http://localhost:8002/", base_url="", api_key="", - retries=self.retries, - retry_policy=self.retry_policy, + retries=retries, + retry_policy=retry_policy, cache=self.cache, ), } - self.minio_client = MinIOClient(path=self.cache_dir) + self.minio_client = MinIOClient(path=cache_dir) def check_task_existence( From baa3a38bedd4b888964a8e46d867ceb03e70942b Mon Sep 17 00:00:00 2001 From: geetu040 Date: Thu, 5 Feb 2026 15:43:40 +0500 Subject: [PATCH 082/156] nits: test classes --- tests/test_api/test_http.py | 3 +++ tests/test_api/test_versions.py | 4 +++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/test_api/test_http.py b/tests/test_api/test_http.py index 3c35ea5e1..ab9bd7412 100644 --- a/tests/test_api/test_http.py +++ b/tests/test_api/test_http.py @@ -6,9 +6,12 @@ import os 
from urllib.parse import urljoin from openml.enums import APIVersion +from openml._api import HTTPClient class TestHTTPClient(TestAPIBase): + http_client: HTTPClient + def setUp(self): super().setUp() self.http_client = self.http_clients[APIVersion.V1] diff --git a/tests/test_api/test_versions.py b/tests/test_api/test_versions.py index 5fa9d624d..1313889bc 100644 --- a/tests/test_api/test_versions.py +++ b/tests/test_api/test_versions.py @@ -1,13 +1,15 @@ from time import time import pytest from openml.testing import TestAPIBase -from openml._api import ResourceV1API, ResourceV2API, FallbackProxy +from openml._api import ResourceV1API, ResourceV2API, FallbackProxy, ResourceAPI from openml.enums import ResourceType, APIVersion from openml.exceptions import OpenMLNotSupportedError @pytest.mark.uses_test_server() class TestResourceAPIBase(TestAPIBase): + resource: ResourceAPI | FallbackProxy + def _publish_and_delete(self): task_xml = """ From 52b93feab0512c182299337292a79e00a1f6317e Mon Sep 17 00:00:00 2001 From: geetu040 Date: Sat, 7 Feb 2026 00:03:53 +0500 Subject: [PATCH 083/156] minor fix in _sync_api_config identified while debugging https://github.com/openml/openml-python/pull/1616#issuecomment-3858997021 --- openml/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openml/config.py b/openml/config.py index c266ae9d9..692543a00 100644 --- a/openml/config.py +++ b/openml/config.py @@ -534,7 +534,7 @@ def _sync_api_config() -> None: p = urlparse(server) v1_server = f"{p.scheme}://{p.netloc}/" - v1_base_url = p.path.lstrip("/") + v1_base_url = p.path.rstrip("/") + "/" # requirement for urllib.parse.urljoin connection_retry_policy = RetryPolicy.HUMAN if retry_policy == "human" else RetryPolicy.ROBOT cache_dir = str(_root_cache_directory) From ec9477ffbe282c8177cb56e469fce71da7040126 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Sat, 7 Feb 2026 00:14:14 +0500 Subject: [PATCH 084/156] chore: rerun CI From 
10d134ab5915cc6b777857659e1647e26b22f2d3 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Tue, 10 Feb 2026 22:02:52 +0500 Subject: [PATCH 085/156] remove duplicates in _api/resources/__init__.py --- openml/_api/resources/__init__.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/openml/_api/resources/__init__.py b/openml/_api/resources/__init__.py index 1f0b2caa1..6d957966e 100644 --- a/openml/_api/resources/__init__.py +++ b/openml/_api/resources/__init__.py @@ -42,12 +42,10 @@ "EvaluationV1API", "EvaluationV2API", "FallbackProxy", - "FallbackProxy", "FlowAPI", "FlowV1API", "FlowV2API", "ResourceAPI", - "ResourceAPI", "ResourceV1API", "ResourceV2API", "RunAPI", From 935f0f431e8814a4b789d93ebdca04651dc030a3 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Tue, 10 Feb 2026 22:21:11 +0500 Subject: [PATCH 086/156] implement HTTPClient.download and add tests --- openml/_api/clients/http.py | 56 +++++++++++++++++++++++++++++-- openml/exceptions.py | 4 +++ tests/test_api/test_http.py | 66 +++++++++++++++++++++++++++++++++++++ 3 files changed, 123 insertions(+), 3 deletions(-) diff --git a/openml/_api/clients/http.py b/openml/_api/clients/http.py index db782cca7..2c15515f3 100644 --- a/openml/_api/clients/http.py +++ b/openml/_api/clients/http.py @@ -1,12 +1,13 @@ from __future__ import annotations +import hashlib import json import logging import math import random import time import xml -from collections.abc import Mapping +from collections.abc import Callable, Mapping from pathlib import Path from typing import Any from urllib.parse import urlencode, urljoin, urlparse @@ -18,6 +19,8 @@ from openml.__version__ import __version__ from openml.enums import RetryPolicy from openml.exceptions import ( + OpenMLCacheRequiredError, + OpenMLHashException, OpenMLNotAuthorizedError, OpenMLServerError, OpenMLServerException, @@ -315,7 +318,7 @@ def _request( # noqa: PLR0913 return response, retry_raise_e - def request( + def request( # noqa: PLR0913, C901 self, method: str, path: 
str, @@ -323,6 +326,7 @@ def request( use_cache: bool = False, reset_cache: bool = False, use_api_key: bool = False, + md5_checksum: str | None = None, **request_kwargs: Any, ) -> Response: url = urljoin(self.server, urljoin(self.base_url, path)) @@ -384,8 +388,20 @@ def request( cache_key = self.cache.get_key(url, params) self.cache.save(cache_key, response) + if md5_checksum is not None: + self._verify_checksum(response, md5_checksum) + return response + def _verify_checksum(self, response: Response, md5_checksum: str) -> None: + # ruff sees hashlib.md5 as insecure + actual = hashlib.md5(response.content).hexdigest() # noqa: S324 + if actual != md5_checksum: + raise OpenMLHashException( + f"Checksum of downloaded file is unequal to the expected checksum {md5_checksum} " + f"when downloading {response.url}.", + ) + def get( self, path: str, @@ -393,6 +409,7 @@ def get( use_cache: bool = False, reset_cache: bool = False, use_api_key: bool = False, + md5_checksum: str | None = None, **request_kwargs: Any, ) -> Response: return self.request( @@ -401,19 +418,22 @@ def get( use_cache=use_cache, reset_cache=reset_cache, use_api_key=use_api_key, + md5_checksum=md5_checksum, **request_kwargs, ) def post( self, path: str, + *, + use_api_key: bool = True, **request_kwargs: Any, ) -> Response: return self.request( method="POST", path=path, use_cache=False, - use_api_key=True, + use_api_key=use_api_key, **request_kwargs, ) @@ -429,3 +449,33 @@ def delete( use_api_key=True, **request_kwargs, ) + + def download( + self, + url: str, + handler: Callable[[Response, Path, str], Path] | None = None, + encoding: str = "utf-8", + file_name: str = "response.txt", + md5_checksum: str | None = None, + ) -> Path: + if self.cache is None: + raise OpenMLCacheRequiredError( + "A cache object is required for download, but none was provided in the HTTPClient." 
+ ) + base = self.cache.path + file_path = base / "downloads" / urlparse(url).path.lstrip("/") / file_name + file_path = file_path.expanduser() + file_path.parent.mkdir(parents=True, exist_ok=True) + if file_path.exists(): + return file_path + + response = self.get(url, md5_checksum=md5_checksum) + if handler is not None: + return handler(response, file_path, encoding) + + return self._text_handler(response, file_path, encoding) + + def _text_handler(self, response: Response, path: Path, encoding: str) -> Path: + with path.open("w", encoding=encoding) as f: + f.write(response.text) + return path diff --git a/openml/exceptions.py b/openml/exceptions.py index 26c2d2591..10f693648 100644 --- a/openml/exceptions.py +++ b/openml/exceptions.py @@ -69,3 +69,7 @@ class ObjectNotPublishedError(PyOpenMLError): class OpenMLNotSupportedError(PyOpenMLError): """Raised when an API operation is not supported for a resource/version.""" + + +class OpenMLCacheRequiredError(PyOpenMLError): + """Raised when a cache object is required but not provided.""" diff --git a/tests/test_api/test_http.py b/tests/test_api/test_http.py index ab9bd7412..8dc6303d1 100644 --- a/tests/test_api/test_http.py +++ b/tests/test_api/test_http.py @@ -4,9 +4,11 @@ import pytest from openml.testing import TestAPIBase import os +from pathlib import Path from urllib.parse import urljoin from openml.enums import APIVersion from openml._api import HTTPClient +from openml.exceptions import OpenMLCacheRequiredError class TestHTTPClient(TestAPIBase): @@ -174,3 +176,67 @@ def test_post_and_delete(self): if task_id is not None: del_response = self.http_client.delete(f"task/{task_id}") self.assertEqual(del_response.status_code, 200) + + def test_download_requires_cache(self): + client = HTTPClient( + server=self.http_client.server, + base_url=self.http_client.base_url, + api_key=self.http_client.api_key, + retries=1, + retry_policy=self.http_client.retry_policy, + cache=None, + ) + + with 
pytest.raises(OpenMLCacheRequiredError): + client.download("https://www.openml.org") + + @pytest.mark.uses_test_server() + def test_download_creates_file(self): + # small stable resource + url = self.http_client.server + + path = self.http_client.download( + url, + file_name="index.html", + ) + + assert path.exists() + assert path.is_file() + assert path.read_text(encoding="utf-8") + + @pytest.mark.uses_test_server() + def test_download_is_cached_on_disk(self): + url = self.http_client.server + + path1 = self.http_client.download( + url, + file_name="cached.html", + ) + mtime1 = path1.stat().st_mtime + + # second call should NOT re-download + path2 = self.http_client.download( + url, + file_name="cached.html", + ) + mtime2 = path2.stat().st_mtime + + assert path1 == path2 + assert mtime1 == mtime2 + + @pytest.mark.uses_test_server() + def test_download_respects_custom_handler(self): + url = self.http_client.server + + def handler(response, path: Path, encoding: str): + path.write_text("HANDLED", encoding=encoding) + return path + + path = self.http_client.download( + url, + handler=handler, + file_name="handled.txt", + ) + + assert path.exists() + assert path.read_text() == "HANDLED" From 9514df8920119d6bfedda83cbd8f558ef1e10792 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Wed, 11 Feb 2026 11:54:29 +0500 Subject: [PATCH 087/156] add docstrings --- openml/_api/clients/http.py | 383 ++++++++++++++++++ openml/_api/clients/minio.py | 23 ++ openml/_api/resources/base/base.py | 124 +++++- openml/_api/resources/base/fallback.py | 108 +++++ openml/_api/resources/base/resources.py | 18 + openml/_api/resources/base/versions.py | 164 ++++++++ openml/_api/resources/dataset.py | 4 +- openml/_api/resources/estimation_procedure.py | 4 +- openml/_api/resources/evaluation.py | 4 +- openml/_api/resources/evaluation_measure.py | 4 +- openml/_api/resources/flow.py | 4 +- openml/_api/resources/run.py | 4 +- openml/_api/resources/setup.py | 4 +- openml/_api/resources/study.py | 4 +- 
openml/_api/resources/task.py | 4 +- openml/_api/setup/_utils.py | 24 ++ openml/_api/setup/backend.py | 107 +++++ openml/_api/setup/builder.py | 53 +++ openml/_api/setup/config.py | 54 +++ 19 files changed, 1072 insertions(+), 22 deletions(-) diff --git a/openml/_api/clients/http.py b/openml/_api/clients/http.py index 2c15515f3..a1ccc5122 100644 --- a/openml/_api/clients/http.py +++ b/openml/_api/clients/http.py @@ -29,11 +29,52 @@ class HTTPCache: + """ + Filesystem-based cache for HTTP responses. + + This class stores HTTP responses on disk using a structured directory layout + derived from the request URL and parameters. Each cached response consists of + three files: metadata (``meta.json``), headers (``headers.json``), and the raw + body (``body.bin``). Entries are considered valid until their time-to-live + (TTL) expires. + + Parameters + ---------- + path : pathlib.Path + Base directory where cache entries are stored. + ttl : int + Time-to-live in seconds. Cached entries older than this value are treated + as expired. + + Notes + ----- + The cache key is derived from the URL (domain and path components) and query + parameters, excluding the ``api_key`` parameter. + """ + def __init__(self, *, path: Path, ttl: int) -> None: self.path = path self.ttl = ttl def get_key(self, url: str, params: dict[str, Any]) -> str: + """ + Generate a filesystem-safe cache key for a request. + + The key is constructed from the reversed domain components, URL path + segments, and URL-encoded query parameters (excluding ``api_key``). + + Parameters + ---------- + url : str + The full request URL. + params : dict of str to Any + Query parameters associated with the request. + + Returns + ------- + str + A relative path string representing the cache key. 
+ """ parsed_url = urlparse(url) netloc_parts = parsed_url.netloc.split(".")[::-1] path_parts = parsed_url.path.strip("/").split("/") @@ -44,9 +85,44 @@ def get_key(self, url: str, params: dict[str, Any]) -> str: return str(Path(*netloc_parts, *path_parts, *params_part)) def _key_to_path(self, key: str) -> Path: + """ + Convert a cache key into an absolute filesystem path. + + Parameters + ---------- + key : str + Cache key as returned by :meth:`get_key`. + + Returns + ------- + pathlib.Path + Absolute path corresponding to the cache entry. + """ return self.path.joinpath(key) def load(self, key: str) -> Response: + """ + Load a cached HTTP response from disk. + + Parameters + ---------- + key : str + Cache key identifying the stored response. + + Returns + ------- + requests.Response + Reconstructed response object with status code, headers, body, and metadata. + + Raises + ------ + FileNotFoundError + If the cache entry or required files are missing. + TimeoutError + If the cached entry has expired based on the configured TTL. + ValueError + If required metadata is missing or malformed. + """ path = self._key_to_path(key) if not path.exists(): @@ -85,6 +161,22 @@ def load(self, key: str) -> Response: return response def save(self, key: str, response: Response) -> None: + """ + Persist an HTTP response to disk. + + Parameters + ---------- + key : str + Cache key identifying where to store the response. + response : requests.Response + Response object to cache. + + Notes + ----- + The response body is stored as binary data. Headers and metadata + (status code, URL, reason, encoding, elapsed time, request info, and + creation timestamp) are stored as JSON. + """ path = self._key_to_path(key) path.mkdir(parents=True, exist_ok=True) @@ -113,6 +205,29 @@ def save(self, key: str, response: Response) -> None: class HTTPClient: + """ + HTTP client for interacting with the OpenML API. 
+ + This client supports configurable retry policies, optional filesystem + caching, API key authentication, and response validation including + checksum verification. + + Parameters + ---------- + server : str + Base server URL (e.g., ``https://www.openml.org``). + base_url : str + Base API path appended to the server URL. + api_key : str + API key used for authenticated endpoints. + retries : int + Maximum number of retry attempts for failed requests. + retry_policy : RetryPolicy + Strategy controlling delay between retries. + cache : HTTPCache or None, optional + Cache instance for storing and retrieving responses. + """ + def __init__( # noqa: PLR0913 self, *, @@ -136,17 +251,62 @@ def __init__( # noqa: PLR0913 self.headers: dict[str, str] = {"user-agent": f"openml-python/{__version__}"} def _robot_delay(self, n: int) -> float: + """ + Compute delay for automated retry policy. + + Parameters + ---------- + n : int + Current retry attempt number (1-based). + + Returns + ------- + float + Number of seconds to wait before the next retry. + + Notes + ----- + Uses a sigmoid-based growth curve with Gaussian noise to gradually + increase waiting time. + """ wait = (1 / (1 + math.exp(-(n * 0.5 - 4)))) * 60 variation = random.gauss(0, wait / 10) return max(1.0, wait + variation) def _human_delay(self, n: int) -> float: + """ + Compute delay for human-like retry policy. + + Parameters + ---------- + n : int + Current retry attempt number (1-based). + + Returns + ------- + float + Number of seconds to wait before the next retry. + """ return max(1.0, n) def _parse_exception_response( self, response: Response, ) -> tuple[int | None, str]: + """ + Parse an error response returned by the server. + + Parameters + ---------- + response : requests.Response + HTTP response containing error details in JSON or XML format. + + Returns + ------- + tuple of (int or None, str) + Parsed error code and combined error message. The code may be + ``None`` if unavailable. 
+ """ content_type = response.headers.get("Content-Type", "").lower() if "json" in content_type: @@ -183,6 +343,29 @@ def _raise_code_specific_error( url: str, files: Mapping[str, Any] | None, ) -> None: + """ + Raise specialized exceptions based on OpenML error codes. + + Parameters + ---------- + code : int + Server-provided error code. + message : str + Parsed error message. + url : str + Request URL associated with the error. + files : Mapping of str to Any or None + Files sent with the request, if any. + + Raises + ------ + OpenMLServerNoResult + If the error indicates a missing resource. + OpenMLNotAuthorizedError + If authentication is required or invalid. + OpenMLServerException + For other server-side errors (except retryable database errors). + """ if code in [111, 372, 512, 500, 482, 542, 674]: # 512 for runs, 372 for datasets, 500 for flows # 482 for tasks, 542 for evaluations, 674 for setups @@ -226,6 +409,31 @@ def _validate_response( files: Mapping[str, Any] | None, response: Response, ) -> Exception | None: + """ + Validate an HTTP response and determine whether to retry. + + Parameters + ---------- + method : str + HTTP method used for the request. + url : str + Full request URL. + files : Mapping of str to Any or None + Files sent with the request, if any. + response : requests.Response + Received HTTP response. + + Returns + ------- + Exception or None + ``None`` if the response is valid. Otherwise, an exception + indicating the error to raise or retry. + + Raises + ------ + OpenMLServerError + For unexpected server errors or malformed responses. + """ if ( "Content-Encoding" not in response.headers or response.headers["Content-Encoding"] != "gzip" @@ -288,6 +496,33 @@ def _request( # noqa: PLR0913 files: Mapping[str, Any] | None, **request_kwargs: Any, ) -> tuple[Response | None, Exception | None]: + """ + Execute a single HTTP request attempt. + + Parameters + ---------- + session : requests.Session + Active session used to send the request. 
+ method : str + HTTP method (e.g., ``GET``, ``POST``). + url : str + Full request URL. + params : Mapping of str to Any + Query parameters. + data : Mapping of str to Any + Request body data. + headers : Mapping of str to str + HTTP headers. + files : Mapping of str to Any or None + Files to upload. + **request_kwargs : Any + Additional arguments forwarded to ``requests.Session.request``. + + Returns + ------- + tuple of (requests.Response or None, Exception or None) + Response and potential retry exception. + """ retry_raise_e: Exception | None = None response: Response | None = None @@ -329,6 +564,38 @@ def request( # noqa: PLR0913, C901 md5_checksum: str | None = None, **request_kwargs: Any, ) -> Response: + """ + Send an HTTP request with retry, caching, and validation support. + + Parameters + ---------- + method : str + HTTP method to use. + path : str + API path relative to the base URL. + use_cache : bool, optional + Whether to load/store responses from cache. + reset_cache : bool, optional + If True, bypass existing cache entries. + use_api_key : bool, optional + Whether to include the API key in query parameters. + md5_checksum : str or None, optional + Expected MD5 checksum of the response body. + **request_kwargs : Any + Additional arguments passed to the underlying request. + + Returns + ------- + requests.Response + Final validated response. + + Raises + ------ + Exception + Propagates network, validation, or server exceptions after retries. + OpenMLHashException + If checksum verification fails. + """ url = urljoin(self.server, urljoin(self.base_url, path)) retries = max(1, self.retries) @@ -394,6 +661,21 @@ def request( # noqa: PLR0913, C901 return response def _verify_checksum(self, response: Response, md5_checksum: str) -> None: + """ + Verify MD5 checksum of a response body. + + Parameters + ---------- + response : requests.Response + HTTP response whose content should be verified. + md5_checksum : str + Expected hexadecimal MD5 checksum. 
+ + Raises + ------ + OpenMLHashException + If the computed checksum does not match the expected value. + """ # ruff sees hashlib.md5 as insecure actual = hashlib.md5(response.content).hexdigest() # noqa: S324 if actual != md5_checksum: @@ -412,6 +694,29 @@ def get( md5_checksum: str | None = None, **request_kwargs: Any, ) -> Response: + """ + Send a GET request. + + Parameters + ---------- + path : str + API path relative to the base URL. + use_cache : bool, optional + Whether to use the response cache. + reset_cache : bool, optional + Whether to ignore existing cached entries. + use_api_key : bool, optional + Whether to include the API key. + md5_checksum : str or None, optional + Expected MD5 checksum for response validation. + **request_kwargs : Any + Additional request arguments. + + Returns + ------- + requests.Response + HTTP response. + """ return self.request( method="GET", path=path, @@ -429,6 +734,23 @@ def post( use_api_key: bool = True, **request_kwargs: Any, ) -> Response: + """ + Send a POST request. + + Parameters + ---------- + path : str + API path relative to the base URL. + use_api_key : bool, optional + Whether to include the API key. + **request_kwargs : Any + Additional request arguments. + + Returns + ------- + requests.Response + HTTP response. + """ return self.request( method="POST", path=path, @@ -442,6 +764,21 @@ def delete( path: str, **request_kwargs: Any, ) -> Response: + """ + Send a DELETE request. + + Parameters + ---------- + path : str + API path relative to the base URL. + **request_kwargs : Any + Additional request arguments. + + Returns + ------- + requests.Response + HTTP response. + """ return self.request( method="DELETE", path=path, @@ -458,6 +795,35 @@ def download( file_name: str = "response.txt", md5_checksum: str | None = None, ) -> Path: + """ + Download a resource and store it in the cache directory. + + Parameters + ---------- + url : str + Absolute URL of the resource to download. 
+ handler : callable or None, optional + Custom handler function accepting ``(response, path, encoding)`` + and returning a ``pathlib.Path``. + encoding : str, optional + Text encoding used when writing the response body. + file_name : str, optional + Name of the saved file. + md5_checksum : str or None, optional + Expected MD5 checksum for integrity verification. + + Returns + ------- + pathlib.Path + Path to the downloaded file. + + Raises + ------ + OpenMLCacheRequiredError + If no cache instance is configured. + OpenMLHashException + If checksum verification fails. + """ if self.cache is None: raise OpenMLCacheRequiredError( "A cache object is required for download, but none was provided in the HTTPClient." @@ -476,6 +842,23 @@ def download( return self._text_handler(response, file_path, encoding) def _text_handler(self, response: Response, path: Path, encoding: str) -> Path: + """ + Write response text content to a file. + + Parameters + ---------- + response : requests.Response + HTTP response containing text data. + path : pathlib.Path + Destination file path. + encoding : str + Text encoding for writing the file. + + Returns + ------- + pathlib.Path + Path to the written file. + """ with path.open("w", encoding=encoding) as f: f.write(response.text) return path diff --git a/openml/_api/clients/minio.py b/openml/_api/clients/minio.py index 2edc8269b..1e9b534fb 100644 --- a/openml/_api/clients/minio.py +++ b/openml/_api/clients/minio.py @@ -6,6 +6,29 @@ class MinIOClient: + """ + Lightweight client configuration for interacting with a MinIO-compatible + object storage service. + + This class stores basic configuration such as a base filesystem path and + default HTTP headers. It is intended to be extended with actual request + or storage logic elsewhere. + + Parameters + ---------- + path : pathlib.Path or None, optional + Base path used for local storage or downloads. If ``None``, no + default path is configured. 
+ + Attributes + ---------- + path : pathlib.Path or None + Configured base path for storage operations. + headers : dict of str to str + Default HTTP headers, including a user-agent identifying the + OpenML Python client version. + """ + def __init__(self, path: Path | None = None) -> None: self.path = path self.headers: dict[str, str] = {"user-agent": f"openml-python/{__version__}"} diff --git a/openml/_api/resources/base/base.py b/openml/_api/resources/base/base.py index 5eadc4932..5a2c1faa6 100644 --- a/openml/_api/resources/base/base.py +++ b/openml/_api/resources/base/base.py @@ -14,6 +14,33 @@ class ResourceAPI(ABC): + """ + Abstract base class for OpenML resource APIs. + + This class defines the common interface for interacting with OpenML + resources (e.g., datasets, flows, runs) across different API versions. + Concrete subclasses must implement the resource-specific operations + such as publishing, deleting, and tagging. + + Parameters + ---------- + http : HTTPClient + Configured HTTP client used for communication with the OpenML API. + minio : MinIOClient or None, optional + Optional MinIO client used for object storage operations. + + Attributes + ---------- + api_version : APIVersion + API version implemented by the resource. + resource_type : ResourceType + Type of OpenML resource handled by the implementation. + _http : HTTPClient + Internal HTTP client instance. + _minio : MinIOClient or None + Internal MinIO client instance, if provided. + """ + api_version: APIVersion resource_type: ResourceType @@ -22,18 +49,107 @@ def __init__(self, http: HTTPClient, minio: MinIOClient | None = None): self._minio = minio @abstractmethod - def delete(self, resource_id: int) -> bool: ... + def delete(self, resource_id: int) -> bool: + """ + Delete a resource by its identifier. + + Parameters + ---------- + resource_id : int + Unique identifier of the resource to delete. + + Returns + ------- + bool + ``True`` if the deletion was successful. 
+ + Notes + ----- + Concrete subclasses must implement this method. + """ @abstractmethod - def publish(self, path: str, files: Mapping[str, Any] | None) -> int: ... + def publish(self, path: str, files: Mapping[str, Any] | None) -> int: + """ + Publish a new resource to the OpenML server. + + Parameters + ---------- + path : str + API endpoint path used for publishing the resource. + files : Mapping of str to Any or None + Files or payload data required for publishing. The structure + depends on the resource type. + + Returns + ------- + int + Identifier of the newly created resource. + + Notes + ----- + Concrete subclasses must implement this method. + """ @abstractmethod - def tag(self, resource_id: int, tag: str) -> list[str]: ... + def tag(self, resource_id: int, tag: str) -> list[str]: + """ + Add a tag to a resource. + + Parameters + ---------- + resource_id : int + Identifier of the resource to tag. + tag : str + Tag to associate with the resource. + + Returns + ------- + list of str + Updated list of tags assigned to the resource. + + Notes + ----- + Concrete subclasses must implement this method. + """ @abstractmethod - def untag(self, resource_id: int, tag: str) -> list[str]: ... + def untag(self, resource_id: int, tag: str) -> list[str]: + """ + Remove a tag from a resource. + + Parameters + ---------- + resource_id : int + Identifier of the resource to untag. + tag : str + Tag to remove from the resource. + + Returns + ------- + list of str + Updated list of tags assigned to the resource. + + Notes + ----- + Concrete subclasses must implement this method. + """ def _not_supported(self, *, method: str) -> NoReturn: + """ + Raise an error indicating that a method is not supported. + + Parameters + ---------- + method : str + Name of the unsupported method. + + Raises + ------ + OpenMLNotSupportedError + If the current API version does not support the requested method + for the given resource type. 
+ """ version = getattr(self.api_version, "value", "unknown") resource = getattr(self.resource_type, "value", "unknown") diff --git a/openml/_api/resources/base/fallback.py b/openml/_api/resources/base/fallback.py index 3919c36a9..9b8f64a17 100644 --- a/openml/_api/resources/base/fallback.py +++ b/openml/_api/resources/base/fallback.py @@ -7,18 +7,82 @@ class FallbackProxy: + """ + Proxy object that provides transparent fallback across multiple API versions. + + This class delegates attribute access to a sequence of API implementations. + When a callable attribute is invoked and raises ``OpenMLNotSupportedError``, + the proxy automatically attempts the same method on subsequent API instances + until one succeeds. + + Parameters + ---------- + *api_versions : Any + One or more API implementation instances ordered by priority. + The first API is treated as the primary implementation, and + subsequent APIs are used as fallbacks. + + Raises + ------ + ValueError + If no API implementations are provided. + + Notes + ----- + Attribute lookup is performed dynamically via ``__getattr__``. + Only methods that raise ``OpenMLNotSupportedError`` trigger fallback + behavior. Other exceptions are propagated immediately. + """ + def __init__(self, *api_versions: Any): if not api_versions: raise ValueError("At least one API version must be provided") self._apis = api_versions def __getattr__(self, name: str) -> Any: + """ + Dynamically resolve attribute access across API implementations. + + Parameters + ---------- + name : str + Name of the attribute being accessed. + + Returns + ------- + Any + The resolved attribute. If it is callable, a wrapped function + providing fallback behavior is returned. + + Raises + ------ + AttributeError + If none of the API implementations define the attribute. 
+ """ api, attr = self._find_attr(name) if callable(attr): return self._wrap_callable(name, api, attr) return attr def _find_attr(self, name: str) -> tuple[Any, Any]: + """ + Find the first API implementation that defines a given attribute. + + Parameters + ---------- + name : str + Name of the attribute to search for. + + Returns + ------- + tuple of (Any, Any) + The API instance and the corresponding attribute. + + Raises + ------ + AttributeError + If no API implementation defines the attribute. + """ for api in self._apis: attr = getattr(api, name, None) if attr is not None: @@ -31,6 +95,25 @@ def _wrap_callable( primary_api: Any, primary_attr: Callable[..., Any], ) -> Callable[..., Any]: + """ + Wrap a callable attribute to enable fallback behavior. + + Parameters + ---------- + name : str + Name of the method being wrapped. + primary_api : Any + Primary API instance providing the callable. + primary_attr : Callable[..., Any] + Callable attribute obtained from the primary API. + + Returns + ------- + Callable[..., Any] + Wrapped function that attempts the primary call first and + falls back to other APIs if ``OpenMLNotSupportedError`` is raised. + """ + def wrapper(*args: Any, **kwargs: Any) -> Any: try: return primary_attr(*args, **kwargs) @@ -46,6 +129,31 @@ def _call_fallbacks( *args: Any, **kwargs: Any, ) -> Any: + """ + Attempt to call a method on fallback API implementations. + + Parameters + ---------- + name : str + Name of the method to invoke. + skip_api : Any + API instance to skip (typically the primary API that already failed). + *args : Any + Positional arguments passed to the method. + **kwargs : Any + Keyword arguments passed to the method. + + Returns + ------- + Any + Result returned by the first successful fallback invocation. + + Raises + ------ + OpenMLNotSupportedError + If all API implementations either do not define the method + or raise ``OpenMLNotSupportedError``. 
+ """ for api in self._apis: if api is skip_api: continue diff --git a/openml/_api/resources/base/resources.py b/openml/_api/resources/base/resources.py index 8ccd5776e..ede0e1034 100644 --- a/openml/_api/resources/base/resources.py +++ b/openml/_api/resources/base/resources.py @@ -6,36 +6,54 @@ class DatasetAPI(ResourceAPI): + """Abstract API interface for dataset resources.""" + resource_type: ResourceType = ResourceType.DATASET class TaskAPI(ResourceAPI): + """Abstract API interface for task resources.""" + resource_type: ResourceType = ResourceType.TASK class EvaluationMeasureAPI(ResourceAPI): + """Abstract API interface for evaluation measure resources.""" + resource_type: ResourceType = ResourceType.EVALUATION_MEASURE class EstimationProcedureAPI(ResourceAPI): + """Abstract API interface for estimation procedure resources.""" + resource_type: ResourceType = ResourceType.ESTIMATION_PROCEDURE class EvaluationAPI(ResourceAPI): + """Abstract API interface for evaluation resources.""" + resource_type: ResourceType = ResourceType.EVALUATION class FlowAPI(ResourceAPI): + """Abstract API interface for flow resources.""" + resource_type: ResourceType = ResourceType.FLOW class StudyAPI(ResourceAPI): + """Abstract API interface for study resources.""" + resource_type: ResourceType = ResourceType.STUDY class RunAPI(ResourceAPI): + """Abstract API interface for run resources.""" + resource_type: ResourceType = ResourceType.RUN class SetupAPI(ResourceAPI): + """Abstract API interface for setup resources.""" + resource_type: ResourceType = ResourceType.SETUP diff --git a/openml/_api/resources/base/versions.py b/openml/_api/resources/base/versions.py index b86272377..51a958b90 100644 --- a/openml/_api/resources/base/versions.py +++ b/openml/_api/resources/base/versions.py @@ -16,14 +16,74 @@ class ResourceV1API(ResourceAPI): + """ + Version 1 implementation of the OpenML resource API. 
+ + This class provides XML-based implementations for publishing, + deleting, tagging, and untagging resources using the V1 API + endpoints. Responses are parsed using ``xmltodict``. + + Notes + ----- + V1 endpoints expect and return XML. Error handling follows the + legacy OpenML server behavior and maps specific error codes to + more descriptive exceptions where appropriate. + """ + api_version: APIVersion = APIVersion.V1 def publish(self, path: str, files: Mapping[str, Any] | None) -> int: + """ + Publish a new resource using the V1 API. + + Parameters + ---------- + path : str + API endpoint path for the upload. + files : Mapping of str to Any or None + Files to upload as part of the request payload. + + Returns + ------- + int + Identifier of the newly created resource. + + Raises + ------ + ValueError + If the server response does not contain a valid resource ID. + OpenMLServerException + If the server returns an error during upload. + """ response = self._http.post(path, files=files) parsed_response = xmltodict.parse(response.content) return self._extract_id_from_upload(parsed_response) def delete(self, resource_id: int) -> bool: + """ + Delete a resource using the V1 API. + + Parameters + ---------- + resource_id : int + Identifier of the resource to delete. + + Returns + ------- + bool + ``True`` if the server confirms successful deletion. + + Raises + ------ + ValueError + If the resource type is not supported for deletion. + OpenMLNotAuthorizedError + If the user is not permitted to delete the resource. + OpenMLServerError + If deletion fails for an unknown reason. + OpenMLServerException + For other server-side errors. + """ resource_type = self._get_endpoint_name() legal_resources = {"data", "flow", "task", "run", "study", "user"} @@ -40,6 +100,28 @@ def delete(self, resource_id: int) -> bool: raise def tag(self, resource_id: int, tag: str) -> list[str]: + """ + Add a tag to a resource using the V1 API. 
+ + Parameters + ---------- + resource_id : int + Identifier of the resource to tag. + tag : str + Tag to associate with the resource. + + Returns + ------- + list of str + Updated list of tags assigned to the resource. + + Raises + ------ + ValueError + If the resource type does not support tagging. + OpenMLServerException + If the server returns an error. + """ resource_type = self._get_endpoint_name() legal_resources = {"data", "task", "flow", "setup", "run"} @@ -58,6 +140,28 @@ def tag(self, resource_id: int, tag: str) -> list[str]: return tags def untag(self, resource_id: int, tag: str) -> list[str]: + """ + Remove a tag from a resource using the V1 API. + + Parameters + ---------- + resource_id : int + Identifier of the resource to untag. + tag : str + Tag to remove from the resource. + + Returns + ------- + list of str + Updated list of tags assigned to the resource. + + Raises + ------ + ValueError + If the resource type does not support tagging. + OpenMLServerException + If the server returns an error. + """ resource_type = self._get_endpoint_name() legal_resources = {"data", "task", "flow", "setup", "run"} @@ -76,6 +180,19 @@ def untag(self, resource_id: int, tag: str) -> list[str]: return tags def _get_endpoint_name(self) -> str: + """ + Return the V1 endpoint name for the current resource type. + + Returns + ------- + str + Endpoint segment used in V1 API paths. + + Notes + ----- + Datasets use the special endpoint name ``"data"`` instead of + their enum value. + """ if self.resource_type == ResourceType.DATASET: return "data" return cast("str", self.resource_type.value) @@ -83,6 +200,26 @@ def _get_endpoint_name(self) -> str: def _handle_delete_exception( self, resource_type: str, exception: OpenMLServerException ) -> None: + """ + Map V1 deletion error codes to more specific exceptions. + + Parameters + ---------- + resource_type : str + Endpoint name of the resource type. 
+ exception : OpenMLServerException + Original exception raised during deletion. + + Raises + ------ + OpenMLNotAuthorizedError + If the resource cannot be deleted due to ownership or + dependent entities. + OpenMLServerError + If deletion fails for an unknown reason. + OpenMLServerException + If the error code is not specially handled. + """ # https://github.com/openml/OpenML/blob/21f6188d08ac24fcd2df06ab94cf421c946971b0/openml_OS/views/pages/api_new/v1/xml/pre.php # Most exceptions are descriptive enough to be raised as their standard # OpenMLServerException, however there are two cases where we add information: @@ -116,6 +253,25 @@ def _handle_delete_exception( raise exception def _extract_id_from_upload(self, parsed: Mapping[str, Any]) -> int: + """ + Extract the resource identifier from an XML upload response. + + Parameters + ---------- + parsed : Mapping of str to Any + Parsed XML response as returned by ``xmltodict.parse``. + + Returns + ------- + int + Extracted resource identifier. + + Raises + ------ + ValueError + If the response structure is unexpected or no identifier + can be found. + """ # reads id from upload response # actual parsed dict: {"oml:upload_flow": {"@xmlns:oml": "...", "oml:id": "42"}} @@ -140,6 +296,14 @@ def _extract_id_from_upload(self, parsed: Mapping[str, Any]) -> int: class ResourceV2API(ResourceAPI): + """ + Version 2 implementation of the OpenML resource API. + + This class represents the V2 API for resources. Operations such as + publishing, deleting, tagging, and untagging are currently not + supported and will raise ``OpenMLNotSupportedError``. 
+ """ + api_version: APIVersion = APIVersion.V2 def publish(self, path: str, files: Mapping[str, Any] | None) -> int: # noqa: ARG002 diff --git a/openml/_api/resources/dataset.py b/openml/_api/resources/dataset.py index 51688a2fd..520594df9 100644 --- a/openml/_api/resources/dataset.py +++ b/openml/_api/resources/dataset.py @@ -4,8 +4,8 @@ class DatasetV1API(ResourceV1API, DatasetAPI): - pass + """Version 1 API implementation for dataset resources.""" class DatasetV2API(ResourceV2API, DatasetAPI): - pass + """Version 2 API implementation for dataset resources.""" diff --git a/openml/_api/resources/estimation_procedure.py b/openml/_api/resources/estimation_procedure.py index b8ea7d2c3..a45f7af66 100644 --- a/openml/_api/resources/estimation_procedure.py +++ b/openml/_api/resources/estimation_procedure.py @@ -4,8 +4,8 @@ class EstimationProcedureV1API(ResourceV1API, EstimationProcedureAPI): - pass + """Version 1 API implementation for estimation procedure resources.""" class EstimationProcedureV2API(ResourceV2API, EstimationProcedureAPI): - pass + """Version 2 API implementation for estimation procedure resources.""" diff --git a/openml/_api/resources/evaluation.py b/openml/_api/resources/evaluation.py index 07877e14e..fe7e360a6 100644 --- a/openml/_api/resources/evaluation.py +++ b/openml/_api/resources/evaluation.py @@ -4,8 +4,8 @@ class EvaluationV1API(ResourceV1API, EvaluationAPI): - pass + """Version 1 API implementation for evaluation resources.""" class EvaluationV2API(ResourceV2API, EvaluationAPI): - pass + """Version 2 API implementation for evaluation resources.""" diff --git a/openml/_api/resources/evaluation_measure.py b/openml/_api/resources/evaluation_measure.py index 63cf16c77..4ed5097f7 100644 --- a/openml/_api/resources/evaluation_measure.py +++ b/openml/_api/resources/evaluation_measure.py @@ -4,8 +4,8 @@ class EvaluationMeasureV1API(ResourceV1API, EvaluationMeasureAPI): - pass + """Version 1 API implementation for evaluation measure resources.""" 
class EvaluationMeasureV2API(ResourceV2API, EvaluationMeasureAPI): - pass + """Version 2 API implementation for evaluation measure resources.""" diff --git a/openml/_api/resources/flow.py b/openml/_api/resources/flow.py index ad2e05bd9..1716d89d3 100644 --- a/openml/_api/resources/flow.py +++ b/openml/_api/resources/flow.py @@ -4,8 +4,8 @@ class FlowV1API(ResourceV1API, FlowAPI): - pass + """Version 1 API implementation for flow resources.""" class FlowV2API(ResourceV2API, FlowAPI): - pass + """Version 2 API implementation for flow resources.""" diff --git a/openml/_api/resources/run.py b/openml/_api/resources/run.py index 151c69e35..4caccb0b6 100644 --- a/openml/_api/resources/run.py +++ b/openml/_api/resources/run.py @@ -4,8 +4,8 @@ class RunV1API(ResourceV1API, RunAPI): - pass + """Version 1 API implementation for run resources.""" class RunV2API(ResourceV2API, RunAPI): - pass + """Version 2 API implementation for run resources.""" diff --git a/openml/_api/resources/setup.py b/openml/_api/resources/setup.py index 78a36cecc..2896d3d9f 100644 --- a/openml/_api/resources/setup.py +++ b/openml/_api/resources/setup.py @@ -4,8 +4,8 @@ class SetupV1API(ResourceV1API, SetupAPI): - pass + """Version 1 API implementation for setup resources.""" class SetupV2API(ResourceV2API, SetupAPI): - pass + """Version 2 API implementation for setup resources.""" diff --git a/openml/_api/resources/study.py b/openml/_api/resources/study.py index cefd55004..fb073555c 100644 --- a/openml/_api/resources/study.py +++ b/openml/_api/resources/study.py @@ -4,8 +4,8 @@ class StudyV1API(ResourceV1API, StudyAPI): - pass + """Version 1 API implementation for study resources.""" class StudyV2API(ResourceV2API, StudyAPI): - pass + """Version 2 API implementation for study resources.""" diff --git a/openml/_api/resources/task.py b/openml/_api/resources/task.py index a367c9aa1..1f62aa3f3 100644 --- a/openml/_api/resources/task.py +++ b/openml/_api/resources/task.py @@ -4,8 +4,8 @@ class 
TaskV1API(ResourceV1API, TaskAPI): - pass + """Version 1 API implementation for task resources.""" class TaskV2API(ResourceV2API, TaskAPI): - pass + """Version 2 API implementation for task resources.""" diff --git a/openml/_api/setup/_utils.py b/openml/_api/setup/_utils.py index ddcf5b41c..6606140f9 100644 --- a/openml/_api/setup/_utils.py +++ b/openml/_api/setup/_utils.py @@ -12,6 +12,30 @@ def _resolve_default_cache_dir() -> Path: + """ + Determine the default cache directory for OpenML data. + + This function checks for user-defined environment variables and + platform-specific defaults to resolve where cached files should + be stored. It also provides backward-compatibility warnings if + legacy directories are detected. + + Returns + ------- + Path + Path to the cache directory that should be used. + + Notes + ----- + - If the environment variable ``OPENML_CACHE_DIR`` is set, its value + is used as the cache directory. + - On non-Linux systems, the default is ``~/.openml``. + - On Linux, the function follows the XDG Base Directory Specification: + - Uses ``$XDG_CACHE_HOME/openml`` if ``XDG_CACHE_HOME`` is set. + - Falls back to ``~/.cache/openml`` if ``XDG_CACHE_HOME`` is not set. + - If an old cache directory exists at ``$XDG_CACHE_HOME/org/openml``, + a warning is logged for backward compatibility. + """ user_defined_cache_dir = os.environ.get("OPENML_CACHE_DIR") if user_defined_cache_dir is not None: return Path(user_defined_cache_dir) diff --git a/openml/_api/setup/backend.py b/openml/_api/setup/backend.py index c29d1dbad..56f689c03 100644 --- a/openml/_api/setup/backend.py +++ b/openml/_api/setup/backend.py @@ -21,6 +21,42 @@ class APIBackend: + """ + Central backend for accessing all OpenML API resource interfaces. + + This class provides a singleton interface to dataset, task, flow, + evaluation, run, setup, study, and other resource APIs. 
It also + manages configuration through a nested ``Config`` object and + allows dynamic retrieval and updating of configuration values. + + Parameters + ---------- + config : Config, optional + Optional configuration object. If not provided, a default + ``Config`` instance is created. + + Attributes + ---------- + dataset : DatasetAPI + Interface for dataset-related API operations. + task : TaskAPI + Interface for task-related API operations. + evaluation_measure : EvaluationMeasureAPI + Interface for evaluation measure-related API operations. + estimation_procedure : EstimationProcedureAPI + Interface for estimation procedure-related API operations. + evaluation : EvaluationAPI + Interface for evaluation-related API operations. + flow : FlowAPI + Interface for flow-related API operations. + study : StudyAPI + Interface for study-related API operations. + run : RunAPI + Interface for run-related API operations. + setup : SetupAPI + Interface for setup-related API operations. + """ + _instance: APIBackend | None = None def __init__(self, config: Config | None = None): @@ -65,22 +101,62 @@ def setup(self) -> SetupAPI: @classmethod def get_instance(cls) -> APIBackend: + """ + Get the singleton instance of the APIBackend. + + Returns + ------- + APIBackend + Singleton instance of the backend. + """ if cls._instance is None: cls._instance = cls() return cls._instance @classmethod def get_config(cls) -> Config: + """ + Get a deep copy of the current configuration. + + Returns + ------- + Config + Current configuration object. + """ return deepcopy(cls.get_instance()._config) @classmethod def set_config(cls, config: Config) -> None: + """ + Set a new configuration for the backend. + + This updates both the internal ``_config`` object and rebuilds + the internal API backend using ``APIBackendBuilder``. + + Parameters + ---------- + config : Config + Configuration object to set. 
+ """ instance = cls.get_instance() instance._config = config instance._backend = APIBackendBuilder.build(config) @classmethod def get_config_value(cls, key: str) -> Any: + """ + Retrieve a specific configuration value by key. + + Parameters + ---------- + key : str + Dot-separated key specifying the configuration field. + + Returns + ------- + Any + Deep copy of the requested configuration value. + """ keys = key.split(".") config_value = cls.get_instance()._config for k in keys: @@ -92,6 +168,16 @@ def get_config_value(cls, key: str) -> Any: @classmethod def set_config_value(cls, key: str, value: Any) -> None: + """ + Set a specific configuration value by key. + + Parameters + ---------- + key : str + Dot-separated key specifying the configuration field. + value : Any + Value to assign to the configuration field. + """ keys = key.split(".") config = cls.get_instance()._config parent = config @@ -105,6 +191,19 @@ def set_config_value(cls, key: str, value: Any) -> None: @classmethod def get_config_values(cls, keys: list[str]) -> list[Any]: + """ + Retrieve multiple configuration values by a list of keys. + + Parameters + ---------- + keys : list of str + List of dot-separated keys specifying configuration fields. + + Returns + ------- + list of Any + List of deep copies of the requested configuration values. + """ values = [] for key in keys: value = cls.get_config_value(key) @@ -113,6 +212,14 @@ def get_config_values(cls, keys: list[str]) -> list[Any]: @classmethod def set_config_values(cls, config_dict: dict[str, Any]) -> None: + """ + Set multiple configuration values using a dictionary. + + Parameters + ---------- + config_dict : dict of str to Any + Mapping of dot-separated configuration keys to their values. 
+ """ config = cls.get_instance()._config for key, value in config_dict.items(): diff --git a/openml/_api/setup/builder.py b/openml/_api/setup/builder.py index f801fe525..6263066b2 100644 --- a/openml/_api/setup/builder.py +++ b/openml/_api/setup/builder.py @@ -13,6 +13,41 @@ class APIBackendBuilder: + """ + Builder class for constructing API backend instances. + + This class organizes resource-specific API objects (datasets, tasks, + flows, evaluations, runs, setups, studies, etc.) and provides a + centralized access point for both primary and optional fallback APIs. + + Parameters + ---------- + resource_apis : Mapping[ResourceType, ResourceAPI | FallbackProxy] + Mapping of resource types to their corresponding API instances + or fallback proxies. + + Attributes + ---------- + dataset : ResourceAPI | FallbackProxy + API interface for dataset resources. + task : ResourceAPI | FallbackProxy + API interface for task resources. + evaluation_measure : ResourceAPI | FallbackProxy + API interface for evaluation measure resources. + estimation_procedure : ResourceAPI | FallbackProxy + API interface for estimation procedure resources. + evaluation : ResourceAPI | FallbackProxy + API interface for evaluation resources. + flow : ResourceAPI | FallbackProxy + API interface for flow resources. + study : ResourceAPI | FallbackProxy + API interface for study resources. + run : ResourceAPI | FallbackProxy + API interface for run resources. + setup : ResourceAPI | FallbackProxy + API interface for setup resources. + """ + def __init__( self, resource_apis: Mapping[ResourceType, ResourceAPI | FallbackProxy], @@ -29,6 +64,24 @@ def __init__( @classmethod def build(cls, config: Config) -> APIBackendBuilder: + """ + Construct an APIBackendBuilder instance from a configuration. 
+ + This method initializes HTTP and MinIO clients, creates resource-specific + API instances for the primary API version, and optionally wraps them + with fallback proxies if a fallback API version is configured. + + Parameters + ---------- + config : Config + Configuration object containing API versions, endpoints, cache + settings, and connection parameters. + + Returns + ------- + APIBackendBuilder + Builder instance with all resource API interfaces initialized. + """ cache_dir = Path(config.cache.dir).expanduser() http_cache = HTTPCache(path=cache_dir, ttl=config.cache.ttl) diff --git a/openml/_api/setup/config.py b/openml/_api/setup/config.py index 4108227aa..002beabe0 100644 --- a/openml/_api/setup/config.py +++ b/openml/_api/setup/config.py @@ -10,6 +10,19 @@ @dataclass class APIConfig: + """ + Configuration for a specific OpenML API version. + + Parameters + ---------- + server : str + Base server URL for the API. + base_url : str + API-specific base path appended to the server URL. + api_key : str + API key used for authentication. + """ + server: str base_url: str api_key: str @@ -17,18 +30,59 @@ class APIConfig: @dataclass class ConnectionConfig: + """ + Configuration for HTTP connection behavior. + + Parameters + ---------- + retries : int + Number of retry attempts for failed requests. + retry_policy : RetryPolicy + Policy for determining delays between retries (human-like or robot-like). + """ + retries: int retry_policy: RetryPolicy @dataclass class CacheConfig: + """ + Configuration for caching API responses locally. + + Parameters + ---------- + dir : str + Path to the directory where cached files will be stored. + ttl : int + Time-to-live for cached entries, in seconds. + """ + dir: str ttl: int @dataclass class Config: + """ + Global configuration for the OpenML Python client. + + Includes API versions, connection settings, and caching options. + + Attributes + ---------- + api_version : APIVersion + Primary API version to use (default is V1). 
+ fallback_api_version : APIVersion or None + Optional fallback API version if the primary API does not support certain operations. + api_configs : dict of APIVersion to APIConfig + Mapping from API version to its server/base URL and API key configuration. + connection : ConnectionConfig + Settings for request retries and retry policy. + cache : CacheConfig + Settings for local caching of API responses. + """ + api_version: APIVersion = APIVersion.V1 fallback_api_version: APIVersion | None = None From 53bee943aba0d564170f824de5108e569e937cc7 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Thu, 12 Feb 2026 17:39:37 +0500 Subject: [PATCH 088/156] update minio --- openml/_api/clients/minio.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openml/_api/clients/minio.py b/openml/_api/clients/minio.py index 1e9b534fb..e6a94a6e4 100644 --- a/openml/_api/clients/minio.py +++ b/openml/_api/clients/minio.py @@ -29,6 +29,6 @@ class MinIOClient: OpenML Python client version. """ - def __init__(self, path: Path | None = None) -> None: + def __init__(self, path: Path) -> None: self.path = path self.headers: dict[str, str] = {"user-agent": f"openml-python/{__version__}"} From 33b4ca0f103e0fa9d37368f6ee632d7e1f3217b9 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Fri, 13 Feb 2026 21:37:36 +0500 Subject: [PATCH 089/156] make delay functions static --- openml/_api/clients/http.py | 6 +++--- openml/_api/clients/utils.py | 40 ++++++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 3 deletions(-) create mode 100644 openml/_api/clients/utils.py diff --git a/openml/_api/clients/http.py b/openml/_api/clients/http.py index a1ccc5122..b90818921 100644 --- a/openml/_api/clients/http.py +++ b/openml/_api/clients/http.py @@ -27,6 +27,8 @@ OpenMLServerNoResult, ) +from .utils import human_delay, robot_delay + class HTTPCache: """ @@ -245,9 +247,7 @@ def __init__( # noqa: PLR0913 self.retry_policy = retry_policy self.cache = cache - self.retry_func = ( - 
self._human_delay if retry_policy == RetryPolicy.HUMAN else self._robot_delay - ) + self.retry_func = human_delay if retry_policy == RetryPolicy.HUMAN else robot_delay self.headers: dict[str, str] = {"user-agent": f"openml-python/{__version__}"} def _robot_delay(self, n: int) -> float: diff --git a/openml/_api/clients/utils.py b/openml/_api/clients/utils.py new file mode 100644 index 000000000..c21732504 --- /dev/null +++ b/openml/_api/clients/utils.py @@ -0,0 +1,40 @@ +from __future__ import annotations + +import math +import random + + +def robot_delay(n: int) -> float: + """ + Compute delay for automated retry policy. + + Parameters + ---------- + n : int + Current retry attempt number (1-based). + + Returns + ------- + float + Number of seconds to wait before the next retry. + """ + wait = (1 / (1 + math.exp(-(n * 0.5 - 4)))) * 60 + variation = random.gauss(0, wait / 10) + return max(1.0, wait + variation) + + +def human_delay(n: int) -> float: + """ + Compute delay for human-like retry policy. + + Parameters + ---------- + n : int + Current retry attempt number (1-based). + + Returns + ------- + float + Number of seconds to wait before the next retry. + """ + return max(1.0, n) From a6b9a45d6248dd9e24380d918b06d2b97edf0bbb Mon Sep 17 00:00:00 2001 From: geetu040 Date: Fri, 13 Feb 2026 21:41:19 +0500 Subject: [PATCH 090/156] rename: retry_raise_e -> exception --- openml/_api/clients/http.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/openml/_api/clients/http.py b/openml/_api/clients/http.py index b90818921..e344bcecb 100644 --- a/openml/_api/clients/http.py +++ b/openml/_api/clients/http.py @@ -446,7 +446,7 @@ def _validate_response( if response.status_code == requests.codes.URI_TOO_LONG: raise OpenMLServerError(f"URI too long! 
({url})") - retry_raise_e: Exception | None = None + exception: Exception | None = None code: int | None = None message: str = "" @@ -461,7 +461,7 @@ def _validate_response( f"developers!\n{extra}" ) from e - retry_raise_e = e + exception = e except Exception as e: # If we failed to parse it out, @@ -480,10 +480,10 @@ def _validate_response( files=files, ) - if retry_raise_e is None: - retry_raise_e = OpenMLServerException(code=code, message=message, url=url) + if exception is None: + exception = OpenMLServerException(code=code, message=message, url=url) - return retry_raise_e + return exception def _request( # noqa: PLR0913 self, @@ -523,7 +523,7 @@ def _request( # noqa: PLR0913 tuple of (requests.Response or None, Exception or None) Response and potential retry exception. """ - retry_raise_e: Exception | None = None + exception: Exception | None = None response: Response | None = None try: @@ -541,17 +541,17 @@ def _request( # noqa: PLR0913 requests.exceptions.ConnectionError, requests.exceptions.SSLError, ) as e: - retry_raise_e = e + exception = e if response is not None: - retry_raise_e = self._validate_response( + exception = self._validate_response( method=method, url=url, files=files, response=response, ) - return response, retry_raise_e + return response, exception def request( # noqa: PLR0913, C901 self, @@ -626,7 +626,7 @@ def request( # noqa: PLR0913, C901 session = requests.Session() for retry_counter in range(1, retries + 1): - response, retry_raise_e = self._request( + response, exception = self._request( session=session, method=method, url=url, @@ -638,11 +638,11 @@ def request( # noqa: PLR0913, C901 ) # executed successfully - if retry_raise_e is None: + if exception is None: break # tries completed if retry_counter >= retries: - raise retry_raise_e + raise exception delay = self.retry_func(retry_counter) time.sleep(delay) From f924b3207037b47622415bc3b8ae6a8096683232 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Fri, 13 Feb 2026 21:42:57 +0500 
Subject: [PATCH 091/156] use context-manager for requests.Session --- openml/_api/clients/http.py | 42 ++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 22 deletions(-) diff --git a/openml/_api/clients/http.py b/openml/_api/clients/http.py index e344bcecb..e624b2f54 100644 --- a/openml/_api/clients/http.py +++ b/openml/_api/clients/http.py @@ -624,30 +624,28 @@ def request( # noqa: PLR0913, C901 except Exception: raise # propagate unexpected cache errors - session = requests.Session() - for retry_counter in range(1, retries + 1): - response, exception = self._request( - session=session, - method=method, - url=url, - params=params, - data=data, - headers=headers, - files=files, - **request_kwargs, - ) - - # executed successfully - if exception is None: - break - # tries completed - if retry_counter >= retries: - raise exception + with requests.Session() as session: + for retry_counter in range(1, retries + 1): + response, exception = self._request( + session=session, + method=method, + url=url, + params=params, + data=data, + headers=headers, + files=files, + **request_kwargs, + ) - delay = self.retry_func(retry_counter) - time.sleep(delay) + # executed successfully + if exception is None: + break + # tries completed + if retry_counter >= retries: + raise exception - session.close() + delay = self.retry_func(retry_counter) + time.sleep(delay) assert response is not None From 541b0f26ff4a9fc565ad529712f2b38d700a1252 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Fri, 13 Feb 2026 22:01:52 +0500 Subject: [PATCH 092/156] remove "assert response is not None" --- openml/_api/clients/http.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/openml/_api/clients/http.py b/openml/_api/clients/http.py index e624b2f54..926829c71 100644 --- a/openml/_api/clients/http.py +++ b/openml/_api/clients/http.py @@ -9,7 +9,7 @@ import xml from collections.abc import Callable, Mapping from pathlib import Path -from typing import Any +from typing 
import Any, cast from urllib.parse import urlencode, urljoin, urlparse import requests @@ -647,7 +647,9 @@ def request( # noqa: PLR0913, C901 delay = self.retry_func(retry_counter) time.sleep(delay) - assert response is not None + # response is guaranteed to be not `None` + # otherwise an exception would have been raised before + response = cast("Response", response) if use_cache and self.cache is not None: cache_key = self.cache.get_key(url, params) From acb173fa0e5e36464769eb069004a6cd02782811 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Fri, 13 Feb 2026 22:07:04 +0500 Subject: [PATCH 093/156] verify checksum before caching --- openml/_api/clients/http.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/openml/_api/clients/http.py b/openml/_api/clients/http.py index 926829c71..d2c5b124f 100644 --- a/openml/_api/clients/http.py +++ b/openml/_api/clients/http.py @@ -651,13 +651,13 @@ def request( # noqa: PLR0913, C901 # otherwise an exception would have been raised before response = cast("Response", response) + if md5_checksum is not None: + self._verify_checksum(response, md5_checksum) + if use_cache and self.cache is not None: cache_key = self.cache.get_key(url, params) self.cache.save(cache_key, response) - if md5_checksum is not None: - self._verify_checksum(response, md5_checksum) - return response def _verify_checksum(self, response: Response, md5_checksum: str) -> None: From 3e8d1f0dc158d281a181000e5f35efe26b69d571 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Fri, 13 Feb 2026 22:42:03 +0500 Subject: [PATCH 094/156] update tests --- tests/test_api/test_http.py | 37 ++++++++++++++------------------- tests/test_api/test_versions.py | 9 ++++++-- 2 files changed, 23 insertions(+), 23 deletions(-) diff --git a/tests/test_api/test_http.py b/tests/test_api/test_http.py index 8dc6303d1..2a1f2dcd5 100644 --- a/tests/test_api/test_http.py +++ b/tests/test_api/test_http.py @@ -2,7 +2,7 @@ import time import xmltodict import pytest -from 
openml.testing import TestAPIBase +from openml.testing import TestBase, TestAPIBase import os from pathlib import Path from urllib.parse import urljoin @@ -155,27 +155,22 @@ def test_post_and_delete(self): 17 """ + # post + response = self.http_client.post( + "task", + files={"description": task_xml}, + ) + self.assertEqual(response.status_code, 200) + xml_resp = xmltodict.parse(response.content) + task_id = int(xml_resp["oml:upload_task"]["oml:id"]) - task_id = None - try: - # POST the task - post_response = self.http_client.post( - "task", - files={"description": task_xml}, - ) - self.assertEqual(post_response.status_code, 200) - xml_resp = xmltodict.parse(post_response.content) - task_id = int(xml_resp["oml:upload_task"]["oml:id"]) - - # GET the task to verify it exists - get_response = self.http_client.get(f"task/{task_id}") - self.assertEqual(get_response.status_code, 200) - - finally: - # DELETE the task if it was created - if task_id is not None: - del_response = self.http_client.delete(f"task/{task_id}") - self.assertEqual(del_response.status_code, 200) + # cleanup incase of failure + TestBase._mark_entity_for_removal("task", task_id) + TestBase.logger.info(f"collected from {__file__}: {task_id}") + + # delete + response = self.http_client.delete(f"task/{task_id}") + self.assertEqual(response.status_code, 200) def test_download_requires_cache(self): client = HTTPClient( diff --git a/tests/test_api/test_versions.py b/tests/test_api/test_versions.py index 1313889bc..cdb37a0d3 100644 --- a/tests/test_api/test_versions.py +++ b/tests/test_api/test_versions.py @@ -1,6 +1,6 @@ from time import time import pytest -from openml.testing import TestAPIBase +from openml.testing import TestBase, TestAPIBase from openml._api import ResourceV1API, ResourceV2API, FallbackProxy, ResourceAPI from openml.enums import ResourceType, APIVersion from openml.exceptions import OpenMLNotSupportedError @@ -18,13 +18,18 @@ def _publish_and_delete(self): 17 """ - + # publish task_id = 
self.resource.publish( "task", files={"description": task_xml}, ) self.assertIsNotNone(task_id) + # cleanup incase of failure + TestBase._mark_entity_for_removal("task", task_id) + TestBase.logger.info(f"collected from {__file__}: {task_id}") + + # delete success = self.resource.delete(task_id) self.assertTrue(success) From f83bdb5c0d2fc09c38ce948ba2b49ed23207e547 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Fri, 13 Feb 2026 22:46:57 +0500 Subject: [PATCH 095/156] minor fix in ResourceV1API.untag --- openml/_api/resources/base/versions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openml/_api/resources/base/versions.py b/openml/_api/resources/base/versions.py index 51a958b90..dc41ba971 100644 --- a/openml/_api/resources/base/versions.py +++ b/openml/_api/resources/base/versions.py @@ -166,7 +166,7 @@ def untag(self, resource_id: int, tag: str) -> list[str]: legal_resources = {"data", "task", "flow", "setup", "run"} if resource_type not in legal_resources: - raise ValueError(f"Can't tag a {resource_type}") + raise ValueError(f"Can't untag a {resource_type}") path = f"{resource_type}/untag" data = {f"{resource_type}_id": resource_id, "tag": tag} From 2a42712d465c404a437b8f52ed49aa86a08f55e3 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Mon, 16 Feb 2026 18:54:25 +0500 Subject: [PATCH 096/156] remove cache.ttl --- openml/_api/clients/http.py | 9 +-------- openml/_api/setup/builder.py | 2 +- openml/_api/setup/config.py | 5 ----- openml/testing.py | 2 -- tests/test_api/test_http.py | 23 ----------------------- 5 files changed, 2 insertions(+), 39 deletions(-) diff --git a/openml/_api/clients/http.py b/openml/_api/clients/http.py index d2c5b124f..dba9cac6b 100644 --- a/openml/_api/clients/http.py +++ b/openml/_api/clients/http.py @@ -44,9 +44,6 @@ class HTTPCache: ---------- path : pathlib.Path Base directory where cache entries are stored. - ttl : int - Time-to-live in seconds. Cached entries older than this value are treated - as expired. 
Notes ----- @@ -54,9 +51,8 @@ class HTTPCache: parameters, excluding the ``api_key`` parameter. """ - def __init__(self, *, path: Path, ttl: int) -> None: + def __init__(self, *, path: Path) -> None: self.path = path - self.ttl = ttl def get_key(self, url: str, params: dict[str, Any]) -> str: """ @@ -144,9 +140,6 @@ def load(self, key: str) -> Response: if created_at is None: raise ValueError("Cache metadata missing 'created_at'") - if time.time() - created_at > self.ttl: - raise TimeoutError(f"Cache expired for {path}") - with headers_path.open("r", encoding="utf-8") as f: headers = json.load(f) diff --git a/openml/_api/setup/builder.py b/openml/_api/setup/builder.py index 6263066b2..05c37807d 100644 --- a/openml/_api/setup/builder.py +++ b/openml/_api/setup/builder.py @@ -84,7 +84,7 @@ def build(cls, config: Config) -> APIBackendBuilder: """ cache_dir = Path(config.cache.dir).expanduser() - http_cache = HTTPCache(path=cache_dir, ttl=config.cache.ttl) + http_cache = HTTPCache(path=cache_dir) minio_client = MinIOClient(path=cache_dir) primary_api_config = config.api_configs[config.api_version] diff --git a/openml/_api/setup/config.py b/openml/_api/setup/config.py index 002beabe0..fb1fee3a9 100644 --- a/openml/_api/setup/config.py +++ b/openml/_api/setup/config.py @@ -1,7 +1,6 @@ from __future__ import annotations from dataclasses import dataclass, field -from datetime import timedelta from openml.enums import APIVersion, RetryPolicy @@ -54,12 +53,9 @@ class CacheConfig: ---------- dir : str Path to the directory where cached files will be stored. - ttl : int - Time-to-live for cached entries, in seconds. 
""" dir: str - ttl: int @dataclass @@ -111,6 +107,5 @@ class Config: cache: CacheConfig = field( default_factory=lambda: CacheConfig( dir=str(_resolve_default_cache_dir()), - ttl=int(timedelta(weeks=1).total_seconds()), ) ) diff --git a/openml/testing.py b/openml/testing.py index 5a1a4d10f..54b95d23d 100644 --- a/openml/testing.py +++ b/openml/testing.py @@ -291,12 +291,10 @@ def setUp(self, n_levels: int = 1, tmpdir_suffix: str = "") -> None: retries = self.connection_n_retries retry_policy = RetryPolicy.HUMAN if self.retry_policy == "human" else RetryPolicy.ROBOT - ttl = openml._backend.get_config_value("cache.ttl") cache_dir = self.static_cache_dir self.cache = HTTPCache( path=cache_dir, - ttl=ttl, ) self.http_clients = { APIVersion.V1: HTTPClient( diff --git a/tests/test_api/test_http.py b/tests/test_api/test_http.py index 2a1f2dcd5..c83536119 100644 --- a/tests/test_api/test_http.py +++ b/tests/test_api/test_http.py @@ -105,29 +105,6 @@ def test_get_uses_cached_response(self): self.assertEqual(response1.content, response2.content) self.assertEqual(response1.status_code, response2.status_code) - @pytest.mark.uses_test_server() - def test_get_cache_expires(self): - # force short TTL - self.cache.ttl = 1 - path = "task/1" - - url = self._prepare_url(path=path) - key = self.cache.get_key(url, {}) - cache_path = self.cache._key_to_path(key) / "meta.json" - - response1 = self.http_client.get(path, use_cache=True) - response1_cache_time_stamp = cache_path.stat().st_ctime - - time.sleep(2) - - response2 = self.http_client.get(path, use_cache=True) - response2_cache_time_stamp = cache_path.stat().st_ctime - - # cache expired -> new request - self.assertNotEqual(response1_cache_time_stamp, response2_cache_time_stamp) - self.assertEqual(response2.status_code, 200) - self.assertEqual(response1.content, response2.content) - @pytest.mark.uses_test_server() def test_get_reset_cache(self): path = "task/1" From 001caad5669af089319af306a8c3d9d4bdb108b3 Mon Sep 17 00:00:00 2001 
From: geetu040 Date: Mon, 16 Feb 2026 19:14:57 +0500 Subject: [PATCH 097/156] replace config.cache.dir with config.cache_dir --- openml/_api/__init__.py | 2 -- openml/_api/setup/__init__.py | 3 +-- openml/_api/setup/builder.py | 2 +- openml/_api/setup/config.py | 25 +++---------------------- openml/config.py | 2 +- 5 files changed, 6 insertions(+), 28 deletions(-) diff --git a/openml/_api/__init__.py b/openml/_api/__init__.py index 926fee3d4..b7846fd39 100644 --- a/openml/_api/__init__.py +++ b/openml/_api/__init__.py @@ -41,7 +41,6 @@ APIBackend, APIBackendBuilder, APIConfig, - CacheConfig, Config, ConnectionConfig, _backend, @@ -52,7 +51,6 @@ "APIBackend", "APIBackendBuilder", "APIConfig", - "CacheConfig", "Config", "ConnectionConfig", "DatasetAPI", diff --git a/openml/_api/setup/__init__.py b/openml/_api/setup/__init__.py index 1c28cfa9e..1f6e60ecb 100644 --- a/openml/_api/setup/__init__.py +++ b/openml/_api/setup/__init__.py @@ -1,13 +1,12 @@ from ._instance import _backend from .backend import APIBackend from .builder import APIBackendBuilder -from .config import APIConfig, CacheConfig, Config, ConnectionConfig +from .config import APIConfig, Config, ConnectionConfig __all__ = [ "APIBackend", "APIBackendBuilder", "APIConfig", - "CacheConfig", "Config", "ConnectionConfig", "_backend", diff --git a/openml/_api/setup/builder.py b/openml/_api/setup/builder.py index 05c37807d..aa6ed4bba 100644 --- a/openml/_api/setup/builder.py +++ b/openml/_api/setup/builder.py @@ -82,7 +82,7 @@ def build(cls, config: Config) -> APIBackendBuilder: APIBackendBuilder Builder instance with all resource API interfaces initialized. 
""" - cache_dir = Path(config.cache.dir).expanduser() + cache_dir = Path(config.cache_dir).expanduser() http_cache = HTTPCache(path=cache_dir) minio_client = MinIOClient(path=cache_dir) diff --git a/openml/_api/setup/config.py b/openml/_api/setup/config.py index fb1fee3a9..5f6cd7891 100644 --- a/openml/_api/setup/config.py +++ b/openml/_api/setup/config.py @@ -44,20 +44,6 @@ class ConnectionConfig: retry_policy: RetryPolicy -@dataclass -class CacheConfig: - """ - Configuration for caching API responses locally. - - Parameters - ---------- - dir : str - Path to the directory where cached files will be stored. - """ - - dir: str - - @dataclass class Config: """ @@ -71,16 +57,17 @@ class Config: Primary API version to use (default is V1). fallback_api_version : APIVersion or None Optional fallback API version if the primary API does not support certain operations. + cache_dir : str + Path to the directory where cached files will be stored. api_configs : dict of APIVersion to APIConfig Mapping from API version to its server/base URL and API key configuration. connection : ConnectionConfig Settings for request retries and retry policy. - cache : CacheConfig - Settings for local caching of API responses. 
""" api_version: APIVersion = APIVersion.V1 fallback_api_version: APIVersion | None = None + cache_dir: str = str(_resolve_default_cache_dir()) api_configs: dict[APIVersion, APIConfig] = field( default_factory=lambda: { @@ -103,9 +90,3 @@ class Config: retry_policy=RetryPolicy.HUMAN, ) ) - - cache: CacheConfig = field( - default_factory=lambda: CacheConfig( - dir=str(_resolve_default_cache_dir()), - ) - ) diff --git a/openml/config.py b/openml/config.py index 692543a00..1c34f6949 100644 --- a/openml/config.py +++ b/openml/config.py @@ -540,10 +540,10 @@ def _sync_api_config() -> None: APIBackend.set_config_values( { + "cache_dir": cache_dir, "api_configs.v1.server": v1_server, "api_configs.v1.base_url": v1_base_url, "api_configs.v1.api_key": apikey, - "cache.dir": cache_dir, "connection.retry_policy": connection_retry_policy, "connection.retries": connection_n_retries, } From fb38a2d3affdcac8ba9c15ab315371a8415b1e1d Mon Sep 17 00:00:00 2001 From: geetu040 Date: Tue, 17 Feb 2026 10:46:24 +0500 Subject: [PATCH 098/156] make HTTPClient.cache compulsory --- openml/_api/clients/http.py | 13 +++---------- tests/test_api/test_http.py | 14 -------------- 2 files changed, 3 insertions(+), 24 deletions(-) diff --git a/openml/_api/clients/http.py b/openml/_api/clients/http.py index dba9cac6b..e9f881e2e 100644 --- a/openml/_api/clients/http.py +++ b/openml/_api/clients/http.py @@ -19,7 +19,6 @@ from openml.__version__ import __version__ from openml.enums import RetryPolicy from openml.exceptions import ( - OpenMLCacheRequiredError, OpenMLHashException, OpenMLNotAuthorizedError, OpenMLServerError, @@ -231,7 +230,7 @@ def __init__( # noqa: PLR0913 api_key: str, retries: int, retry_policy: RetryPolicy, - cache: HTTPCache | None = None, + cache: HTTPCache, ) -> None: self.server = server self.base_url = base_url @@ -608,7 +607,7 @@ def request( # noqa: PLR0913, C901 files = request_kwargs.pop("files", None) - if use_cache and not reset_cache and self.cache is not None: + if 
use_cache and not reset_cache: cache_key = self.cache.get_key(url, params) try: return self.cache.load(cache_key) @@ -647,7 +646,7 @@ def request( # noqa: PLR0913, C901 if md5_checksum is not None: self._verify_checksum(response, md5_checksum) - if use_cache and self.cache is not None: + if use_cache: cache_key = self.cache.get_key(url, params) self.cache.save(cache_key, response) @@ -812,15 +811,9 @@ def download( Raises ------ - OpenMLCacheRequiredError - If no cache instance is configured. OpenMLHashException If checksum verification fails. """ - if self.cache is None: - raise OpenMLCacheRequiredError( - "A cache object is required for download, but none was provided in the HTTPClient." - ) base = self.cache.path file_path = base / "downloads" / urlparse(url).path.lstrip("/") / file_name file_path = file_path.expanduser() diff --git a/tests/test_api/test_http.py b/tests/test_api/test_http.py index c83536119..ef20bd4ca 100644 --- a/tests/test_api/test_http.py +++ b/tests/test_api/test_http.py @@ -8,7 +8,6 @@ from urllib.parse import urljoin from openml.enums import APIVersion from openml._api import HTTPClient -from openml.exceptions import OpenMLCacheRequiredError class TestHTTPClient(TestAPIBase): @@ -149,19 +148,6 @@ def test_post_and_delete(self): response = self.http_client.delete(f"task/{task_id}") self.assertEqual(response.status_code, 200) - def test_download_requires_cache(self): - client = HTTPClient( - server=self.http_client.server, - base_url=self.http_client.base_url, - api_key=self.http_client.api_key, - retries=1, - retry_policy=self.http_client.retry_policy, - cache=None, - ) - - with pytest.raises(OpenMLCacheRequiredError): - client.download("https://www.openml.org") - @pytest.mark.uses_test_server() def test_download_creates_file(self): # small stable resource From 03c4ca9d93693fc59341e4c1c00d8d8585079a4b Mon Sep 17 00:00:00 2001 From: geetu040 Date: Tue, 17 Feb 2026 10:47:12 +0500 Subject: [PATCH 099/156] remove unused OpenMLCacheRequiredError 
--- openml/exceptions.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/openml/exceptions.py b/openml/exceptions.py index 10f693648..26c2d2591 100644 --- a/openml/exceptions.py +++ b/openml/exceptions.py @@ -69,7 +69,3 @@ class ObjectNotPublishedError(PyOpenMLError): class OpenMLNotSupportedError(PyOpenMLError): """Raised when an API operation is not supported for a resource/version.""" - - -class OpenMLCacheRequiredError(PyOpenMLError): - """Raised when a cache object is required but not provided.""" From 8d708fd287611964309993faf8094a4d3f08f5b9 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Tue, 17 Feb 2026 11:00:56 +0500 Subject: [PATCH 100/156] implement and use TestAPIBase._create_resource --- openml/testing.py | 9 +++++++-- tests/test_api/test_versions.py | 30 ++++++++++++++++-------------- 2 files changed, 23 insertions(+), 16 deletions(-) diff --git a/openml/testing.py b/openml/testing.py index 54b95d23d..9c31e9288 100644 --- a/openml/testing.py +++ b/openml/testing.py @@ -15,8 +15,8 @@ import requests import openml -from openml._api import HTTPCache, HTTPClient, MinIOClient -from openml.enums import APIVersion, RetryPolicy +from openml._api import API_REGISTRY, HTTPCache, HTTPClient, MinIOClient, ResourceAPI +from openml.enums import APIVersion, ResourceType, RetryPolicy from openml.exceptions import OpenMLServerException from openml.tasks import TaskType @@ -316,6 +316,11 @@ def setUp(self, n_levels: int = 1, tmpdir_suffix: str = "") -> None: } self.minio_client = MinIOClient(path=cache_dir) + def _create_resource(self, api_version: APIVersion, resource_type: ResourceType) -> ResourceAPI: + http_client = self.http_clients[api_version] + resource_cls = API_REGISTRY[api_version][resource_type] + return resource_cls(http=http_client, minio=self.minio_client) + def check_task_existence( task_type: TaskType, diff --git a/tests/test_api/test_versions.py b/tests/test_api/test_versions.py index cdb37a0d3..2be35ba5c 100644 --- 
a/tests/test_api/test_versions.py +++ b/tests/test_api/test_versions.py @@ -48,9 +48,10 @@ def _tag_and_untag(self): class TestResourceV1API(TestResourceAPIBase): def setUp(self): super().setUp() - http_client = self.http_clients[APIVersion.V1] - self.resource = ResourceV1API(http_client) - self.resource.resource_type = ResourceType.TASK + self.resource = self._create_resource( + api_version=APIVersion.V1, + resource_type=ResourceType.TASK, + ) def test_publish_and_delete(self): self._publish_and_delete() @@ -62,9 +63,10 @@ def test_tag_and_untag(self): class TestResourceV2API(TestResourceAPIBase): def setUp(self): super().setUp() - http_client = self.http_clients[APIVersion.V2] - self.resource = ResourceV2API(http_client) - self.resource.resource_type = ResourceType.TASK + self.resource = self._create_resource( + api_version=APIVersion.V2, + resource_type=ResourceType.TASK, + ) def test_publish_and_delete(self): with pytest.raises(OpenMLNotSupportedError): @@ -78,14 +80,14 @@ def test_tag_and_untag(self): class TestResourceFallbackAPI(TestResourceAPIBase): def setUp(self): super().setUp() - http_client_v1 = self.http_clients[APIVersion.V1] - resource_v1 = ResourceV1API(http_client_v1) - resource_v1.resource_type = ResourceType.TASK - - http_client_v2 = self.http_clients[APIVersion.V2] - resource_v2 = ResourceV2API(http_client_v2) - resource_v2.resource_type = ResourceType.TASK - + resource_v1 = self._create_resource( + api_version=APIVersion.V1, + resource_type=ResourceType.TASK, + ) + resource_v2 = self._create_resource( + api_version=APIVersion.V2, + resource_type=ResourceType.TASK, + ) self.resource = FallbackProxy(resource_v2, resource_v1) def test_publish_and_delete(self): From 4f75bbadff265a9aa38284dad7af7409687eb24c Mon Sep 17 00:00:00 2001 From: geetu040 Date: Tue, 17 Feb 2026 11:01:48 +0500 Subject: [PATCH 101/156] make ResourceAPI.minio compulsory --- openml/_api/resources/base/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/openml/_api/resources/base/base.py b/openml/_api/resources/base/base.py index 5a2c1faa6..51e41a0c8 100644 --- a/openml/_api/resources/base/base.py +++ b/openml/_api/resources/base/base.py @@ -44,7 +44,7 @@ class ResourceAPI(ABC): api_version: APIVersion resource_type: ResourceType - def __init__(self, http: HTTPClient, minio: MinIOClient | None = None): + def __init__(self, http: HTTPClient, minio: MinIOClient): self._http = http self._minio = minio From c4dae4362d2e7a46d387bbf315b3b25c1ba71493 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Tue, 17 Feb 2026 12:43:13 +0500 Subject: [PATCH 102/156] rename: use_cache -> enable_cache; reset_cache -> refresh_cache --- openml/_api/clients/http.py | 33 +++++++++++++++++---------------- tests/test_api/test_http.py | 12 ++++++------ 2 files changed, 23 insertions(+), 22 deletions(-) diff --git a/openml/_api/clients/http.py b/openml/_api/clients/http.py index e9f881e2e..3ab0def4f 100644 --- a/openml/_api/clients/http.py +++ b/openml/_api/clients/http.py @@ -550,8 +550,8 @@ def request( # noqa: PLR0913, C901 method: str, path: str, *, - use_cache: bool = False, - reset_cache: bool = False, + enable_cache: bool = False, + refresh_cache: bool = False, use_api_key: bool = False, md5_checksum: str | None = None, **request_kwargs: Any, @@ -565,10 +565,11 @@ def request( # noqa: PLR0913, C901 HTTP method to use. path : str API path relative to the base URL. - use_cache : bool, optional - Whether to load/store responses from cache. - reset_cache : bool, optional - If True, bypass existing cache entries. + enable_cache : bool, optional + Whether to load/store response from cache. + refresh_cache : bool, optional + Only used when `enable_cache=True`. If True, ignore any existing + cached response and overwrite it with a fresh one. use_api_key : bool, optional Whether to include the API key in query parameters. 
md5_checksum : str or None, optional @@ -607,7 +608,7 @@ def request( # noqa: PLR0913, C901 files = request_kwargs.pop("files", None) - if use_cache and not reset_cache: + if enable_cache and not refresh_cache: cache_key = self.cache.get_key(url, params) try: return self.cache.load(cache_key) @@ -646,7 +647,7 @@ def request( # noqa: PLR0913, C901 if md5_checksum is not None: self._verify_checksum(response, md5_checksum) - if use_cache: + if enable_cache: cache_key = self.cache.get_key(url, params) self.cache.save(cache_key, response) @@ -680,8 +681,8 @@ def get( self, path: str, *, - use_cache: bool = False, - reset_cache: bool = False, + enable_cache: bool = False, + refresh_cache: bool = False, use_api_key: bool = False, md5_checksum: str | None = None, **request_kwargs: Any, @@ -693,9 +694,9 @@ def get( ---------- path : str API path relative to the base URL. - use_cache : bool, optional + enable_cache : bool, optional Whether to use the response cache. - reset_cache : bool, optional + refresh_cache : bool, optional Whether to ignore existing cached entries. use_api_key : bool, optional Whether to include the API key. 
@@ -712,8 +713,8 @@ def get( return self.request( method="GET", path=path, - use_cache=use_cache, - reset_cache=reset_cache, + enable_cache=enable_cache, + refresh_cache=refresh_cache, use_api_key=use_api_key, md5_checksum=md5_checksum, **request_kwargs, @@ -746,7 +747,7 @@ def post( return self.request( method="POST", path=path, - use_cache=False, + enable_cache=False, use_api_key=use_api_key, **request_kwargs, ) @@ -774,7 +775,7 @@ def delete( return self.request( method="DELETE", path=path, - use_cache=False, + enable_cache=False, use_api_key=True, **request_kwargs, ) diff --git a/tests/test_api/test_http.py b/tests/test_api/test_http.py index ef20bd4ca..5ecd225d3 100644 --- a/tests/test_api/test_http.py +++ b/tests/test_api/test_http.py @@ -77,7 +77,7 @@ def test_get(self): @pytest.mark.uses_test_server() def test_get_with_cache_creates_cache(self): - response = self.http_client.get("task/1", use_cache=True) + response = self.http_client.get("task/1", enable_cache=True) self.assertEqual(response.status_code, 200) self.assertTrue(self.cache.path.exists()) @@ -96,26 +96,26 @@ def test_get_with_cache_creates_cache(self): @pytest.mark.uses_test_server() def test_get_uses_cached_response(self): # first request populates cache - response1 = self.http_client.get("task/1", use_cache=True) + response1 = self.http_client.get("task/1", enable_cache=True) # second request should load from cache - response2 = self.http_client.get("task/1", use_cache=True) + response2 = self.http_client.get("task/1", enable_cache=True) self.assertEqual(response1.content, response2.content) self.assertEqual(response1.status_code, response2.status_code) @pytest.mark.uses_test_server() - def test_get_reset_cache(self): + def test_get_refresh_cache(self): path = "task/1" url = self._prepare_url(path=path) key = self.cache.get_key(url, {}) cache_path = self.cache._key_to_path(key) / "meta.json" - response1 = self.http_client.get(path, use_cache=True) + response1 = self.http_client.get(path, 
enable_cache=True) response1_cache_time_stamp = cache_path.stat().st_ctime - response2 = self.http_client.get(path, use_cache=True, reset_cache=True) + response2 = self.http_client.get(path, enable_cache=True, refresh_cache=True) response2_cache_time_stamp = cache_path.stat().st_ctime self.assertNotEqual(response1_cache_time_stamp, response2_cache_time_stamp) From 36c20a2e0ddecf99b33f1c334729367cc67d7ed9 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Tue, 17 Feb 2026 14:28:11 +0500 Subject: [PATCH 103/156] use server config from TestBase --- openml/testing.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/openml/testing.py b/openml/testing.py index dbb7945bc..a971275d9 100644 --- a/openml/testing.py +++ b/openml/testing.py @@ -293,14 +293,18 @@ def setUp(self, n_levels: int = 1, tmpdir_suffix: str = "") -> None: retry_policy = RetryPolicy.HUMAN if self.retry_policy == "human" else RetryPolicy.ROBOT cache_dir = self.static_cache_dir + v1_server = self.test_server.split("api/")[0] + v1_base_url = self.test_server.replace(v1_server, "").rstrip("/") + "/" + v1_api_key = self.user_key + self.cache = HTTPCache( path=cache_dir, ) self.http_clients = { APIVersion.V1: HTTPClient( - server="https://test.openml.org/", - base_url="api/v1/xml/", - api_key="normaluser", + server=v1_server, + base_url=v1_base_url, + api_key=v1_api_key, retries=retries, retry_policy=retry_policy, cache=self.cache, From ab3c1eb674233f773a52e31fcbea6d20aec88017 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Tue, 17 Feb 2026 14:28:55 +0500 Subject: [PATCH 104/156] tests: mock HTTP post calls to prevent race conditions Previously, multiple tests were publishing the same task concurrently, which increased the likelihood of race conditions and flaky failures. This update replaces real HTTP post calls with mocks, making the tests deterministic and isolated from the server. 
--- tests/test_api/test_http.py | 74 +++++++------ tests/test_api/test_versions.py | 182 +++++++++++++++++++++++--------- 2 files changed, 176 insertions(+), 80 deletions(-) diff --git a/tests/test_api/test_http.py b/tests/test_api/test_http.py index 5ecd225d3..73a29264d 100644 --- a/tests/test_api/test_http.py +++ b/tests/test_api/test_http.py @@ -1,8 +1,7 @@ -from requests import Response, Request -import time -import xmltodict +from requests import Response, Request, Session +from unittest.mock import patch import pytest -from openml.testing import TestBase, TestAPIBase +from openml.testing import TestAPIBase import os from pathlib import Path from urllib.parse import urljoin @@ -122,32 +121,6 @@ def test_get_refresh_cache(self): self.assertEqual(response2.status_code, 200) self.assertEqual(response1.content, response2.content) - @pytest.mark.uses_test_server() - def test_post_and_delete(self): - task_xml = """ - - 5 - 193 - 17 - - """ - # post - response = self.http_client.post( - "task", - files={"description": task_xml}, - ) - self.assertEqual(response.status_code, 200) - xml_resp = xmltodict.parse(response.content) - task_id = int(xml_resp["oml:upload_task"]["oml:id"]) - - # cleanup incase of failure - TestBase._mark_entity_for_removal("task", task_id) - TestBase.logger.info(f"collected from {__file__}: {task_id}") - - # delete - response = self.http_client.delete(f"task/{task_id}") - self.assertEqual(response.status_code, 200) - @pytest.mark.uses_test_server() def test_download_creates_file(self): # small stable resource @@ -198,3 +171,44 @@ def handler(response, path: Path, encoding: str): assert path.exists() assert path.read_text() == "HANDLED" + + def test_post(self): + resource_name = "resource" + resource_files = {"description": """Resource Description File"""} + + with patch.object(Session, "request") as mock_request: + mock_request.return_value = Response() + mock_request.return_value.status_code = 200 + + self.http_client.post( + resource_name, + 
files=resource_files, + ) + + mock_request.assert_called_once_with( + method="POST", + url=self.http_client.server + self.http_client.base_url + resource_name, + params={}, + data={'api_key': self.http_client.api_key}, + headers=self.http_client.headers, + files=resource_files, + ) + + def test_delete(self): + resource_name = "resource" + resource_id = 123 + + with patch.object(Session, "request") as mock_request: + mock_request.return_value = Response() + mock_request.return_value.status_code = 200 + + self.http_client.delete(f"{resource_name}/{resource_id}") + + mock_request.assert_called_once_with( + method="DELETE", + url=self.http_client.server + self.http_client.base_url + resource_name + "/" + str(resource_id), + params={'api_key': self.http_client.api_key}, + data={}, + headers=self.http_client.headers, + files=None, + ) diff --git a/tests/test_api/test_versions.py b/tests/test_api/test_versions.py index 2be35ba5c..fd953f3ac 100644 --- a/tests/test_api/test_versions.py +++ b/tests/test_api/test_versions.py @@ -1,49 +1,106 @@ -from time import time import pytest -from openml.testing import TestBase, TestAPIBase -from openml._api import ResourceV1API, ResourceV2API, FallbackProxy, ResourceAPI +from requests import Session, Response +from unittest.mock import patch +from openml.testing import TestAPIBase +from openml._api import FallbackProxy, ResourceAPI from openml.enums import ResourceType, APIVersion from openml.exceptions import OpenMLNotSupportedError -@pytest.mark.uses_test_server() class TestResourceAPIBase(TestAPIBase): resource: ResourceAPI | FallbackProxy - def _publish_and_delete(self): - task_xml = """ - - 5 - 193 - 17 - - """ - # publish - task_id = self.resource.publish( - "task", - files={"description": task_xml}, - ) - self.assertIsNotNone(task_id) - - # cleanup incase of failure - TestBase._mark_entity_for_removal("task", task_id) - TestBase.logger.info(f"collected from {__file__}: {task_id}") - - # delete - success = 
self.resource.delete(task_id) - self.assertTrue(success) - - def _tag_and_untag(self): - resource_id = 1 - unique_indicator = str(time()).replace(".", "") - tag = f"{self.__class__.__name__}_test_tag_and_untag_{unique_indicator}" - - tags = self.resource.tag(resource_id, tag) - self.assertIn(tag, tags) - - tags = self.resource.untag(resource_id, tag) - self.assertNotIn(tag, tags) - + @property + def http_client(self): + return self.resource._http + + def _publish(self): + resource_name = "task" + resource_files = {"description": """Resource Description File"""} + resource_id = 123 + + with patch.object(Session, "request") as mock_request: + mock_request.return_value = Response() + mock_request.return_value.status_code = 200 + mock_request.return_value._content = f'\n\t{resource_id}\n\n'.encode("utf-8") + + published_resource_id = self.resource.publish( + resource_name, + files=resource_files, + ) + + self.assertEqual(resource_id, published_resource_id) + + mock_request.assert_called_once_with( + method="POST", + url=self.http_client.server + self.http_client.base_url + resource_name, + params={}, + data={'api_key': self.http_client.api_key}, + headers=self.http_client.headers, + files=resource_files, + ) + + def _delete(self): + resource_name = "task" + resource_id = 123 + + with patch.object(Session, "request") as mock_request: + mock_request.return_value = Response() + mock_request.return_value.status_code = 200 + mock_request.return_value._content = f'\n {resource_id}\n\n'.encode("utf-8") + + self.resource.delete(resource_id) + + mock_request.assert_called_once_with( + method="DELETE", + url=self.http_client.server + self.http_client.base_url + resource_name + "/" + str(resource_id), + params={'api_key': self.http_client.api_key}, + data={}, + headers=self.http_client.headers, + files=None, + ) + + def _tag(self): + resource_id = 123 + resource_tag = "TAG" + + with patch.object(Session, "request") as mock_request: + mock_request.return_value = Response() + 
mock_request.return_value.status_code = 200 + mock_request.return_value._content = f'{resource_id}{resource_tag}'.encode("utf-8") + + tags = self.resource.tag(resource_id, resource_tag) + self.assertIn(resource_tag, tags) + + mock_request.assert_called_once_with( + method="POST", + url=self.http_client.server + self.http_client.base_url + self.resource.resource_type + "/tag", + params={}, + data={'api_key': self.http_client.api_key, 'task_id': resource_id, 'tag': resource_tag}, + headers=self.http_client.headers, + files=None, + ) + + def _untag(self): + resource_id = 123 + resource_tag = "TAG" + + with patch.object(Session, "request") as mock_request: + mock_request.return_value = Response() + mock_request.return_value.status_code = 200 + mock_request.return_value._content = f'{resource_id}'.encode("utf-8") + + tags = self.resource.untag(resource_id, resource_tag) + self.assertNotIn(resource_tag, tags) + + mock_request.assert_called_once_with( + method="POST", + url=self.http_client.server + self.http_client.base_url + self.resource.resource_type + "/untag", + params={}, + data={'api_key': self.http_client.api_key, 'task_id': resource_id, 'tag': resource_tag}, + headers=self.http_client.headers, + files=None, + ) class TestResourceV1API(TestResourceAPIBase): def setUp(self): @@ -53,11 +110,17 @@ def setUp(self): resource_type=ResourceType.TASK, ) - def test_publish_and_delete(self): - self._publish_and_delete() + def test_publish(self): + self._publish() + + def test_delete(self): + self._delete() - def test_tag_and_untag(self): - self._tag_and_untag() + def test_tag(self): + self._tag() + + def test_untag(self): + self._untag() class TestResourceV2API(TestResourceAPIBase): @@ -68,16 +131,29 @@ def setUp(self): resource_type=ResourceType.TASK, ) - def test_publish_and_delete(self): + def test_publish(self): + with pytest.raises(OpenMLNotSupportedError): + self._publish() + + def test_delete(self): + with pytest.raises(OpenMLNotSupportedError): + self._delete() + + 
def test_tag(self): with pytest.raises(OpenMLNotSupportedError): - self._tag_and_untag() + self._tag() - def test_tag_and_untag(self): + def test_untag(self): with pytest.raises(OpenMLNotSupportedError): - self._tag_and_untag() + self._untag() class TestResourceFallbackAPI(TestResourceAPIBase): + @property + def http_client(self): + # since these methods are not implemented for v2, they will fallback to v1 api + return self.http_clients[APIVersion.V1] + def setUp(self): super().setUp() resource_v1 = self._create_resource( @@ -90,8 +166,14 @@ def setUp(self): ) self.resource = FallbackProxy(resource_v2, resource_v1) - def test_publish_and_delete(self): - self._publish_and_delete() + def test_publish(self): + self._publish() + + def test_delete(self): + self._delete() + + def test_tag(self): + self._tag() - def test_tag_and_untag(self): - self._tag_and_untag() + def test_untag(self): + self._untag() From 599c7e112abd3aa9ad6170cabb7446a50765051d Mon Sep 17 00:00:00 2001 From: geetu040 Date: Thu, 19 Feb 2026 02:37:23 +0500 Subject: [PATCH 105/156] remove hardcoded server in TestHTTPClient.test_cache --- tests/test_api/test_http.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/tests/test_api/test_http.py b/tests/test_api/test_http.py index 73a29264d..80001cc8d 100644 --- a/tests/test_api/test_http.py +++ b/tests/test_api/test_http.py @@ -4,7 +4,7 @@ from openml.testing import TestAPIBase import os from pathlib import Path -from urllib.parse import urljoin +from urllib.parse import urljoin, urlparse from openml.enums import APIVersion from openml._api import HTTPClient @@ -22,20 +22,22 @@ def _prepare_url(self, path: str | None = None) -> str: return urljoin(server, urljoin(base_url, path)) def test_cache(self): - url = self._prepare_url(path="task/31") + path = "task/31" params = {"param1": "value1", "param2": "value2"} + url = self._prepare_url(path=path) + + server_keys = 
urlparse(self.http_client.server).netloc.split(".")[::-1] + base_url_keys = self.http_client.base_url.strip("/").split("/") + path_keys = path.split("/") + params_key = "&".join([f"{k}={v}" for k, v in params.items()]) + key = self.cache.get_key(url, params) expected_key = os.path.join( - "org", - "openml", - "test", - "api", - "v1", - "xml", - "task", - "31", - "param1=value1¶m2=value2", + *server_keys, + *base_url_keys, + *path_keys, + params_key, ) # validate key From 286786223c61b6c9fe419ebb918979ea0cad737c Mon Sep 17 00:00:00 2001 From: geetu040 Date: Thu, 19 Feb 2026 02:44:19 +0500 Subject: [PATCH 106/156] fix docstring in _resolve_default_cache_dir --- openml/_api/setup/_utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/openml/_api/setup/_utils.py b/openml/_api/setup/_utils.py index 6606140f9..f2e382bfc 100644 --- a/openml/_api/setup/_utils.py +++ b/openml/_api/setup/_utils.py @@ -34,7 +34,8 @@ def _resolve_default_cache_dir() -> Path: - Uses ``$XDG_CACHE_HOME/openml`` if ``XDG_CACHE_HOME`` is set. - Falls back to ``~/.cache/openml`` if ``XDG_CACHE_HOME`` is not set. - If an old cache directory exists at ``$XDG_CACHE_HOME/org/openml``, - a warning is logged for backward compatibility. + a warning is logged for backward compatibility. In this case, + ``$XDG_CACHE_HOME`` is returned instead of ``$XDG_CACHE_HOME/openml``. 
""" user_defined_cache_dir = os.environ.get("OPENML_CACHE_DIR") if user_defined_cache_dir is not None: From f09f3cd658e159579fbfe53fcb305a6f3fc75cac Mon Sep 17 00:00:00 2001 From: geetu040 Date: Thu, 19 Feb 2026 02:45:59 +0500 Subject: [PATCH 107/156] fix docstring in ResourceAPI --- openml/_api/resources/base/base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/openml/_api/resources/base/base.py b/openml/_api/resources/base/base.py index 51e41a0c8..68aae2162 100644 --- a/openml/_api/resources/base/base.py +++ b/openml/_api/resources/base/base.py @@ -26,8 +26,8 @@ class ResourceAPI(ABC): ---------- http : HTTPClient Configured HTTP client used for communication with the OpenML API. - minio : MinIOClient or None, optional - Optional MinIO client used for object storage operations. + minio : MinIOClient + Configured MinIO client used for object storage operations. Attributes ---------- From 5f731cec1026ffd1ddea011583a68960444111a1 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Thu, 19 Feb 2026 02:47:33 +0500 Subject: [PATCH 108/156] remove duplicates in __all__ --- openml/_api/__init__.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/openml/_api/__init__.py b/openml/_api/__init__.py index b7846fd39..60aa82762 100644 --- a/openml/_api/__init__.py +++ b/openml/_api/__init__.py @@ -66,7 +66,6 @@ "EvaluationV1API", "EvaluationV2API", "FallbackProxy", - "FallbackProxy", "FlowAPI", "FlowV1API", "FlowV2API", @@ -74,7 +73,6 @@ "HTTPClient", "MinIOClient", "ResourceAPI", - "ResourceAPI", "ResourceV1API", "ResourceV2API", "RunAPI", From bad784266c87c7444af8604f1130fbb5da503f6b Mon Sep 17 00:00:00 2001 From: geetu040 Date: Thu, 19 Feb 2026 02:52:22 +0500 Subject: [PATCH 109/156] remove ttl related code/docs --- openml/_api/clients/http.py | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/openml/_api/clients/http.py b/openml/_api/clients/http.py index 3ab0def4f..38f922d72 100644 --- a/openml/_api/clients/http.py +++ 
b/openml/_api/clients/http.py @@ -36,8 +36,7 @@ class HTTPCache: This class stores HTTP responses on disk using a structured directory layout derived from the request URL and parameters. Each cached response consists of three files: metadata (``meta.json``), headers (``headers.json``), and the raw - body (``body.bin``). Entries are considered valid until their time-to-live - (TTL) expires. + body (``body.bin``). Parameters ---------- @@ -115,8 +114,6 @@ def load(self, key: str) -> Response: ------ FileNotFoundError If the cache entry or required files are missing. - TimeoutError - If the cached entry has expired based on the configured TTL. ValueError If required metadata is missing or malformed. """ @@ -135,10 +132,6 @@ def load(self, key: str) -> Response: with meta_path.open("r", encoding="utf-8") as f: meta = json.load(f) - created_at = meta.get("created_at") - if created_at is None: - raise ValueError("Cache metadata missing 'created_at'") - with headers_path.open("r", encoding="utf-8") as f: headers = json.load(f) @@ -612,8 +605,8 @@ def request( # noqa: PLR0913, C901 cache_key = self.cache.get_key(url, params) try: return self.cache.load(cache_key) - except (FileNotFoundError, TimeoutError): - pass # cache miss or expired, continue + except FileNotFoundError: + pass # cache miss, continue except Exception: raise # propagate unexpected cache errors From aefdb384fc93c1c6963c5935723e4eb2ae912742 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Thu, 19 Feb 2026 02:53:26 +0500 Subject: [PATCH 110/156] remove delay methods in HTTPClient --- openml/_api/clients/http.py | 41 ------------------------------------- 1 file changed, 41 deletions(-) diff --git a/openml/_api/clients/http.py b/openml/_api/clients/http.py index 38f922d72..cbb5d423a 100644 --- a/openml/_api/clients/http.py +++ b/openml/_api/clients/http.py @@ -3,8 +3,6 @@ import hashlib import json import logging -import math -import random import time import xml from collections.abc import Callable, Mapping @@ 
-235,45 +233,6 @@ def __init__( # noqa: PLR0913 self.retry_func = human_delay if retry_policy == RetryPolicy.HUMAN else robot_delay self.headers: dict[str, str] = {"user-agent": f"openml-python/{__version__}"} - def _robot_delay(self, n: int) -> float: - """ - Compute delay for automated retry policy. - - Parameters - ---------- - n : int - Current retry attempt number (1-based). - - Returns - ------- - float - Number of seconds to wait before the next retry. - - Notes - ----- - Uses a sigmoid-based growth curve with Gaussian noise to gradually - increase waiting time. - """ - wait = (1 / (1 + math.exp(-(n * 0.5 - 4)))) * 60 - variation = random.gauss(0, wait / 10) - return max(1.0, wait + variation) - - def _human_delay(self, n: int) -> float: - """ - Compute delay for human-like retry policy. - - Parameters - ---------- - n : int - Current retry attempt number (1-based). - - Returns - ------- - float - Number of seconds to wait before the next retry. - """ - return max(1.0, n) - def _parse_exception_response( self, response: Response, From 0f40b0276d6329fb09a71f2e3c44163f5448f7f6 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Thu, 19 Feb 2026 02:54:56 +0500 Subject: [PATCH 111/156] minor fix in _resolve_default_cache_dir --- openml/_api/setup/_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openml/_api/setup/_utils.py b/openml/_api/setup/_utils.py index f2e382bfc..678138b3e 100644 --- a/openml/_api/setup/_utils.py +++ b/openml/_api/setup/_utils.py @@ -46,7 +46,7 @@ def _resolve_default_cache_dir() -> Path: xdg_cache_home = os.environ.get("XDG_CACHE_HOME") if xdg_cache_home is None: - return Path("~", ".cache", "openml") + return Path("~", ".cache", "openml").expanduser() # This is the proper XDG_CACHE_HOME directory, but # we unfortunately had a problem where we used XDG_CACHE_HOME/org, From 7ac16726c4b01aa4340d8aadabb2b8c28f7f0067 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Thu, 19 Feb 2026 03:12:09 +0500 Subject: [PATCH 112/156] 
update FallbackProxy --- openml/_api/resources/base/fallback.py | 165 +++++-------------------- openml/_api/setup/builder.py | 5 +- tests/test_api/test_versions.py | 5 +- 3 files changed, 38 insertions(+), 137 deletions(-) diff --git a/openml/_api/resources/base/fallback.py b/openml/_api/resources/base/fallback.py index 9b8f64a17..6b714c030 100644 --- a/openml/_api/resources/base/fallback.py +++ b/openml/_api/resources/base/fallback.py @@ -1,166 +1,61 @@ from __future__ import annotations from collections.abc import Callable -from typing import Any +from typing import TYPE_CHECKING, Any from openml.exceptions import OpenMLNotSupportedError +if TYPE_CHECKING: + from .base import ResourceAPI + class FallbackProxy: """ - Proxy object that provides transparent fallback across multiple API versions. - - This class delegates attribute access to a sequence of API implementations. - When a callable attribute is invoked and raises ``OpenMLNotSupportedError``, - the proxy automatically attempts the same method on subsequent API instances - until one succeeds. + Proxy object that provides transparent fallback between two API versions. Parameters ---------- - *api_versions : Any - One or more API implementation instances ordered by priority. - The first API is treated as the primary implementation, and - subsequent APIs are used as fallbacks. - - Raises - ------ - ValueError - If no API implementations are provided. - - Notes - ----- - Attribute lookup is performed dynamically via ``__getattr__``. - Only methods that raise ``OpenMLNotSupportedError`` trigger fallback - behavior. Other exceptions are propagated immediately. + primary_api : Any + Primary API implementation. + fallback_api : Any + Secondary API implementation used if the primary raises + ``OpenMLNotSupportedError``. 
""" - def __init__(self, *api_versions: Any): - if not api_versions: - raise ValueError("At least one API version must be provided") - self._apis = api_versions + def __init__(self, primary_api: ResourceAPI, fallback_api: ResourceAPI): + self._primary = primary_api + self._fallback = fallback_api def __getattr__(self, name: str) -> Any: - """ - Dynamically resolve attribute access across API implementations. - - Parameters - ---------- - name : str - Name of the attribute being accessed. + primary_attr = getattr(self._primary, name, None) + fallback_attr = getattr(self._fallback, name, None) - Returns - ------- - Any - The resolved attribute. If it is callable, a wrapped function - providing fallback behavior is returned. + if primary_attr is None and fallback_attr is None: + raise AttributeError(f"{self.__class__.__name__} has no attribute {name}") - Raises - ------ - AttributeError - If none of the API implementations define the attribute. - """ - api, attr = self._find_attr(name) - if callable(attr): - return self._wrap_callable(name, api, attr) - return attr + # If attribute exists on primary + if primary_attr is not None: + if callable(primary_attr): + return self._wrap_callable(name, primary_attr) + return primary_attr - def _find_attr(self, name: str) -> tuple[Any, Any]: - """ - Find the first API implementation that defines a given attribute. - - Parameters - ---------- - name : str - Name of the attribute to search for. - - Returns - ------- - tuple of (Any, Any) - The API instance and the corresponding attribute. - - Raises - ------ - AttributeError - If no API implementation defines the attribute. 
- """ - for api in self._apis: - attr = getattr(api, name, None) - if attr is not None: - return api, attr - raise AttributeError(f"{self.__class__.__name__} has no attribute {name}") + # Otherwise return fallback attribute directly + return fallback_attr def _wrap_callable( self, name: str, - primary_api: Any, primary_attr: Callable[..., Any], ) -> Callable[..., Any]: - """ - Wrap a callable attribute to enable fallback behavior. - - Parameters - ---------- - name : str - Name of the method being wrapped. - primary_api : Any - Primary API instance providing the callable. - primary_attr : Callable[..., Any] - Callable attribute obtained from the primary API. - - Returns - ------- - Callable[..., Any] - Wrapped function that attempts the primary call first and - falls back to other APIs if ``OpenMLNotSupportedError`` is raised. - """ - def wrapper(*args: Any, **kwargs: Any) -> Any: try: return primary_attr(*args, **kwargs) except OpenMLNotSupportedError: - return self._call_fallbacks(name, primary_api, *args, **kwargs) + fallback_attr = getattr(self._fallback, name, None) + if callable(fallback_attr): + return fallback_attr(*args, **kwargs) + raise OpenMLNotSupportedError( + f"Method '{name}' not supported by primary or fallback API" + ) from None return wrapper - - def _call_fallbacks( - self, - name: str, - skip_api: Any, - *args: Any, - **kwargs: Any, - ) -> Any: - """ - Attempt to call a method on fallback API implementations. - - Parameters - ---------- - name : str - Name of the method to invoke. - skip_api : Any - API instance to skip (typically the primary API that already failed). - *args : Any - Positional arguments passed to the method. - **kwargs : Any - Keyword arguments passed to the method. - - Returns - ------- - Any - Result returned by the first successful fallback invocation. - - Raises - ------ - OpenMLNotSupportedError - If all API implementations either do not define the method - or raise ``OpenMLNotSupportedError``. 
- """ - for api in self._apis: - if api is skip_api: - continue - attr = getattr(api, name, None) - if callable(attr): - try: - return attr(*args, **kwargs) - except OpenMLNotSupportedError: - continue - raise OpenMLNotSupportedError(f"Could not fallback to any API for method: {name}") diff --git a/openml/_api/setup/builder.py b/openml/_api/setup/builder.py index aa6ed4bba..0c96df877 100644 --- a/openml/_api/setup/builder.py +++ b/openml/_api/setup/builder.py @@ -121,7 +121,10 @@ def build(cls, config: Config) -> APIBackendBuilder: ) merged: dict[ResourceType, FallbackProxy] = { - name: FallbackProxy(resource_apis[name], fallback_resource_apis[name]) + name: FallbackProxy( + primary_api=resource_apis[name], + fallback_api=fallback_resource_apis[name], + ) for name in resource_apis } diff --git a/tests/test_api/test_versions.py b/tests/test_api/test_versions.py index fd953f3ac..2899cf6a7 100644 --- a/tests/test_api/test_versions.py +++ b/tests/test_api/test_versions.py @@ -164,7 +164,10 @@ def setUp(self): api_version=APIVersion.V2, resource_type=ResourceType.TASK, ) - self.resource = FallbackProxy(resource_v2, resource_v1) + self.resource = FallbackProxy( + primary_api=resource_v2, + fallback_api=resource_v1, + ) def test_publish(self): self._publish() From 6ac1dfeeea3fb4aab1e7dc8d8cf6b6f0b627e9bd Mon Sep 17 00:00:00 2001 From: geetu040 Date: Thu, 19 Feb 2026 03:14:09 +0500 Subject: [PATCH 113/156] simplify _backend creation --- openml/_api/setup/__init__.py | 3 ++- openml/_api/setup/_instance.py | 5 ----- 2 files changed, 2 insertions(+), 6 deletions(-) delete mode 100644 openml/_api/setup/_instance.py diff --git a/openml/_api/setup/__init__.py b/openml/_api/setup/__init__.py index 1f6e60ecb..4c7fce119 100644 --- a/openml/_api/setup/__init__.py +++ b/openml/_api/setup/__init__.py @@ -1,8 +1,9 @@ -from ._instance import _backend from .backend import APIBackend from .builder import APIBackendBuilder from .config import APIConfig, Config, ConnectionConfig +_backend = 
APIBackend.get_instance() + __all__ = [ "APIBackend", "APIBackendBuilder", diff --git a/openml/_api/setup/_instance.py b/openml/_api/setup/_instance.py deleted file mode 100644 index c98ccaf57..000000000 --- a/openml/_api/setup/_instance.py +++ /dev/null @@ -1,5 +0,0 @@ -from __future__ import annotations - -from .backend import APIBackend - -_backend = APIBackend.get_instance() From 27696bbfc79aa20b89a98be36f10f33648047707 Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Fri, 20 Feb 2026 13:45:41 +0530 Subject: [PATCH 114/156] req changes --- openml/__init__.py | 2 +- openml/_config.py | 132 +++++++++++++++---------------- tests/test_openml/test_config.py | 2 +- 3 files changed, 67 insertions(+), 69 deletions(-) diff --git a/openml/__init__.py b/openml/__init__.py index d5cb99fd9..9a457c146 100644 --- a/openml/__init__.py +++ b/openml/__init__.py @@ -54,7 +54,7 @@ if TYPE_CHECKING: from ._config import OpenMLConfigManager -config: OpenMLConfigManager = _config_module._config +config: OpenMLConfigManager = _config_module.__config def populate_cache( diff --git a/openml/_config.py b/openml/_config.py index 9dd75c989..26bcca448 100644 --- a/openml/_config.py +++ b/openml/_config.py @@ -28,11 +28,11 @@ def _resolve_default_cache_dir() -> Path: return Path(user_defined_cache_dir) if platform.system().lower() != "linux": - return Path("~", ".openml") + return Path("~", ".openml").expanduser() xdg_cache_home = os.environ.get("XDG_CACHE_HOME") if xdg_cache_home is None: - return Path("~", ".cache", "openml") + return Path("~", ".cache", "openml").expanduser() cache_dir = Path(xdg_cache_home) / "openml" if cache_dir.exists(): @@ -57,7 +57,7 @@ def _resolve_default_cache_dir() -> Path: class OpenMLConfig: """Dataclass storing the OpenML configuration.""" - apikey: str = "" + apikey: str | None = "" server: str = "https://www.openml.org/api/v1/xml" cachedir: Path = field(default_factory=_resolve_default_cache_dir) 
avoid_duplicate_runs: bool = False @@ -83,8 +83,6 @@ def __init__(self) -> None: self.OPENML_SKIP_PARQUET_ENV_VAR = "OPENML_SKIP_PARQUET" self._TEST_SERVER_NORMAL_USER_KEY = "normaluser" - self._user_path = Path("~").expanduser().absolute() - self._config: OpenMLConfig = OpenMLConfig() # for legacy test `test_non_writable_home` self._defaults: dict[str, Any] = OpenMLConfig().__dict__.copy() @@ -93,7 +91,7 @@ def __init__(self) -> None: self.logger = logger self.openml_logger = openml_logger - self._examples = self.ConfigurationForExamples(self) + self._examples = ConfigurationForExamples(self) self._setup() @@ -125,7 +123,6 @@ def __setattr__(self, name: str, value: Any) -> None: "OPENML_CACHE_DIR_ENV_VAR", "OPENML_SKIP_PARQUET_ENV_VAR", "_TEST_SERVER_NORMAL_USER_KEY", - "_user_path", }: return object.__setattr__(self, name, value) @@ -397,70 +394,71 @@ def overwrite_config_context(self, config: dict[str, Any]) -> Iterator[dict[str, yield merged_config self._setup(existing_config) - class ConfigurationForExamples: - """Allows easy switching to and from a test configuration, used for examples.""" - - _last_used_server = None - _last_used_key = None - _start_last_called = False - - def __init__(self, manager: OpenMLConfigManager): - self._manager = manager - self._test_apikey = manager._TEST_SERVER_NORMAL_USER_KEY - self._test_server = "https://test.openml.org/api/v1/xml" - - def start_using_configuration_for_example(self) -> None: - """Sets the configuration to connect to the test server with valid apikey. - - To configuration as was before this call is stored, and can be recovered - by using the `stop_use_example_configuration` method. - """ - if ( - self._start_last_called - and self._manager._config.server == self._test_server - and self._manager._config.apikey == self._test_apikey - ): - # Method is called more than once in a row without modifying the server or apikey. - # We don't want to save the current test configuration as a last used configuration. 
- return - - self._last_used_server = self._manager._config.server - self._last_used_key = self._manager._config.apikey - type(self)._start_last_called = True - - # Test server key for examples - self._manager._config = replace( - self._manager._config, - server=self._test_server, - apikey=self._test_apikey, - ) - warnings.warn( - f"Switching to the test server {self._test_server} to not upload results to " - "the live server. Using the test server may result in reduced performance of the " - "API!", - stacklevel=2, - ) - def stop_using_configuration_for_example(self) -> None: - """Return to configuration as it was before `start_use_example_configuration`.""" - if not type(self)._start_last_called: - # We don't want to allow this because it will (likely) result in the `server` and - # `apikey` variables being set to None. - raise RuntimeError( - "`stop_use_example_configuration` called without a saved config." - "`start_use_example_configuration` must be called first.", - ) - - self._manager._config = replace( - self._manager._config, - server=cast("str", self._last_used_server), - apikey=cast("str", self._last_used_key), +class ConfigurationForExamples: + """Allows easy switching to and from a test configuration, used for examples.""" + + _last_used_server = None + _last_used_key = None + _start_last_called = False + + def __init__(self, manager: OpenMLConfigManager): + self._manager = manager + self._test_apikey = manager._TEST_SERVER_NORMAL_USER_KEY + self._test_server = "https://test.openml.org/api/v1/xml" + + def start_using_configuration_for_example(self) -> None: + """Sets the configuration to connect to the test server with valid apikey. + + To configuration as was before this call is stored, and can be recovered + by using the `stop_use_example_configuration` method. 
+ """ + if ( + self._start_last_called + and self._manager._config.server == self._test_server + and self._manager._config.apikey == self._test_apikey + ): + # Method is called more than once in a row without modifying the server or apikey. + # We don't want to save the current test configuration as a last used configuration. + return + + self._last_used_server = self._manager._config.server + self._last_used_key = self._manager._config.apikey + type(self)._start_last_called = True + + # Test server key for examples + self._manager._config = replace( + self._manager._config, + server=self._test_server, + apikey=self._test_apikey, + ) + warnings.warn( + f"Switching to the test server {self._test_server} to not upload results to " + "the live server. Using the test server may result in reduced performance of the " + "API!", + stacklevel=2, + ) + + def stop_using_configuration_for_example(self) -> None: + """Return to configuration as it was before `start_use_example_configuration`.""" + if not type(self)._start_last_called: + # We don't want to allow this because it will (likely) result in the `server` and + # `apikey` variables being set to None. + raise RuntimeError( + "`stop_use_example_configuration` called without a saved config." 
+ "`start_use_example_configuration` must be called first.", ) - type(self)._start_last_called = False + + self._manager._config = replace( + self._manager._config, + server=cast("str", self._last_used_server), + apikey=cast("str", self._last_used_key), + ) + type(self)._start_last_called = False -_config = OpenMLConfigManager() +__config = OpenMLConfigManager() def __getattr__(name: str) -> Any: - return getattr(_config, name) + return getattr(__config, name) diff --git a/tests/test_openml/test_config.py b/tests/test_openml/test_config.py index e39be87a6..1f0347f3b 100644 --- a/tests/test_openml/test_config.py +++ b/tests/test_openml/test_config.py @@ -135,7 +135,7 @@ def test_example_configuration_stop_before_start(self): error_regex = ".*stop_use_example_configuration.*start_use_example_configuration.*first" # Tests do not reset the state of this class. Thus, we ensure it is in # the original state before the test. - openml.config.ConfigurationForExamples._start_last_called = False + openml.config._examples._start_last_called = False self.assertRaisesRegex( RuntimeError, error_regex, From 95daaa6b4bc01ee3fe1c23a1e3d7757caa705c66 Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Fri, 20 Feb 2026 13:56:30 +0530 Subject: [PATCH 115/156] remove old config file --- openml/config.py | 529 ----------------------------------------------- 1 file changed, 529 deletions(-) delete mode 100644 openml/config.py diff --git a/openml/config.py b/openml/config.py deleted file mode 100644 index 638b45650..000000000 --- a/openml/config.py +++ /dev/null @@ -1,529 +0,0 @@ -"""Store module level information like the API key, cache directory and the server""" - -# License: BSD 3-Clause -from __future__ import annotations - -import configparser -import logging -import logging.handlers -import os -import platform -import shutil -import warnings -from collections.abc import Iterator -from contextlib import contextmanager -from io import 
StringIO -from pathlib import Path -from typing import Any, Literal, cast -from typing_extensions import TypedDict -from urllib.parse import urlparse - -logger = logging.getLogger(__name__) -openml_logger = logging.getLogger("openml") -console_handler: logging.StreamHandler | None = None -file_handler: logging.handlers.RotatingFileHandler | None = None - -OPENML_CACHE_DIR_ENV_VAR = "OPENML_CACHE_DIR" -OPENML_SKIP_PARQUET_ENV_VAR = "OPENML_SKIP_PARQUET" -OPENML_TEST_SERVER_ADMIN_KEY_ENV_VAR = "OPENML_TEST_SERVER_ADMIN_KEY" -_TEST_SERVER_NORMAL_USER_KEY = "normaluser" - -TEST_SERVER_URL = "https://test.openml.org" - - -class _Config(TypedDict): - apikey: str - server: str - cachedir: Path - avoid_duplicate_runs: bool - retry_policy: Literal["human", "robot"] - connection_n_retries: int - show_progress: bool - - -def _create_log_handlers(create_file_handler: bool = True) -> None: # noqa: FBT002 - """Creates but does not attach the log handlers.""" - global console_handler, file_handler # noqa: PLW0603 - if console_handler is not None or file_handler is not None: - logger.debug("Requested to create log handlers, but they are already created.") - return - - message_format = "[%(levelname)s] [%(asctime)s:%(name)s] %(message)s" - output_formatter = logging.Formatter(message_format, datefmt="%H:%M:%S") - - console_handler = logging.StreamHandler() - console_handler.setFormatter(output_formatter) - - if create_file_handler: - one_mb = 2**20 - log_path = _root_cache_directory / "openml_python.log" - file_handler = logging.handlers.RotatingFileHandler( - log_path, - maxBytes=one_mb, - backupCount=1, - delay=True, - ) - file_handler.setFormatter(output_formatter) - - -def _convert_log_levels(log_level: int) -> tuple[int, int]: - """Converts a log level that's either defined by OpenML/Python to both specifications.""" - # OpenML verbosity level don't match Python values directly: - openml_to_python = {0: logging.WARNING, 1: logging.INFO, 2: logging.DEBUG} - python_to_openml = { 
- logging.DEBUG: 2, - logging.INFO: 1, - logging.WARNING: 0, - logging.CRITICAL: 0, - logging.ERROR: 0, - } - # Because the dictionaries share no keys, we use `get` to convert as necessary: - openml_level = python_to_openml.get(log_level, log_level) - python_level = openml_to_python.get(log_level, log_level) - return openml_level, python_level - - -def _set_level_register_and_store(handler: logging.Handler, log_level: int) -> None: - """Set handler log level, register it if needed, save setting to config file if specified.""" - _oml_level, py_level = _convert_log_levels(log_level) - handler.setLevel(py_level) - - if openml_logger.level > py_level or openml_logger.level == logging.NOTSET: - openml_logger.setLevel(py_level) - - if handler not in openml_logger.handlers: - openml_logger.addHandler(handler) - - -def set_console_log_level(console_output_level: int) -> None: - """Set console output to the desired level and register it with openml logger if needed.""" - global console_handler # noqa: PLW0602 - assert console_handler is not None - _set_level_register_and_store(console_handler, console_output_level) - - -def set_file_log_level(file_output_level: int) -> None: - """Set file output to the desired level and register it with openml logger if needed.""" - global file_handler # noqa: PLW0602 - assert file_handler is not None - _set_level_register_and_store(file_handler, file_output_level) - - -# Default values (see also https://github.com/openml/OpenML/wiki/Client-API-Standards) -_user_path = Path("~").expanduser().absolute() - - -def _resolve_default_cache_dir() -> Path: - user_defined_cache_dir = os.environ.get(OPENML_CACHE_DIR_ENV_VAR) - if user_defined_cache_dir is not None: - return Path(user_defined_cache_dir) - - if platform.system().lower() != "linux": - return _user_path / ".openml" - - xdg_cache_home = os.environ.get("XDG_CACHE_HOME") - if xdg_cache_home is None: - return Path("~", ".cache", "openml") - - # This is the proper XDG_CACHE_HOME directory, 
but - # we unfortunately had a problem where we used XDG_CACHE_HOME/org, - # we check heuristically if this old directory still exists and issue - # a warning if it does. There's too much data to move to do this for the user. - - # The new cache directory exists - cache_dir = Path(xdg_cache_home) / "openml" - if cache_dir.exists(): - return cache_dir - - # The old cache directory *does not* exist - heuristic_dir_for_backwards_compat = Path(xdg_cache_home) / "org" / "openml" - if not heuristic_dir_for_backwards_compat.exists(): - return cache_dir - - root_dir_to_delete = Path(xdg_cache_home) / "org" - openml_logger.warning( - "An old cache directory was found at '%s'. This directory is no longer used by " - "OpenML-Python. To silence this warning you would need to delete the old cache " - "directory. The cached files will then be located in '%s'.", - root_dir_to_delete, - cache_dir, - ) - return Path(xdg_cache_home) - - -_defaults: _Config = { - "apikey": "", - "server": "https://www.openml.org/api/v1/xml", - "cachedir": _resolve_default_cache_dir(), - "avoid_duplicate_runs": False, - "retry_policy": "human", - "connection_n_retries": 5, - "show_progress": False, -} - -# Default values are actually added here in the _setup() function which is -# called at the end of this module -server = _defaults["server"] - - -def get_server_base_url() -> str: - """Return the base URL of the currently configured server. 
- - Turns ``"https://api.openml.org/api/v1/xml"`` in ``"https://www.openml.org/"`` - and ``"https://test.openml.org/api/v1/xml"`` in ``"https://test.openml.org/"`` - - Returns - ------- - str - """ - domain, _path = server.split("/api", maxsplit=1) - return domain.replace("api", "www") - - -apikey: str = _defaults["apikey"] -show_progress: bool = _defaults["show_progress"] -# The current cache directory (without the server name) -_root_cache_directory: Path = Path(_defaults["cachedir"]) -avoid_duplicate_runs = _defaults["avoid_duplicate_runs"] - -retry_policy: Literal["human", "robot"] = _defaults["retry_policy"] -connection_n_retries: int = _defaults["connection_n_retries"] - - -def set_retry_policy(value: Literal["human", "robot"], n_retries: int | None = None) -> None: - global retry_policy # noqa: PLW0603 - global connection_n_retries # noqa: PLW0603 - default_retries_by_policy = {"human": 5, "robot": 50} - - if value not in default_retries_by_policy: - raise ValueError( - f"Detected retry_policy '{value}' but must be one of " - f"{list(default_retries_by_policy.keys())}", - ) - if n_retries is not None and not isinstance(n_retries, int): - raise TypeError(f"`n_retries` must be of type `int` or `None` but is `{type(n_retries)}`.") - - if isinstance(n_retries, int) and n_retries < 1: - raise ValueError(f"`n_retries` is '{n_retries}' but must be positive.") - - retry_policy = value - connection_n_retries = default_retries_by_policy[value] if n_retries is None else n_retries - - -class ConfigurationForExamples: - """Allows easy switching to and from a test configuration, used for examples.""" - - _last_used_server = None - _last_used_key = None - _start_last_called = False - _test_server = f"{TEST_SERVER_URL}/api/v1/xml" - _test_apikey = _TEST_SERVER_NORMAL_USER_KEY - - @classmethod - def start_using_configuration_for_example(cls) -> None: - """Sets the configuration to connect to the test server with valid apikey. 
- - To configuration as was before this call is stored, and can be recovered - by using the `stop_use_example_configuration` method. - """ - global server # noqa: PLW0603 - global apikey # noqa: PLW0603 - - if cls._start_last_called and server == cls._test_server and apikey == cls._test_apikey: - # Method is called more than once in a row without modifying the server or apikey. - # We don't want to save the current test configuration as a last used configuration. - return - - cls._last_used_server = server - cls._last_used_key = apikey - cls._start_last_called = True - - # Test server key for examples - server = cls._test_server - apikey = cls._test_apikey - warnings.warn( - f"Switching to the test server {server} to not upload results to the live server. " - "Using the test server may result in reduced performance of the API!", - stacklevel=2, - ) - - @classmethod - def stop_using_configuration_for_example(cls) -> None: - """Return to configuration as it was before `start_use_example_configuration`.""" - if not cls._start_last_called: - # We don't want to allow this because it will (likely) result in the `server` and - # `apikey` variables being set to None. - raise RuntimeError( - "`stop_use_example_configuration` called without a saved config." - "`start_use_example_configuration` must be called first.", - ) - - global server # noqa: PLW0603 - global apikey # noqa: PLW0603 - - server = cast("str", cls._last_used_server) - apikey = cast("str", cls._last_used_key) - cls._start_last_called = False - - -def _handle_xdg_config_home_backwards_compatibility( - xdg_home: str, -) -> Path: - # NOTE(eddiebergman): A previous bug results in the config - # file being located at `${XDG_CONFIG_HOME}/config` instead - # of `${XDG_CONFIG_HOME}/openml/config`. As to maintain backwards - # compatibility, where users may already may have had a configuration, - # we copy it over an issue a warning until it's deleted. 
- # As a heurisitic to ensure that it's "our" config file, we try parse it first. - config_dir = Path(xdg_home) / "openml" - - backwards_compat_config_file = Path(xdg_home) / "config" - if not backwards_compat_config_file.exists(): - return config_dir - - # If it errors, that's a good sign it's not ours and we can - # safely ignore it, jumping out of this block. This is a heurisitc - try: - _parse_config(backwards_compat_config_file) - except Exception: # noqa: BLE001 - return config_dir - - # Looks like it's ours, lets try copy it to the correct place - correct_config_location = config_dir / "config" - try: - # We copy and return the new copied location - shutil.copy(backwards_compat_config_file, correct_config_location) - openml_logger.warning( - "An openml configuration file was found at the old location " - f"at {backwards_compat_config_file}. We have copied it to the new " - f"location at {correct_config_location}. " - "\nTo silence this warning please verify that the configuration file " - f"at {correct_config_location} is correct and delete the file at " - f"{backwards_compat_config_file}." - ) - return config_dir - except Exception as e: # noqa: BLE001 - # We failed to copy and its ours, return the old one. 
- openml_logger.warning( - "While attempting to perform a backwards compatible fix, we " - f"failed to copy the openml config file at " - f"{backwards_compat_config_file}' to {correct_config_location}" - f"\n{type(e)}: {e}", - "\n\nTo silence this warning, please copy the file " - "to the new location and delete the old file at " - f"{backwards_compat_config_file}.", - ) - return backwards_compat_config_file - - -def determine_config_file_path() -> Path: - if platform.system().lower() == "linux": - xdg_home = os.environ.get("XDG_CONFIG_HOME") - if xdg_home is not None: - config_dir = _handle_xdg_config_home_backwards_compatibility(xdg_home) - else: - config_dir = Path("~", ".config", "openml") - else: - config_dir = Path("~") / ".openml" - - # Still use os.path.expanduser to trigger the mock in the unit test - config_dir = Path(config_dir).expanduser().resolve() - return config_dir / "config" - - -def _setup(config: _Config | None = None) -> None: - """Setup openml package. Called on first import. - - Reads the config file and sets up apikey, server, cache appropriately. - key and server can be set by the user simply using - openml.config.apikey = THEIRKEY - openml.config.server = SOMESERVER - We could also make it a property but that's less clear. - """ - global apikey # noqa: PLW0603 - global server # noqa: PLW0603 - global _root_cache_directory # noqa: PLW0603 - global avoid_duplicate_runs # noqa: PLW0603 - global show_progress # noqa: PLW0603 - - config_file = determine_config_file_path() - config_dir = config_file.parent - - # read config file, create directory for config file - try: - if not config_dir.exists(): - config_dir.mkdir(exist_ok=True, parents=True) - except PermissionError: - openml_logger.warning( - f"No permission to create OpenML directory at {config_dir}!" - " This can result in OpenML-Python not working properly." 
- ) - - if config is None: - config = _parse_config(config_file) - - avoid_duplicate_runs = config["avoid_duplicate_runs"] - apikey = config["apikey"] - server = config["server"] - show_progress = config["show_progress"] - n_retries = int(config["connection_n_retries"]) - - set_retry_policy(config["retry_policy"], n_retries) - - user_defined_cache_dir = os.environ.get(OPENML_CACHE_DIR_ENV_VAR) - if user_defined_cache_dir is not None: - short_cache_dir = Path(user_defined_cache_dir) - else: - short_cache_dir = Path(config["cachedir"]) - _root_cache_directory = short_cache_dir.expanduser().resolve() - - try: - cache_exists = _root_cache_directory.exists() - # create the cache subdirectory - if not cache_exists: - _root_cache_directory.mkdir(exist_ok=True, parents=True) - _create_log_handlers() - except PermissionError: - openml_logger.warning( - f"No permission to create OpenML directory at {_root_cache_directory}!" - " This can result in OpenML-Python not working properly." - ) - _create_log_handlers(create_file_handler=False) - - -def set_field_in_config_file(field: str, value: Any) -> None: - """Overwrites the `field` in the configuration file with the new `value`.""" - if field not in _defaults: - raise ValueError(f"Field '{field}' is not valid and must be one of '{_defaults.keys()}'.") - - # TODO(eddiebergman): This use of globals has gone too far - globals()[field] = value - config_file = determine_config_file_path() - config = _parse_config(config_file) - with config_file.open("w") as fh: - for f in _defaults: - # We can't blindly set all values based on globals() because when the user - # sets it through config.FIELD it should not be stored to file. - # There doesn't seem to be a way to avoid writing defaults to file with configparser, - # because it is impossible to distinguish from an explicitly set value that matches - # the default value, to one that was set to its default because it was omitted. 
- value = globals()[f] if f == field else config.get(f) # type: ignore - if value is not None: - fh.write(f"{f} = {value}\n") - - -def _parse_config(config_file: str | Path) -> _Config: - """Parse the config file, set up defaults.""" - config_file = Path(config_file) - config = configparser.RawConfigParser(defaults=_defaults) # type: ignore - - # The ConfigParser requires a [SECTION_HEADER], which we do not expect in our config file. - # Cheat the ConfigParser module by adding a fake section header - config_file_ = StringIO() - config_file_.write("[FAKE_SECTION]\n") - try: - with config_file.open("r") as fh: - for line in fh: - config_file_.write(line) - except FileNotFoundError: - logger.info("No config file found at %s, using default configuration.", config_file) - except OSError as e: - logger.info("Error opening file %s: %s", config_file, e.args[0]) - config_file_.seek(0) - config.read_file(config_file_) - configuration = dict(config.items("FAKE_SECTION")) - for boolean_field in ["avoid_duplicate_runs", "show_progress"]: - if isinstance(config["FAKE_SECTION"][boolean_field], str): - configuration[boolean_field] = config["FAKE_SECTION"].getboolean(boolean_field) # type: ignore - return configuration # type: ignore - - -def get_config_as_dict() -> _Config: - return { - "apikey": apikey, - "server": server, - "cachedir": _root_cache_directory, - "avoid_duplicate_runs": avoid_duplicate_runs, - "connection_n_retries": connection_n_retries, - "retry_policy": retry_policy, - "show_progress": show_progress, - } - - -# NOTE: For backwards compatibility, we keep the `str` -def get_cache_directory() -> str: - """Get the current cache directory. - - This gets the cache directory for the current server relative - to the root cache directory that can be set via - ``set_root_cache_directory()``. The cache directory is the - ``root_cache_directory`` with additional information on which - subdirectory to use based on the server name. 
By default it is - ``root_cache_directory / org / openml / www`` for the standard - OpenML.org server and is defined as - ``root_cache_directory / top-level domain / second-level domain / - hostname`` - ``` - - Returns - ------- - cachedir : string - The current cache directory. - - """ - url_suffix = urlparse(server).netloc - url_parts = url_suffix.replace(":", "_").split(".")[::-1] - reversed_url_suffix = os.sep.join(url_parts) # noqa: PTH118 - return os.path.join(_root_cache_directory, reversed_url_suffix) # noqa: PTH118 - - -def set_root_cache_directory(root_cache_directory: str | Path) -> None: - """Set module-wide base cache directory. - - Sets the root cache directory, wherin the cache directories are - created to store content from different OpenML servers. For example, - by default, cached data for the standard OpenML.org server is stored - at ``root_cache_directory / org / openml / www``, and the general - pattern is ``root_cache_directory / top-level domain / second-level - domain / hostname``. - - Parameters - ---------- - root_cache_directory : string - Path to use as cache directory. 
- - See Also - -------- - get_cache_directory - """ - global _root_cache_directory # noqa: PLW0603 - _root_cache_directory = Path(root_cache_directory) - - -start_using_configuration_for_example = ( - ConfigurationForExamples.start_using_configuration_for_example -) -stop_using_configuration_for_example = ConfigurationForExamples.stop_using_configuration_for_example - - -@contextmanager -def overwrite_config_context(config: dict[str, Any]) -> Iterator[_Config]: - """A context manager to temporarily override variables in the configuration.""" - existing_config = get_config_as_dict() - merged_config = {**existing_config, **config} - - _setup(merged_config) # type: ignore - yield merged_config # type: ignore - - _setup(existing_config) - - -__all__ = [ - "get_cache_directory", - "get_config_as_dict", - "set_root_cache_directory", - "start_using_configuration_for_example", - "stop_using_configuration_for_example", -] - -_setup() From 7841ea8eb35e6195bb8554676315a60697e39054 Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Fri, 20 Feb 2026 14:09:45 +0530 Subject: [PATCH 116/156] added OPENML_TEST_SERVER_ADMIN_KEY_ENV_VAR --- openml/_config.py | 1 + 1 file changed, 1 insertion(+) diff --git a/openml/_config.py b/openml/_config.py index efc765f60..1d3fad339 100644 --- a/openml/_config.py +++ b/openml/_config.py @@ -82,6 +82,7 @@ def __init__(self) -> None: self.OPENML_CACHE_DIR_ENV_VAR = "OPENML_CACHE_DIR" self.OPENML_SKIP_PARQUET_ENV_VAR = "OPENML_SKIP_PARQUET" self._TEST_SERVER_NORMAL_USER_KEY = "normaluser" + self.OPENML_TEST_SERVER_ADMIN_KEY_ENV_VAR = "OPENML_TEST_SERVER_ADMIN_KEY" self.TEST_SERVER_URL = "https://test.openml.org" self._config: OpenMLConfig = OpenMLConfig() From cc515aacb0797031b6c464dd2949584c93986b3a Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Fri, 20 Feb 2026 14:13:03 +0530 Subject: [PATCH 117/156] bug fixing --- tests/test_utils/test_utils.py | 2 +- 1 
file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_utils/test_utils.py b/tests/test_utils/test_utils.py index 367cd5551..38e004bfb 100644 --- a/tests/test_utils/test_utils.py +++ b/tests/test_utils/test_utils.py @@ -124,7 +124,7 @@ def test_list_all_few_results_available(_perform_api_call): @unittest.skipIf(os.name == "nt", "https://github.com/openml/openml-python/issues/1033") -@unittest.mock.patch("openml.utils.openml.config.get_cache_directory") +@unittest.mock.patch("openml.config.get_cache_directory") def test__create_cache_directory(config_mock, tmp_path): config_mock.return_value = tmp_path openml.utils._create_cache_directory("abc") From e6a92df7e8d35a8b1b4ddbc1a46f226291e04a93 Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Fri, 20 Feb 2026 14:26:04 +0530 Subject: [PATCH 118/156] armagh fix --- openml/_config.py | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/openml/_config.py b/openml/_config.py index 1d3fad339..a897f17fc 100644 --- a/openml/_config.py +++ b/openml/_config.py @@ -12,10 +12,10 @@ import warnings from collections.abc import Iterator from contextlib import contextmanager -from dataclasses import dataclass, field, replace +from dataclasses import dataclass, field, fields, replace from io import StringIO from pathlib import Path -from typing import Any, Literal, cast +from typing import Any, ClassVar, Literal, cast from urllib.parse import urlparse logger = logging.getLogger(__name__) @@ -102,15 +102,7 @@ def __getattr__(self, name: str) -> Any: return getattr(self._config, name) raise AttributeError(f"{type(self).__name__!r} object has no attribute {name!r}") - _FIELDS = { # noqa: RUF012 - "apikey", - "server", - "cachedir", - "avoid_duplicate_runs", - "retry_policy", - "connection_n_retries", - "show_progress", - } + _FIELDS: ClassVar[set[str]] = {f.name for f in fields(OpenMLConfig)} def __setattr__(self, name: str, value: Any) -> None: # 
during __init__ before _config exists From 1b8c22ad38ef6c4ffc6b9d422ec13a044888d1bc Mon Sep 17 00:00:00 2001 From: geetu040 Date: Fri, 20 Feb 2026 19:15:32 +0500 Subject: [PATCH 119/156] update content_type check --- openml/_api/clients/http.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openml/_api/clients/http.py b/openml/_api/clients/http.py index cbb5d423a..1a583d39b 100644 --- a/openml/_api/clients/http.py +++ b/openml/_api/clients/http.py @@ -253,7 +253,7 @@ def _parse_exception_response( """ content_type = response.headers.get("Content-Type", "").lower() - if "json" in content_type: + if "application/json" in content_type: server_exception = response.json() server_error = server_exception["detail"] code = server_error.get("code") From fc839a6e6e680ed983974c9e30286f29d175efc9 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Fri, 20 Feb 2026 19:16:53 +0500 Subject: [PATCH 120/156] Revert "make delay functions static" This reverts commit 33b4ca0f103e0fa9d37368f6ee632d7e1f3217b9. 
--- openml/_api/clients/http.py | 6 +++--- openml/_api/clients/utils.py | 40 ------------------------------------ 2 files changed, 3 insertions(+), 43 deletions(-) delete mode 100644 openml/_api/clients/utils.py diff --git a/openml/_api/clients/http.py b/openml/_api/clients/http.py index 1a583d39b..270fe2719 100644 --- a/openml/_api/clients/http.py +++ b/openml/_api/clients/http.py @@ -24,8 +24,6 @@ OpenMLServerNoResult, ) -from .utils import human_delay, robot_delay - class HTTPCache: """ @@ -230,7 +228,9 @@ def __init__( # noqa: PLR0913 self.retry_policy = retry_policy self.cache = cache - self.retry_func = human_delay if retry_policy == RetryPolicy.HUMAN else robot_delay + self.retry_func = ( + self._human_delay if retry_policy == RetryPolicy.HUMAN else self._robot_delay + ) self.headers: dict[str, str] = {"user-agent": f"openml-python/{__version__}"} def _parse_exception_response( diff --git a/openml/_api/clients/utils.py b/openml/_api/clients/utils.py deleted file mode 100644 index c21732504..000000000 --- a/openml/_api/clients/utils.py +++ /dev/null @@ -1,40 +0,0 @@ -from __future__ import annotations - -import math -import random - - -def robot_delay(n: int) -> float: - """ - Compute delay for automated retry policy. - - Parameters - ---------- - n : int - Current retry attempt number (1-based). - - Returns - ------- - float - Number of seconds to wait before the next retry. - """ - wait = (1 / (1 + math.exp(-(n * 0.5 - 4)))) * 60 - variation = random.gauss(0, wait / 10) - return max(1.0, wait + variation) - - -def human_delay(n: int) -> float: - """ - Compute delay for human-like retry policy. - - Parameters - ---------- - n : int - Current retry attempt number (1-based). - - Returns - ------- - float - Number of seconds to wait before the next retry. 
- """ - return max(1.0, n) From 1c922af27041c8b5a101b4edf94566c61f43974b Mon Sep 17 00:00:00 2001 From: geetu040 Date: Fri, 20 Feb 2026 19:23:02 +0500 Subject: [PATCH 121/156] Revert "remove delay methods in HTTPClient" This reverts commit aefdb384fc93c1c6963c5935723e4eb2ae912742. --- openml/_api/clients/http.py | 41 +++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/openml/_api/clients/http.py b/openml/_api/clients/http.py index 270fe2719..595cef914 100644 --- a/openml/_api/clients/http.py +++ b/openml/_api/clients/http.py @@ -3,6 +3,8 @@ import hashlib import json import logging +import math +import random import time import xml from collections.abc import Callable, Mapping @@ -233,6 +235,45 @@ def __init__( # noqa: PLR0913 ) self.headers: dict[str, str] = {"user-agent": f"openml-python/{__version__}"} + def _robot_delay(self, n: int) -> float: + """ + Compute delay for automated retry policy. + + Parameters + ---------- + n : int + Current retry attempt number (1-based). + + Returns + ------- + float + Number of seconds to wait before the next retry. + + Notes + ----- + Uses a sigmoid-based growth curve with Gaussian noise to gradually + increase waiting time. + """ + wait = (1 / (1 + math.exp(-(n * 0.5 - 4)))) * 60 + variation = random.gauss(0, wait / 10) + return max(1.0, wait + variation) + + def _human_delay(self, n: int) -> float: + """ + Compute delay for human-like retry policy. + + Parameters + ---------- + n : int + Current retry attempt number (1-based). + + Returns + ------- + float + Number of seconds to wait before the next retry. 
+ """ + return max(1.0, n) + def _parse_exception_response( self, response: Response, From a7b2d21c4a052e33a8dcd73f6613ea665fcb207a Mon Sep 17 00:00:00 2001 From: geetu040 Date: Fri, 20 Feb 2026 20:13:43 +0500 Subject: [PATCH 122/156] allow api_key=None --- openml/_api/clients/http.py | 35 ++++++++++++++--------------------- openml/_api/setup/config.py | 11 ++++++----- 2 files changed, 20 insertions(+), 26 deletions(-) diff --git a/openml/_api/clients/http.py b/openml/_api/clients/http.py index 595cef914..d21009ec1 100644 --- a/openml/_api/clients/http.py +++ b/openml/_api/clients/http.py @@ -19,8 +19,8 @@ from openml.__version__ import __version__ from openml.enums import RetryPolicy from openml.exceptions import ( + OpenMLAuthenticationError, OpenMLHashException, - OpenMLNotAuthorizedError, OpenMLServerError, OpenMLServerException, OpenMLServerNoResult, @@ -203,8 +203,9 @@ class HTTPClient: Base server URL (e.g., ``https://www.openml.org``). base_url : str Base API path appended to the server URL. - api_key : str - API key used for authenticated endpoints. + api_key : str | None + API key used for authenticated endpoints. If None, authenticated + requests cannot be performed. retries : int Maximum number of retry attempts for failed requests. 
retry_policy : RetryPolicy @@ -218,7 +219,7 @@ def __init__( # noqa: PLR0913 *, server: str, base_url: str, - api_key: str, + api_key: str | None, retries: int, retry_policy: RetryPolicy, cache: HTTPCache, @@ -362,23 +363,6 @@ def _raise_code_specific_error( # file_elements['description'] is the XML file description of the flow message = f"\n{files['description']}\n{message}" - if code in [ - 102, # flow/exists post - 137, # dataset post - 350, # dataset/42 delete - 310, # flow/ post - 320, # flow/42 delete - 400, # run/42 delete - 460, # task/42 delete - ]: - raise OpenMLNotAuthorizedError( - message=( - f"The API call {url} requires authentication via an API key.\nPlease configure " - "OpenML-Python to use your API as described in this example:" - "\nhttps://openml.github.io/openml-python/latest/examples/Basics/introduction_tutorial/#authentication" - ) - ) - # Propagate all server errors to the calling functions, except # for 107 which represents a database connection error. # These are typically caused by high server load, @@ -589,6 +573,15 @@ def request( # noqa: PLR0913, C901 data = request_kwargs.pop("data", {}).copy() if use_api_key: + if self.api_key is None: + raise OpenMLAuthenticationError( + message=( + f"The API call {url} requires authentication via an API key. " + "Please configure OpenML-Python to use your API " + "as described in this example: " + "https://openml.github.io/openml-python/latest/examples/Basics/introduction_tutorial/#authentication" + ) + ) params["api_key"] = self.api_key if method.upper() in {"POST", "PUT", "PATCH"}: diff --git a/openml/_api/setup/config.py b/openml/_api/setup/config.py index 5f6cd7891..5f73b7e9b 100644 --- a/openml/_api/setup/config.py +++ b/openml/_api/setup/config.py @@ -18,13 +18,14 @@ class APIConfig: Base server URL for the API. base_url : str API-specific base path appended to the server URL. - api_key : str - API key used for authentication. 
+ api_key : str | None, default=None + API key used for authentication. If None, requests are made + without authentication. """ server: str base_url: str - api_key: str + api_key: str | None = None @dataclass @@ -74,12 +75,12 @@ class Config: APIVersion.V1: APIConfig( server="https://www.openml.org/", base_url="api/v1/xml/", - api_key="", + api_key=None, ), APIVersion.V2: APIConfig( server="http://localhost:8002/", base_url="", - api_key="", + api_key=None, ), } ) From 27fe790f8141448e6dcc6624930c703d1e64c8a5 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Fri, 20 Feb 2026 20:13:55 +0500 Subject: [PATCH 123/156] add tests for api_key=None --- tests/test_api/test_http.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tests/test_api/test_http.py b/tests/test_api/test_http.py index 80001cc8d..cf582f24f 100644 --- a/tests/test_api/test_http.py +++ b/tests/test_api/test_http.py @@ -6,6 +6,7 @@ from pathlib import Path from urllib.parse import urljoin, urlparse from openml.enums import APIVersion +from openml.exceptions import OpenMLAuthenticationError from openml._api import HTTPClient @@ -123,6 +124,23 @@ def test_get_refresh_cache(self): self.assertEqual(response2.status_code, 200) self.assertEqual(response1.content, response2.content) + @pytest.mark.uses_test_server() + def test_get_with_api_key(self): + response = self.http_client.get("task/1", use_api_key=True) + + self.assertEqual(response.status_code, 200) + self.assertIn(b" Date: Tue, 24 Feb 2026 00:16:40 +0500 Subject: [PATCH 124/156] update cache not found message --- openml/_api/clients/http.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openml/_api/clients/http.py b/openml/_api/clients/http.py index d21009ec1..f8e794db3 100644 --- a/openml/_api/clients/http.py +++ b/openml/_api/clients/http.py @@ -118,7 +118,7 @@ def load(self, key: str) -> Response: path = self._key_to_path(key) if not path.exists(): - raise FileNotFoundError(f"Cache directory not found: {path}") 
+ raise FileNotFoundError(f"Cache entry not found: {path}") meta_path = path / "meta.json" headers_path = path / "headers.json" From 72ea1a48701166ce7aac93b794cefe1bd09ea0d8 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Tue, 24 Feb 2026 00:23:15 +0500 Subject: [PATCH 125/156] update docs for path in HTTPCache --- openml/_api/clients/http.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/openml/_api/clients/http.py b/openml/_api/clients/http.py index f8e794db3..98ee62694 100644 --- a/openml/_api/clients/http.py +++ b/openml/_api/clients/http.py @@ -41,6 +41,18 @@ class HTTPCache: path : pathlib.Path Base directory where cache entries are stored. + Each request (cache enabled) is mapped to a subdirectory + under this path using the following scheme: + + - The domain is split into components and reversed + (e.g. ``www.openml.org`` → ``org/openml/www``). + - URL path segments are appended as directories. + - Query parameters (excluding ``api_key``) are URL-encoded + and appended as the final path component. + + The resulting directory contains three files: + ``meta.json``, ``headers.json``, and ``body.bin``. 
+ Notes ----- The cache key is derived from the URL (domain and path components) and query From a696c491d2337f5c4bfbfac1217c00e286a0d7a7 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Tue, 24 Feb 2026 00:24:28 +0500 Subject: [PATCH 126/156] remove elapsed from cached meta --- openml/_api/clients/http.py | 1 - 1 file changed, 1 deletion(-) diff --git a/openml/_api/clients/http.py b/openml/_api/clients/http.py index 98ee62694..c69e74f84 100644 --- a/openml/_api/clients/http.py +++ b/openml/_api/clients/http.py @@ -187,7 +187,6 @@ def save(self, key: str, response: Response) -> None: "url": response.url, "reason": response.reason, "encoding": response.encoding, - "elapsed": response.elapsed.total_seconds(), "created_at": time.time(), "request": { "method": response.request.method if response.request else None, From 755636d5574dc902f6aaa9db682796744c2c3e60 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Tue, 24 Feb 2026 00:27:28 +0500 Subject: [PATCH 127/156] move self.headers to _HEADERS --- openml/_api/clients/http.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/openml/_api/clients/http.py b/openml/_api/clients/http.py index c69e74f84..e881a162d 100644 --- a/openml/_api/clients/http.py +++ b/openml/_api/clients/http.py @@ -26,6 +26,8 @@ OpenMLServerNoResult, ) +_HEADERS = {"user-agent": f"openml-python/{__version__}"} + class HTTPCache: """ @@ -245,7 +247,6 @@ def __init__( # noqa: PLR0913 self.retry_func = ( self._human_delay if retry_policy == RetryPolicy.HUMAN else self._robot_delay ) - self.headers: dict[str, str] = {"user-agent": f"openml-python/{__version__}"} def _robot_delay(self, n: int) -> float: """ @@ -601,7 +602,7 @@ def request( # noqa: PLR0913, C901 # prepare headers headers = request_kwargs.pop("headers", {}).copy() - headers.update(self.headers) + headers.update(_HEADERS) files = request_kwargs.pop("files", None) From d07af340af7d5cbda039d185e3b4c2c93d53e365 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Tue, 24 Feb 2026 
00:34:53 +0500 Subject: [PATCH 128/156] fix indentation in docstrings of _resolve_default_cache_dir --- openml/_api/setup/_utils.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/openml/_api/setup/_utils.py b/openml/_api/setup/_utils.py index 678138b3e..2a4b2fc18 100644 --- a/openml/_api/setup/_utils.py +++ b/openml/_api/setup/_utils.py @@ -31,11 +31,11 @@ def _resolve_default_cache_dir() -> Path: is used as the cache directory. - On non-Linux systems, the default is ``~/.openml``. - On Linux, the function follows the XDG Base Directory Specification: - - Uses ``$XDG_CACHE_HOME/openml`` if ``XDG_CACHE_HOME`` is set. - - Falls back to ``~/.cache/openml`` if ``XDG_CACHE_HOME`` is not set. - - If an old cache directory exists at ``$XDG_CACHE_HOME/org/openml``, - a warning is logged for backward compatibility. In this case, - ``$XDG_CACHE_HOME`` is returned instead of ``$XDG_CACHE_HOME/openml``. + - Uses ``$XDG_CACHE_HOME/openml`` if ``XDG_CACHE_HOME`` is set. + - Falls back to ``~/.cache/openml`` if ``XDG_CACHE_HOME`` is not set. + - If an old cache directory exists at ``$XDG_CACHE_HOME/org/openml``, + a warning is logged for backward compatibility. In this case, + ``$XDG_CACHE_HOME`` is returned instead of ``$XDG_CACHE_HOME/openml``. 
""" user_defined_cache_dir = os.environ.get("OPENML_CACHE_DIR") if user_defined_cache_dir is not None: From 2d9c8ec4c19064f316904f09c01aa7194413c93c Mon Sep 17 00:00:00 2001 From: Armaghan Shakir Date: Tue, 24 Feb 2026 00:45:28 +0500 Subject: [PATCH 129/156] Update openml/_api/clients/http.py Co-authored-by: Matthias Feurer --- openml/_api/clients/http.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openml/_api/clients/http.py b/openml/_api/clients/http.py index e881a162d..299e4cd05 100644 --- a/openml/_api/clients/http.py +++ b/openml/_api/clients/http.py @@ -371,7 +371,7 @@ def _raise_code_specific_error( raise OpenMLServerNoResult(code=code, message=message, url=url) # 163: failure to validate flow XML (https://www.openml.org/api_docs#!/flow/post_flow) - if code in [163] and files is not None and "description" in files: + if code == 163 and files is not None and "description" in files: # file_elements['description'] is the XML file description of the flow message = f"\n{files['description']}\n{message}" From 045d8961eab654cc5a76fb7c7ecd05671191acb9 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Tue, 24 Feb 2026 01:07:15 +0500 Subject: [PATCH 130/156] move _handle_delete_exception and_get_endpoint_name, legal_resources --- openml/_api/resources/base/base.py | 78 ++++++++++++++- openml/_api/resources/base/versions.py | 132 +++++++------------------ 2 files changed, 115 insertions(+), 95 deletions(-) diff --git a/openml/_api/resources/base/base.py b/openml/_api/resources/base/base.py index 68aae2162..625681e3b 100644 --- a/openml/_api/resources/base/base.py +++ b/openml/_api/resources/base/base.py @@ -3,7 +3,12 @@ from abc import ABC, abstractmethod from typing import TYPE_CHECKING, NoReturn -from openml.exceptions import OpenMLNotSupportedError +from openml.exceptions import ( + OpenMLNotAuthorizedError, + OpenMLNotSupportedError, + OpenMLServerError, + OpenMLServerException, +) if TYPE_CHECKING: from collections.abc import Mapping @@ -135,6 
+140,77 @@ def untag(self, resource_id: int, tag: str) -> list[str]: Concrete subclasses must implement this method. """ + @abstractmethod + def _get_endpoint_name(self) -> str: + """ + Return the endpoint name for the current resource type. + + Returns + ------- + str + Endpoint segment used in API paths. + + Notes + ----- + Datasets use the special endpoint name ``"data"`` instead of + their enum value. + """ + + def _handle_delete_exception( + self, resource_type: str, exception: OpenMLServerException + ) -> None: + """ + Map V1 deletion error codes to more specific exceptions. + + Parameters + ---------- + resource_type : str + Endpoint name of the resource type. + exception : OpenMLServerException + Original exception raised during deletion. + + Raises + ------ + OpenMLNotAuthorizedError + If the resource cannot be deleted due to ownership or + dependent entities. + OpenMLServerError + If deletion fails for an unknown reason. + OpenMLServerException + If the error code is not specially handled. + """ + # https://github.com/openml/OpenML/blob/21f6188d08ac24fcd2df06ab94cf421c946971b0/openml_OS/views/pages/api_new/v1/xml/pre.php + # Most exceptions are descriptive enough to be raised as their standard + # OpenMLServerException, however there are two cases where we add information: + # - a generic "failed" message, we direct them to the right issue board + # - when the user successfully authenticates with the server, + # but user is not allowed to take the requested action, + # in which case we specify a OpenMLNotAuthorizedError. + by_other_user = [323, 353, 393, 453, 594] + has_dependent_entities = [324, 326, 327, 328, 354, 454, 464, 595] + unknown_reason = [325, 355, 394, 455, 593] + if exception.code in by_other_user: + raise OpenMLNotAuthorizedError( + message=( + f"The {resource_type} can not be deleted because it was not uploaded by you." 
+ ), + ) from exception + if exception.code in has_dependent_entities: + raise OpenMLNotAuthorizedError( + message=( + f"The {resource_type} can not be deleted because " + f"it still has associated entities: {exception.message}" + ), + ) from exception + if exception.code in unknown_reason: + raise OpenMLServerError( + message=( + f"The {resource_type} can not be deleted for unknown reason," + " please open an issue at: https://github.com/openml/openml/issues/new" + ), + ) from exception + raise exception + def _not_supported(self, *, method: str) -> NoReturn: """ Raise an error indicating that a method is not supported. diff --git a/openml/_api/resources/base/versions.py b/openml/_api/resources/base/versions.py index dc41ba971..38e6596cd 100644 --- a/openml/_api/resources/base/versions.py +++ b/openml/_api/resources/base/versions.py @@ -7,13 +7,28 @@ from openml.enums import APIVersion, ResourceType from openml.exceptions import ( - OpenMLNotAuthorizedError, - OpenMLServerError, OpenMLServerException, ) from .base import ResourceAPI +_LEGAL_RESOURCES_DELETE = [ + ResourceType.DATASET, + ResourceType.TASK, + ResourceType.FLOW, + ResourceType.STUDY, + ResourceType.RUN, + ResourceType.USER, +] + +_LEGAL_RESOURCES_TAG = [ + ResourceType.DATASET, + ResourceType.TASK, + ResourceType.FLOW, + ResourceType.SETUP, + ResourceType.RUN, +] + class ResourceV1API(ResourceAPI): """ @@ -84,19 +99,17 @@ def delete(self, resource_id: int) -> bool: OpenMLServerException For other server-side errors. 
""" - resource_type = self._get_endpoint_name() + if self.resource_type not in _LEGAL_RESOURCES_DELETE: + raise ValueError(f"Can't delete a {self.resource_type.value}") - legal_resources = {"data", "flow", "task", "run", "study", "user"} - if resource_type not in legal_resources: - raise ValueError(f"Can't delete a {resource_type}") - - path = f"{resource_type}/{resource_id}" + endpoint_name = self._get_endpoint_name() + path = f"{endpoint_name}/{resource_id}" try: response = self._http.delete(path) result = xmltodict.parse(response.content) - return f"oml:{resource_type}_delete" in result + return f"oml:{endpoint_name}_delete" in result except OpenMLServerException as e: - self._handle_delete_exception(resource_type, e) + self._handle_delete_exception(endpoint_name, e) raise def tag(self, resource_id: int, tag: str) -> list[str]: @@ -122,17 +135,15 @@ def tag(self, resource_id: int, tag: str) -> list[str]: OpenMLServerException If the server returns an error. """ - resource_type = self._get_endpoint_name() - - legal_resources = {"data", "task", "flow", "setup", "run"} - if resource_type not in legal_resources: - raise ValueError(f"Can't tag a {resource_type}") + if self.resource_type not in _LEGAL_RESOURCES_TAG: + raise ValueError(f"Can't tag a {self.resource_type.value}") - path = f"{resource_type}/tag" - data = {f"{resource_type}_id": resource_id, "tag": tag} + endpoint_name = self._get_endpoint_name() + path = f"{endpoint_name}/tag" + data = {f"{endpoint_name}_id": resource_id, "tag": tag} response = self._http.post(path, data=data) - main_tag = f"oml:{resource_type}_tag" + main_tag = f"oml:{endpoint_name}_tag" parsed_response = xmltodict.parse(response.content, force_list={"oml:tag"}) result = parsed_response[main_tag] tags: list[str] = result.get("oml:tag", []) @@ -162,17 +173,15 @@ def untag(self, resource_id: int, tag: str) -> list[str]: OpenMLServerException If the server returns an error. 
""" - resource_type = self._get_endpoint_name() + if self.resource_type not in _LEGAL_RESOURCES_TAG: + raise ValueError(f"Can't untag a {self.resource_type.value}") - legal_resources = {"data", "task", "flow", "setup", "run"} - if resource_type not in legal_resources: - raise ValueError(f"Can't untag a {resource_type}") - - path = f"{resource_type}/untag" - data = {f"{resource_type}_id": resource_id, "tag": tag} + endpoint_name = self._get_endpoint_name() + path = f"{endpoint_name}/untag" + data = {f"{endpoint_name}_id": resource_id, "tag": tag} response = self._http.post(path, data=data) - main_tag = f"oml:{resource_type}_untag" + main_tag = f"oml:{endpoint_name}_untag" parsed_response = xmltodict.parse(response.content, force_list={"oml:tag"}) result = parsed_response[main_tag] tags: list[str] = result.get("oml:tag", []) @@ -180,78 +189,10 @@ def untag(self, resource_id: int, tag: str) -> list[str]: return tags def _get_endpoint_name(self) -> str: - """ - Return the V1 endpoint name for the current resource type. - - Returns - ------- - str - Endpoint segment used in V1 API paths. - - Notes - ----- - Datasets use the special endpoint name ``"data"`` instead of - their enum value. - """ if self.resource_type == ResourceType.DATASET: return "data" return cast("str", self.resource_type.value) - def _handle_delete_exception( - self, resource_type: str, exception: OpenMLServerException - ) -> None: - """ - Map V1 deletion error codes to more specific exceptions. - - Parameters - ---------- - resource_type : str - Endpoint name of the resource type. - exception : OpenMLServerException - Original exception raised during deletion. - - Raises - ------ - OpenMLNotAuthorizedError - If the resource cannot be deleted due to ownership or - dependent entities. - OpenMLServerError - If deletion fails for an unknown reason. - OpenMLServerException - If the error code is not specially handled. 
- """ - # https://github.com/openml/OpenML/blob/21f6188d08ac24fcd2df06ab94cf421c946971b0/openml_OS/views/pages/api_new/v1/xml/pre.php - # Most exceptions are descriptive enough to be raised as their standard - # OpenMLServerException, however there are two cases where we add information: - # - a generic "failed" message, we direct them to the right issue board - # - when the user successfully authenticates with the server, - # but user is not allowed to take the requested action, - # in which case we specify a OpenMLNotAuthorizedError. - by_other_user = [323, 353, 393, 453, 594] - has_dependent_entities = [324, 326, 327, 328, 354, 454, 464, 595] - unknown_reason = [325, 355, 394, 455, 593] - if exception.code in by_other_user: - raise OpenMLNotAuthorizedError( - message=( - f"The {resource_type} can not be deleted because it was not uploaded by you." - ), - ) from exception - if exception.code in has_dependent_entities: - raise OpenMLNotAuthorizedError( - message=( - f"The {resource_type} can not be deleted because " - f"it still has associated entities: {exception.message}" - ), - ) from exception - if exception.code in unknown_reason: - raise OpenMLServerError( - message=( - f"The {resource_type} can not be deleted for unknown reason," - " please open an issue at: https://github.com/openml/openml/issues/new" - ), - ) from exception - raise exception - def _extract_id_from_upload(self, parsed: Mapping[str, Any]) -> int: """ Extract the resource identifier from an XML upload response. 
@@ -317,3 +258,6 @@ def tag(self, resource_id: int, tag: str) -> list[str]: # noqa: ARG002 def untag(self, resource_id: int, tag: str) -> list[str]: # noqa: ARG002 self._not_supported(method="untag") + + def _get_endpoint_name(self) -> str: + return cast("str", self.resource_type.value) From c437966ad2273900b96a61f46f3bdd95e0dd27cf Mon Sep 17 00:00:00 2001 From: geetu040 Date: Tue, 24 Feb 2026 01:13:01 +0500 Subject: [PATCH 131/156] set HTTPClient.headers --- openml/_api/clients/http.py | 1 + 1 file changed, 1 insertion(+) diff --git a/openml/_api/clients/http.py b/openml/_api/clients/http.py index 299e4cd05..512bcd56f 100644 --- a/openml/_api/clients/http.py +++ b/openml/_api/clients/http.py @@ -247,6 +247,7 @@ def __init__( # noqa: PLR0913 self.retry_func = ( self._human_delay if retry_policy == RetryPolicy.HUMAN else self._robot_delay ) + self.headers = _HEADERS def _robot_delay(self, n: int) -> float: """ From e27470a5a07385ab1a73875a1090c859c4645486 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Tue, 24 Feb 2026 01:17:14 +0500 Subject: [PATCH 132/156] remove main_tag --- openml/_api/resources/base/versions.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/openml/_api/resources/base/versions.py b/openml/_api/resources/base/versions.py index 38e6596cd..bba59b869 100644 --- a/openml/_api/resources/base/versions.py +++ b/openml/_api/resources/base/versions.py @@ -143,9 +143,8 @@ def tag(self, resource_id: int, tag: str) -> list[str]: data = {f"{endpoint_name}_id": resource_id, "tag": tag} response = self._http.post(path, data=data) - main_tag = f"oml:{endpoint_name}_tag" parsed_response = xmltodict.parse(response.content, force_list={"oml:tag"}) - result = parsed_response[main_tag] + result = parsed_response[f"oml:{endpoint_name}_tag"] tags: list[str] = result.get("oml:tag", []) return tags @@ -181,9 +180,8 @@ def untag(self, resource_id: int, tag: str) -> list[str]: data = {f"{endpoint_name}_id": resource_id, "tag": tag} response = 
self._http.post(path, data=data) - main_tag = f"oml:{endpoint_name}_untag" parsed_response = xmltodict.parse(response.content, force_list={"oml:tag"}) - result = parsed_response[main_tag] + result = parsed_response[f"oml:{endpoint_name}_untag"] tags: list[str] = result.get("oml:tag", []) return tags From d04d9560551f5227ec04b403cb13234c405ae6b7 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Tue, 24 Feb 2026 01:30:08 +0500 Subject: [PATCH 133/156] remove and merge TestAPIBase into TestBase --- openml/testing.py | 77 ++++++++++++++++----------------- tests/test_api/test_http.py | 4 +- tests/test_api/test_versions.py | 4 +- 3 files changed, 41 insertions(+), 44 deletions(-) diff --git a/openml/testing.py b/openml/testing.py index 4bc5b25a6..00492e624 100644 --- a/openml/testing.py +++ b/openml/testing.py @@ -57,6 +57,11 @@ class TestBase(unittest.TestCase): logger = logging.getLogger("unit_tests_published_entities") logger.setLevel(logging.DEBUG) + # migration-specific attributes + cache: HTTPCache + http_clients: dict[APIVersion, HTTPClient] + minio_client: MinIOClient + def setUp(self, n_levels: int = 1, tmpdir_suffix: str = "") -> None: """Setup variables and temporary directories. 
@@ -111,6 +116,38 @@ def setUp(self, n_levels: int = 1, tmpdir_suffix: str = "") -> None: openml.config.set_retry_policy("robot", n_retries=20) openml.config._sync_api_config() + # migration-specific attributes + retries = self.connection_n_retries + retry_policy = RetryPolicy.HUMAN if self.retry_policy == "human" else RetryPolicy.ROBOT + cache_dir = self.static_cache_dir + + v1_server = self.test_server.split("api/")[0] + v1_base_url = self.test_server.replace(v1_server, "").rstrip("/") + "/" + v1_api_key = self.user_key + + self.cache = HTTPCache( + path=cache_dir, + ) + self.http_clients = { + APIVersion.V1: HTTPClient( + server=v1_server, + base_url=v1_base_url, + api_key=v1_api_key, + retries=retries, + retry_policy=retry_policy, + cache=self.cache, + ), + APIVersion.V2: HTTPClient( + server="http://localhost:8002/", + base_url="", + api_key="", + retries=retries, + retry_policy=retry_policy, + cache=self.cache, + ), + } + self.minio_client = MinIOClient(path=cache_dir) + def use_production_server(self) -> None: """ Use the production server for the OpenML API calls. 
@@ -280,46 +317,6 @@ def _check_fold_timing_evaluations( # noqa: PLR0913 assert evaluation >= min_val assert evaluation <= max_val - -class TestAPIBase(TestBase): - cache: HTTPCache - http_clients: dict[APIVersion, HTTPClient] - minio_client: MinIOClient - - def setUp(self, n_levels: int = 1, tmpdir_suffix: str = "") -> None: - super().setUp(n_levels=n_levels, tmpdir_suffix=tmpdir_suffix) - - retries = self.connection_n_retries - retry_policy = RetryPolicy.HUMAN if self.retry_policy == "human" else RetryPolicy.ROBOT - cache_dir = self.static_cache_dir - - v1_server = self.test_server.split("api/")[0] - v1_base_url = self.test_server.replace(v1_server, "").rstrip("/") + "/" - v1_api_key = self.user_key - - self.cache = HTTPCache( - path=cache_dir, - ) - self.http_clients = { - APIVersion.V1: HTTPClient( - server=v1_server, - base_url=v1_base_url, - api_key=v1_api_key, - retries=retries, - retry_policy=retry_policy, - cache=self.cache, - ), - APIVersion.V2: HTTPClient( - server="http://localhost:8002/", - base_url="", - api_key="", - retries=retries, - retry_policy=retry_policy, - cache=self.cache, - ), - } - self.minio_client = MinIOClient(path=cache_dir) - def _create_resource(self, api_version: APIVersion, resource_type: ResourceType) -> ResourceAPI: http_client = self.http_clients[api_version] resource_cls = API_REGISTRY[api_version][resource_type] diff --git a/tests/test_api/test_http.py b/tests/test_api/test_http.py index cf582f24f..9608a3cda 100644 --- a/tests/test_api/test_http.py +++ b/tests/test_api/test_http.py @@ -1,7 +1,7 @@ from requests import Response, Request, Session from unittest.mock import patch import pytest -from openml.testing import TestAPIBase +from openml.testing import TestBase import os from pathlib import Path from urllib.parse import urljoin, urlparse @@ -10,7 +10,7 @@ from openml._api import HTTPClient -class TestHTTPClient(TestAPIBase): +class TestHTTPClient(TestBase): http_client: HTTPClient def setUp(self): diff --git 
a/tests/test_api/test_versions.py b/tests/test_api/test_versions.py index 2899cf6a7..6eec55874 100644 --- a/tests/test_api/test_versions.py +++ b/tests/test_api/test_versions.py @@ -1,13 +1,13 @@ import pytest from requests import Session, Response from unittest.mock import patch -from openml.testing import TestAPIBase +from openml.testing import TestBase from openml._api import FallbackProxy, ResourceAPI from openml.enums import ResourceType, APIVersion from openml.exceptions import OpenMLNotSupportedError -class TestResourceAPIBase(TestAPIBase): +class TestResourceAPIBase(TestBase): resource: ResourceAPI | FallbackProxy @property From 9263f7f51e4988276b49a5688bdfe689aa89fe15 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Tue, 24 Feb 2026 01:59:05 +0500 Subject: [PATCH 134/156] minor change in TestHTTPClient.test_cache --- tests/test_api/test_http.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_api/test_http.py b/tests/test_api/test_http.py index 9608a3cda..4a9ca6fc3 100644 --- a/tests/test_api/test_http.py +++ b/tests/test_api/test_http.py @@ -44,7 +44,7 @@ def test_cache(self): # validate key self.assertEqual(key, expected_key) - # create fake response + # create mock response req = Request("GET", url).prepare() response = Response() response.status_code = 200 @@ -54,7 +54,7 @@ def test_cache(self): response.headers = {"Content-Type": "text/xml"} response.encoding = "utf-8" response.request = req - response.elapsed = type("Elapsed", (), {"total_seconds": lambda self: 0.1})() + response.elapsed = type("Elapsed", (), {"total_seconds": lambda x: 0.1})() # save to cache self.cache.save(key, response) From 79dea296aeac819fd1ae9ffa30ca4456d232c538 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Tue, 24 Feb 2026 02:03:17 +0500 Subject: [PATCH 135/156] make HTTPClient.request private --- openml/_api/clients/http.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/openml/_api/clients/http.py 
b/openml/_api/clients/http.py index 512bcd56f..e36f3a557 100644 --- a/openml/_api/clients/http.py +++ b/openml/_api/clients/http.py @@ -467,7 +467,7 @@ def _validate_response( return exception - def _request( # noqa: PLR0913 + def __request( # noqa: PLR0913 self, session: requests.Session, method: str, @@ -535,7 +535,7 @@ def _request( # noqa: PLR0913 return response, exception - def request( # noqa: PLR0913, C901 + def _request( # noqa: PLR0913, C901 self, method: str, path: str, @@ -618,7 +618,7 @@ def request( # noqa: PLR0913, C901 with requests.Session() as session: for retry_counter in range(1, retries + 1): - response, exception = self._request( + response, exception = self.__request( session=session, method=method, url=url, @@ -709,7 +709,7 @@ def get( requests.Response HTTP response. """ - return self.request( + return self._request( method="GET", path=path, enable_cache=enable_cache, @@ -743,7 +743,7 @@ def post( requests.Response HTTP response. """ - return self.request( + return self._request( method="POST", path=path, enable_cache=False, @@ -771,7 +771,7 @@ def delete( requests.Response HTTP response. """ - return self.request( + return self._request( method="DELETE", path=path, enable_cache=False, From f6497c208bb65c2989d0d85e0dd0b021591a3cde Mon Sep 17 00:00:00 2001 From: geetu040 Date: Tue, 24 Feb 2026 02:06:43 +0500 Subject: [PATCH 136/156] Revert "update FallbackProxy" This reverts commit 7ac16726c4b01aa4340d8aadabb2b8c28f7f0067. 
--- openml/_api/resources/base/fallback.py | 165 ++++++++++++++++++++----- openml/_api/setup/builder.py | 5 +- tests/test_api/test_versions.py | 5 +- 3 files changed, 137 insertions(+), 38 deletions(-) diff --git a/openml/_api/resources/base/fallback.py b/openml/_api/resources/base/fallback.py index 6b714c030..9b8f64a17 100644 --- a/openml/_api/resources/base/fallback.py +++ b/openml/_api/resources/base/fallback.py @@ -1,61 +1,166 @@ from __future__ import annotations from collections.abc import Callable -from typing import TYPE_CHECKING, Any +from typing import Any from openml.exceptions import OpenMLNotSupportedError -if TYPE_CHECKING: - from .base import ResourceAPI - class FallbackProxy: """ - Proxy object that provides transparent fallback between two API versions. + Proxy object that provides transparent fallback across multiple API versions. + + This class delegates attribute access to a sequence of API implementations. + When a callable attribute is invoked and raises ``OpenMLNotSupportedError``, + the proxy automatically attempts the same method on subsequent API instances + until one succeeds. Parameters ---------- - primary_api : Any - Primary API implementation. - fallback_api : Any - Secondary API implementation used if the primary raises - ``OpenMLNotSupportedError``. + *api_versions : Any + One or more API implementation instances ordered by priority. + The first API is treated as the primary implementation, and + subsequent APIs are used as fallbacks. + + Raises + ------ + ValueError + If no API implementations are provided. + + Notes + ----- + Attribute lookup is performed dynamically via ``__getattr__``. + Only methods that raise ``OpenMLNotSupportedError`` trigger fallback + behavior. Other exceptions are propagated immediately. 
""" - def __init__(self, primary_api: ResourceAPI, fallback_api: ResourceAPI): - self._primary = primary_api - self._fallback = fallback_api + def __init__(self, *api_versions: Any): + if not api_versions: + raise ValueError("At least one API version must be provided") + self._apis = api_versions def __getattr__(self, name: str) -> Any: - primary_attr = getattr(self._primary, name, None) - fallback_attr = getattr(self._fallback, name, None) + """ + Dynamically resolve attribute access across API implementations. + + Parameters + ---------- + name : str + Name of the attribute being accessed. - if primary_attr is None and fallback_attr is None: - raise AttributeError(f"{self.__class__.__name__} has no attribute {name}") + Returns + ------- + Any + The resolved attribute. If it is callable, a wrapped function + providing fallback behavior is returned. - # If attribute exists on primary - if primary_attr is not None: - if callable(primary_attr): - return self._wrap_callable(name, primary_attr) - return primary_attr + Raises + ------ + AttributeError + If none of the API implementations define the attribute. + """ + api, attr = self._find_attr(name) + if callable(attr): + return self._wrap_callable(name, api, attr) + return attr - # Otherwise return fallback attribute directly - return fallback_attr + def _find_attr(self, name: str) -> tuple[Any, Any]: + """ + Find the first API implementation that defines a given attribute. + + Parameters + ---------- + name : str + Name of the attribute to search for. + + Returns + ------- + tuple of (Any, Any) + The API instance and the corresponding attribute. + + Raises + ------ + AttributeError + If no API implementation defines the attribute. 
+ """ + for api in self._apis: + attr = getattr(api, name, None) + if attr is not None: + return api, attr + raise AttributeError(f"{self.__class__.__name__} has no attribute {name}") def _wrap_callable( self, name: str, + primary_api: Any, primary_attr: Callable[..., Any], ) -> Callable[..., Any]: + """ + Wrap a callable attribute to enable fallback behavior. + + Parameters + ---------- + name : str + Name of the method being wrapped. + primary_api : Any + Primary API instance providing the callable. + primary_attr : Callable[..., Any] + Callable attribute obtained from the primary API. + + Returns + ------- + Callable[..., Any] + Wrapped function that attempts the primary call first and + falls back to other APIs if ``OpenMLNotSupportedError`` is raised. + """ + def wrapper(*args: Any, **kwargs: Any) -> Any: try: return primary_attr(*args, **kwargs) except OpenMLNotSupportedError: - fallback_attr = getattr(self._fallback, name, None) - if callable(fallback_attr): - return fallback_attr(*args, **kwargs) - raise OpenMLNotSupportedError( - f"Method '{name}' not supported by primary or fallback API" - ) from None + return self._call_fallbacks(name, primary_api, *args, **kwargs) return wrapper + + def _call_fallbacks( + self, + name: str, + skip_api: Any, + *args: Any, + **kwargs: Any, + ) -> Any: + """ + Attempt to call a method on fallback API implementations. + + Parameters + ---------- + name : str + Name of the method to invoke. + skip_api : Any + API instance to skip (typically the primary API that already failed). + *args : Any + Positional arguments passed to the method. + **kwargs : Any + Keyword arguments passed to the method. + + Returns + ------- + Any + Result returned by the first successful fallback invocation. + + Raises + ------ + OpenMLNotSupportedError + If all API implementations either do not define the method + or raise ``OpenMLNotSupportedError``. 
+ """ + for api in self._apis: + if api is skip_api: + continue + attr = getattr(api, name, None) + if callable(attr): + try: + return attr(*args, **kwargs) + except OpenMLNotSupportedError: + continue + raise OpenMLNotSupportedError(f"Could not fallback to any API for method: {name}") diff --git a/openml/_api/setup/builder.py b/openml/_api/setup/builder.py index 0c96df877..aa6ed4bba 100644 --- a/openml/_api/setup/builder.py +++ b/openml/_api/setup/builder.py @@ -121,10 +121,7 @@ def build(cls, config: Config) -> APIBackendBuilder: ) merged: dict[ResourceType, FallbackProxy] = { - name: FallbackProxy( - primary_api=resource_apis[name], - fallback_api=fallback_resource_apis[name], - ) + name: FallbackProxy(resource_apis[name], fallback_resource_apis[name]) for name in resource_apis } diff --git a/tests/test_api/test_versions.py b/tests/test_api/test_versions.py index 6eec55874..a31595457 100644 --- a/tests/test_api/test_versions.py +++ b/tests/test_api/test_versions.py @@ -164,10 +164,7 @@ def setUp(self): api_version=APIVersion.V2, resource_type=ResourceType.TASK, ) - self.resource = FallbackProxy( - primary_api=resource_v2, - fallback_api=resource_v1, - ) + self.resource = FallbackProxy(resource_v2, resource_v1) def test_publish(self): self._publish() From dce7f5481c4886a412d39eddd9f62b1769864c3a Mon Sep 17 00:00:00 2001 From: geetu040 Date: Tue, 24 Feb 2026 02:13:19 +0500 Subject: [PATCH 137/156] use st_mtime instead of st_ctime for cache refresh test --- tests/test_api/test_http.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_api/test_http.py b/tests/test_api/test_http.py index 4a9ca6fc3..5ad3685a3 100644 --- a/tests/test_api/test_http.py +++ b/tests/test_api/test_http.py @@ -115,10 +115,10 @@ def test_get_refresh_cache(self): cache_path = self.cache._key_to_path(key) / "meta.json" response1 = self.http_client.get(path, enable_cache=True) - response1_cache_time_stamp = cache_path.stat().st_ctime + response1_cache_time_stamp = 
cache_path.stat().st_mtime response2 = self.http_client.get(path, enable_cache=True, refresh_cache=True) - response2_cache_time_stamp = cache_path.stat().st_ctime + response2_cache_time_stamp = cache_path.stat().st_mtime self.assertNotEqual(response1_cache_time_stamp, response2_cache_time_stamp) self.assertEqual(response2.status_code, 200) From 0fc917c57c17485a5416e6bcdd92782ca028ef2c Mon Sep 17 00:00:00 2001 From: geetu040 Date: Tue, 24 Feb 2026 15:47:07 +0500 Subject: [PATCH 138/156] major config refactor --- openml/_api/__init__.py | 6 - openml/_api/clients/http.py | 97 ++++++------ openml/_api/clients/minio.py | 19 +-- openml/_api/setup/__init__.py | 4 - openml/_api/setup/_utils.py | 74 --------- openml/_api/setup/backend.py | 147 +++--------------- openml/_api/setup/builder.py | 46 ++---- openml/_api/setup/config.py | 93 ----------- openml/config.py | 93 ++++++----- openml/testing.py | 38 +---- tests/conftest.py | 3 - tests/test_api/test_http.py | 23 ++- tests/test_datasets/test_dataset_functions.py | 6 - tests/test_openml/test_config.py | 9 +- 14 files changed, 164 insertions(+), 494 deletions(-) delete mode 100644 openml/_api/setup/_utils.py delete mode 100644 openml/_api/setup/config.py diff --git a/openml/_api/__init__.py b/openml/_api/__init__.py index 60aa82762..7766016d1 100644 --- a/openml/_api/__init__.py +++ b/openml/_api/__init__.py @@ -40,9 +40,6 @@ from .setup import ( APIBackend, APIBackendBuilder, - APIConfig, - Config, - ConnectionConfig, _backend, ) @@ -50,9 +47,6 @@ "API_REGISTRY", "APIBackend", "APIBackendBuilder", - "APIConfig", - "Config", - "ConnectionConfig", "DatasetAPI", "DatasetV1API", "DatasetV2API", diff --git a/openml/_api/clients/http.py b/openml/_api/clients/http.py index e36f3a557..f1ed20e7c 100644 --- a/openml/_api/clients/http.py +++ b/openml/_api/clients/http.py @@ -16,8 +16,9 @@ import xmltodict from requests import Response +import openml from openml.__version__ import __version__ -from openml.enums import RetryPolicy 
+from openml.enums import APIVersion, RetryPolicy from openml.exceptions import ( OpenMLAuthenticationError, OpenMLHashException, @@ -26,7 +27,7 @@ OpenMLServerNoResult, ) -_HEADERS = {"user-agent": f"openml-python/{__version__}"} +_HEADERS: dict[str, str] = {"user-agent": f"openml-python/{__version__}"} class HTTPCache: @@ -38,31 +39,15 @@ class HTTPCache: three files: metadata (``meta.json``), headers (``headers.json``), and the raw body (``body.bin``). - Parameters - ---------- - path : pathlib.Path - Base directory where cache entries are stored. - - Each request (cache enabled) is mapped to a subdirectory - under this path using the following scheme: - - - The domain is split into components and reversed - (e.g. ``www.openml.org`` → ``org/openml/www``). - - URL path segments are appended as directories. - - Query parameters (excluding ``api_key``) are URL-encoded - and appended as the final path component. - - The resulting directory contains three files: - ``meta.json``, ``headers.json``, and ``body.bin``. - Notes ----- The cache key is derived from the URL (domain and path components) and query parameters, excluding the ``api_key`` parameter. """ - def __init__(self, *, path: Path) -> None: - self.path = path + @property + def path(self) -> Path: + return Path(openml.config.get_cache_directory()) def get_key(self, url: str, params: dict[str, Any]) -> str: """ @@ -212,42 +197,48 @@ class HTTPClient: Parameters ---------- - server : str - Base server URL (e.g., ``https://www.openml.org``). - base_url : str - Base API path appended to the server URL. - api_key : str | None - API key used for authenticated endpoints. If None, authenticated - requests cannot be performed. - retries : int - Maximum number of retry attempts for failed requests. - retry_policy : RetryPolicy - Strategy controlling delay between retries. - cache : HTTPCache or None, optional - Cache instance for storing and retrieving responses. + api_version : APIVersion + Backend API Version. 
""" - def __init__( # noqa: PLR0913 + def __init__( self, *, - server: str, - base_url: str, - api_key: str | None, - retries: int, - retry_policy: RetryPolicy, - cache: HTTPCache, + api_version: APIVersion, ) -> None: - self.server = server - self.base_url = base_url - self.api_key = api_key - self.retries = retries - self.retry_policy = retry_policy - self.cache = cache - - self.retry_func = ( - self._human_delay if retry_policy == RetryPolicy.HUMAN else self._robot_delay - ) - self.headers = _HEADERS + self.api_version = api_version + + self.cache = HTTPCache() + + @property + def server(self) -> str: + server = openml.config.SERVERS[self.api_version]["server"] + if server is None: + raise ValueError( + f"server found to be None for api_version={self.api_version}" + f" in {openml.config.SERVERS}" + ) + return server + + @property + def api_key(self) -> str | None: + return openml.config.SERVERS[self.api_version]["apikey"] + + @property + def retries(self) -> int: + return openml.config.connection_n_retries + + @property + def retry_policy(self) -> RetryPolicy: + return RetryPolicy.HUMAN if openml.config.retry_policy == "human" else RetryPolicy.ROBOT + + @property + def retry_func(self) -> Callable: + return self._human_delay if self.retry_policy == RetryPolicy.HUMAN else self._robot_delay + + @property + def headers(self) -> dict[str, str]: + return _HEADERS def _robot_delay(self, n: int) -> float: """ @@ -579,7 +570,7 @@ def _request( # noqa: PLR0913, C901 OpenMLHashException If checksum verification fails. 
""" - url = urljoin(self.server, urljoin(self.base_url, path)) + url = urljoin(self.server, path) retries = max(1, self.retries) params = request_kwargs.pop("params", {}).copy() diff --git a/openml/_api/clients/minio.py b/openml/_api/clients/minio.py index e6a94a6e4..baaf91abd 100644 --- a/openml/_api/clients/minio.py +++ b/openml/_api/clients/minio.py @@ -2,8 +2,11 @@ from pathlib import Path +import openml from openml.__version__ import __version__ +_HEADERS: dict[str, str] = {"user-agent": f"openml-python/{__version__}"} + class MinIOClient: """ @@ -14,12 +17,6 @@ class MinIOClient: default HTTP headers. It is intended to be extended with actual request or storage logic elsewhere. - Parameters - ---------- - path : pathlib.Path or None, optional - Base path used for local storage or downloads. If ``None``, no - default path is configured. - Attributes ---------- path : pathlib.Path or None @@ -29,6 +26,10 @@ class MinIOClient: OpenML Python client version. """ - def __init__(self, path: Path) -> None: - self.path = path - self.headers: dict[str, str] = {"user-agent": f"openml-python/{__version__}"} + @property + def path(self) -> Path: + return Path(openml.config.get_cache_directory()) + + @property + def headers(self) -> dict[str, str]: + return _HEADERS diff --git a/openml/_api/setup/__init__.py b/openml/_api/setup/__init__.py index 4c7fce119..80545824f 100644 --- a/openml/_api/setup/__init__.py +++ b/openml/_api/setup/__init__.py @@ -1,14 +1,10 @@ from .backend import APIBackend from .builder import APIBackendBuilder -from .config import APIConfig, Config, ConnectionConfig _backend = APIBackend.get_instance() __all__ = [ "APIBackend", "APIBackendBuilder", - "APIConfig", - "Config", - "ConnectionConfig", "_backend", ] diff --git a/openml/_api/setup/_utils.py b/openml/_api/setup/_utils.py deleted file mode 100644 index 2a4b2fc18..000000000 --- a/openml/_api/setup/_utils.py +++ /dev/null @@ -1,74 +0,0 @@ -from __future__ import annotations - -import logging 
-import os -import platform -from pathlib import Path - -openml_logger = logging.getLogger("openml") - -# Default values (see also https://github.com/openml/OpenML/wiki/Client-API-Standards) -_user_path = Path("~").expanduser().absolute() - - -def _resolve_default_cache_dir() -> Path: - """ - Determine the default cache directory for OpenML data. - - This function checks for user-defined environment variables and - platform-specific defaults to resolve where cached files should - be stored. It also provides backward-compatibility warnings if - legacy directories are detected. - - Returns - ------- - Path - Path to the cache directory that should be used. - - Notes - ----- - - If the environment variable ``OPENML_CACHE_DIR`` is set, its value - is used as the cache directory. - - On non-Linux systems, the default is ``~/.openml``. - - On Linux, the function follows the XDG Base Directory Specification: - - Uses ``$XDG_CACHE_HOME/openml`` if ``XDG_CACHE_HOME`` is set. - - Falls back to ``~/.cache/openml`` if ``XDG_CACHE_HOME`` is not set. - - If an old cache directory exists at ``$XDG_CACHE_HOME/org/openml``, - a warning is logged for backward compatibility. In this case, - ``$XDG_CACHE_HOME`` is returned instead of ``$XDG_CACHE_HOME/openml``. - """ - user_defined_cache_dir = os.environ.get("OPENML_CACHE_DIR") - if user_defined_cache_dir is not None: - return Path(user_defined_cache_dir) - - if platform.system().lower() != "linux": - return _user_path / ".openml" - - xdg_cache_home = os.environ.get("XDG_CACHE_HOME") - if xdg_cache_home is None: - return Path("~", ".cache", "openml").expanduser() - - # This is the proper XDG_CACHE_HOME directory, but - # we unfortunately had a problem where we used XDG_CACHE_HOME/org, - # we check heuristically if this old directory still exists and issue - # a warning if it does. There's too much data to move to do this for the user. 
- - # The new cache directory exists - cache_dir = Path(xdg_cache_home) / "openml" - if cache_dir.exists(): - return cache_dir - - # The old cache directory *does not* exist - heuristic_dir_for_backwards_compat = Path(xdg_cache_home) / "org" / "openml" - if not heuristic_dir_for_backwards_compat.exists(): - return cache_dir - - root_dir_to_delete = Path(xdg_cache_home) / "org" - openml_logger.warning( - "An old cache directory was found at '%s'. This directory is no longer used by " - "OpenML-Python. To silence this warning you would need to delete the old cache " - "directory. The cached files will then be located in '%s'.", - root_dir_to_delete, - cache_dir, - ) - return Path(xdg_cache_home) diff --git a/openml/_api/setup/backend.py b/openml/_api/setup/backend.py index 56f689c03..dd94a4a79 100644 --- a/openml/_api/setup/backend.py +++ b/openml/_api/setup/backend.py @@ -1,10 +1,10 @@ from __future__ import annotations -from copy import deepcopy -from typing import TYPE_CHECKING, Any, cast +from typing import TYPE_CHECKING, ClassVar, cast + +import openml from .builder import APIBackendBuilder -from .config import Config if TYPE_CHECKING: from openml._api.resources import ( @@ -57,11 +57,23 @@ class APIBackend: Interface for setup-related API operations. 
""" - _instance: APIBackend | None = None + _instance: ClassVar[APIBackend | None] = None + _backends: ClassVar[dict[str, APIBackendBuilder]] = {} + + @property + def _backend(self) -> APIBackendBuilder: + api_version = openml.config.api_version + fallback_api_version = openml.config.fallback_api_version + key = f"{api_version}_{fallback_api_version}" + + if key not in self._backends: + _backend = APIBackendBuilder.build( + api_version=api_version, + fallback_api_version=fallback_api_version, + ) + self._backends[key] = _backend - def __init__(self, config: Config | None = None): - self._config: Config = config or Config() - self._backend = APIBackendBuilder.build(self._config) + return self._backends[key] @property def dataset(self) -> DatasetAPI: @@ -112,124 +124,3 @@ def get_instance(cls) -> APIBackend: if cls._instance is None: cls._instance = cls() return cls._instance - - @classmethod - def get_config(cls) -> Config: - """ - Get a deep copy of the current configuration. - - Returns - ------- - Config - Current configuration object. - """ - return deepcopy(cls.get_instance()._config) - - @classmethod - def set_config(cls, config: Config) -> None: - """ - Set a new configuration for the backend. - - This updates both the internal ``_config`` object and rebuilds - the internal API backend using ``APIBackendBuilder``. - - Parameters - ---------- - config : Config - Configuration object to set. - """ - instance = cls.get_instance() - instance._config = config - instance._backend = APIBackendBuilder.build(config) - - @classmethod - def get_config_value(cls, key: str) -> Any: - """ - Retrieve a specific configuration value by key. - - Parameters - ---------- - key : str - Dot-separated key specifying the configuration field. - - Returns - ------- - Any - Deep copy of the requested configuration value. 
- """ - keys = key.split(".") - config_value = cls.get_instance()._config - for k in keys: - if isinstance(config_value, dict): - config_value = config_value[k] - else: - config_value = getattr(config_value, k) - return deepcopy(config_value) - - @classmethod - def set_config_value(cls, key: str, value: Any) -> None: - """ - Set a specific configuration value by key. - - Parameters - ---------- - key : str - Dot-separated key specifying the configuration field. - value : Any - Value to assign to the configuration field. - """ - keys = key.split(".") - config = cls.get_instance()._config - parent = config - for k in keys[:-1]: - parent = parent[k] if isinstance(parent, dict) else getattr(parent, k) - if isinstance(parent, dict): - parent[keys[-1]] = value - else: - setattr(parent, keys[-1], value) - cls.set_config(config) - - @classmethod - def get_config_values(cls, keys: list[str]) -> list[Any]: - """ - Retrieve multiple configuration values by a list of keys. - - Parameters - ---------- - keys : list of str - List of dot-separated keys specifying configuration fields. - - Returns - ------- - list of Any - List of deep copies of the requested configuration values. - """ - values = [] - for key in keys: - value = cls.get_config_value(key) - values.append(value) - return values - - @classmethod - def set_config_values(cls, config_dict: dict[str, Any]) -> None: - """ - Set multiple configuration values using a dictionary. - - Parameters - ---------- - config_dict : dict of str to Any - Mapping of dot-separated configuration keys to their values. 
- """ - config = cls.get_instance()._config - - for key, value in config_dict.items(): - keys = key.split(".") - parent = config - for k in keys[:-1]: - parent = parent[k] if isinstance(parent, dict) else getattr(parent, k) - if isinstance(parent, dict): - parent[keys[-1]] = value - else: - setattr(parent, keys[-1], value) - - cls.set_config(config) diff --git a/openml/_api/setup/builder.py b/openml/_api/setup/builder.py index aa6ed4bba..0d55de85f 100644 --- a/openml/_api/setup/builder.py +++ b/openml/_api/setup/builder.py @@ -1,15 +1,10 @@ from __future__ import annotations from collections.abc import Mapping -from pathlib import Path -from typing import TYPE_CHECKING -from openml._api.clients import HTTPCache, HTTPClient, MinIOClient +from openml._api.clients import HTTPClient, MinIOClient from openml._api.resources import API_REGISTRY, FallbackProxy, ResourceAPI -from openml.enums import ResourceType - -if TYPE_CHECKING: - from .config import Config +from openml.enums import APIVersion, ResourceType class APIBackendBuilder: @@ -63,7 +58,11 @@ def __init__( self.setup = resource_apis[ResourceType.SETUP] @classmethod - def build(cls, config: Config) -> APIBackendBuilder: + def build( + cls, + api_version: APIVersion, + fallback_api_version: APIVersion | None, + ) -> APIBackendBuilder: """ Construct an APIBackendBuilder instance from a configuration. @@ -82,40 +81,21 @@ def build(cls, config: Config) -> APIBackendBuilder: APIBackendBuilder Builder instance with all resource API interfaces initialized. 
""" - cache_dir = Path(config.cache_dir).expanduser() - - http_cache = HTTPCache(path=cache_dir) - minio_client = MinIOClient(path=cache_dir) + minio_client = MinIOClient() - primary_api_config = config.api_configs[config.api_version] - primary_http_client = HTTPClient( - server=primary_api_config.server, - base_url=primary_api_config.base_url, - api_key=primary_api_config.api_key, - retries=config.connection.retries, - retry_policy=config.connection.retry_policy, - cache=http_cache, - ) + primary_http_client = HTTPClient(api_version=api_version) resource_apis: dict[ResourceType, ResourceAPI] = {} - for resource_type, resource_api_cls in API_REGISTRY[config.api_version].items(): + for resource_type, resource_api_cls in API_REGISTRY[api_version].items(): resource_apis[resource_type] = resource_api_cls(primary_http_client, minio_client) - if config.fallback_api_version is None: + if fallback_api_version is None: return cls(resource_apis) - fallback_api_config = config.api_configs[config.fallback_api_version] - fallback_http_client = HTTPClient( - server=fallback_api_config.server, - base_url=fallback_api_config.base_url, - api_key=fallback_api_config.api_key, - retries=config.connection.retries, - retry_policy=config.connection.retry_policy, - cache=http_cache, - ) + fallback_http_client = HTTPClient(api_version=fallback_api_version) fallback_resource_apis: dict[ResourceType, ResourceAPI] = {} - for resource_type, resource_api_cls in API_REGISTRY[config.fallback_api_version].items(): + for resource_type, resource_api_cls in API_REGISTRY[fallback_api_version].items(): fallback_resource_apis[resource_type] = resource_api_cls( fallback_http_client, minio_client ) diff --git a/openml/_api/setup/config.py b/openml/_api/setup/config.py deleted file mode 100644 index 5f73b7e9b..000000000 --- a/openml/_api/setup/config.py +++ /dev/null @@ -1,93 +0,0 @@ -from __future__ import annotations - -from dataclasses import dataclass, field - -from openml.enums import APIVersion, 
RetryPolicy - -from ._utils import _resolve_default_cache_dir - - -@dataclass -class APIConfig: - """ - Configuration for a specific OpenML API version. - - Parameters - ---------- - server : str - Base server URL for the API. - base_url : str - API-specific base path appended to the server URL. - api_key : str | None, default=None - API key used for authentication. If None, requests are made - without authentication. - """ - - server: str - base_url: str - api_key: str | None = None - - -@dataclass -class ConnectionConfig: - """ - Configuration for HTTP connection behavior. - - Parameters - ---------- - retries : int - Number of retry attempts for failed requests. - retry_policy : RetryPolicy - Policy for determining delays between retries (human-like or robot-like). - """ - - retries: int - retry_policy: RetryPolicy - - -@dataclass -class Config: - """ - Global configuration for the OpenML Python client. - - Includes API versions, connection settings, and caching options. - - Attributes - ---------- - api_version : APIVersion - Primary API version to use (default is V1). - fallback_api_version : APIVersion or None - Optional fallback API version if the primary API does not support certain operations. - cache_dir : str - Path to the directory where cached files will be stored. - api_configs : dict of APIVersion to APIConfig - Mapping from API version to its server/base URL and API key configuration. - connection : ConnectionConfig - Settings for request retries and retry policy. 
- """ - - api_version: APIVersion = APIVersion.V1 - fallback_api_version: APIVersion | None = None - cache_dir: str = str(_resolve_default_cache_dir()) - - api_configs: dict[APIVersion, APIConfig] = field( - default_factory=lambda: { - APIVersion.V1: APIConfig( - server="https://www.openml.org/", - base_url="api/v1/xml/", - api_key=None, - ), - APIVersion.V2: APIConfig( - server="http://localhost:8002/", - base_url="", - api_key=None, - ), - } - ) - - connection: ConnectionConfig = field( - default_factory=lambda: ConnectionConfig( - retries=5, - retry_policy=RetryPolicy.HUMAN, - ) - ) diff --git a/openml/config.py b/openml/config.py index d80c5bfda..af06b3a32 100644 --- a/openml/config.py +++ b/openml/config.py @@ -18,22 +18,61 @@ from typing_extensions import TypedDict from urllib.parse import urlparse -from openml.enums import RetryPolicy +from openml.enums import APIVersion logger = logging.getLogger(__name__) openml_logger = logging.getLogger("openml") console_handler: logging.StreamHandler | None = None file_handler: logging.handlers.RotatingFileHandler | None = None + +SERVERS_REGISTRY: dict[str, dict[APIVersion, dict[str, str | None]]] = { + "production": { + APIVersion.V1: { + "server": "https://www.openml.org/api/v1/xml/", + "apikey": None, + }, + APIVersion.V2: { + "server": None, + "apikey": None, + }, + }, + "test": { + APIVersion.V1: { + "server": "https://test.openml.org/api/v1/xml/", + "apikey": "normaluser", + }, + APIVersion.V2: { + "server": None, + "apikey": None, + }, + }, + "local": { + APIVersion.V1: { + "server": "http://localhost:8000/api/v1/xml/", + "apikey": "normaluser", + }, + APIVersion.V2: { + "server": "http://localhost:8002/api/v1/xml/", + "apikey": "normaluser", + }, + }, +} + +SERVERS: dict[APIVersion, dict[str, str | None]] = SERVERS_REGISTRY["production"] + + OPENML_CACHE_DIR_ENV_VAR = "OPENML_CACHE_DIR" OPENML_SKIP_PARQUET_ENV_VAR = "OPENML_SKIP_PARQUET" OPENML_TEST_SERVER_ADMIN_KEY_ENV_VAR = "OPENML_TEST_SERVER_ADMIN_KEY" 
-_TEST_SERVER_NORMAL_USER_KEY = "normaluser" +_TEST_SERVER_NORMAL_USER_KEY = SERVERS_REGISTRY["test"][APIVersion.V1]["apikey"] -TEST_SERVER_URL = "https://test.openml.org" +TEST_SERVER_URL = SERVERS_REGISTRY["test"][APIVersion.V1]["server"].split("api/v1/xml")[0] class _Config(TypedDict): + api_version: APIVersion + fallback_api_version: APIVersion | None apikey: str server: str cachedir: Path @@ -154,8 +193,10 @@ def _resolve_default_cache_dir() -> Path: _defaults: _Config = { - "apikey": "", - "server": "https://www.openml.org/api/v1/xml", + "api_version": APIVersion.V1, + "fallback_api_version": None, + "apikey": SERVERS[APIVersion.V1]["apikey"], + "server": SERVERS[APIVersion.V1]["server"], "cachedir": _resolve_default_cache_dir(), "avoid_duplicate_runs": False, "retry_policy": "human", @@ -182,6 +223,8 @@ def get_server_base_url() -> str: return domain.replace("api", "www") +api_version: APIVersion = _defaults["api_version"] +fallback_api_version: APIVersion | None = _defaults["fallback_api_version"] apikey: str = _defaults["apikey"] show_progress: bool = _defaults["show_progress"] # The current cache directory (without the server name) @@ -211,8 +254,6 @@ def set_retry_policy(value: Literal["human", "robot"], n_retries: int | None = N retry_policy = value connection_n_retries = default_retries_by_policy[value] if n_retries is None else n_retries - _sync_api_config() - class ConfigurationForExamples: """Allows easy switching to and from a test configuration, used for examples.""" @@ -251,8 +292,6 @@ def start_using_configuration_for_example(cls) -> None: stacklevel=2, ) - _sync_api_config() - @classmethod def stop_using_configuration_for_example(cls) -> None: """Return to configuration as it was before `start_use_example_configuration`.""" @@ -271,8 +310,6 @@ def stop_using_configuration_for_example(cls) -> None: apikey = cast("str", cls._last_used_key) cls._start_last_called = False - _sync_api_config() - def _handle_xdg_config_home_backwards_compatibility( 
xdg_home: str, @@ -348,6 +385,8 @@ def _setup(config: _Config | None = None) -> None: openml.config.server = SOMESERVER We could also make it a property but that's less clear. """ + global api_version # noqa: PLW0603 + global fallback_api_version # noqa: PLW0603 global apikey # noqa: PLW0603 global server # noqa: PLW0603 global _root_cache_directory # noqa: PLW0603 @@ -371,6 +410,8 @@ def _setup(config: _Config | None = None) -> None: config = _parse_config(config_file) avoid_duplicate_runs = config["avoid_duplicate_runs"] + api_version = config["api_version"] + fallback_api_version = config["fallback_api_version"] apikey = config["apikey"] server = config["server"] show_progress = config["show_progress"] @@ -385,8 +426,6 @@ def _setup(config: _Config | None = None) -> None: short_cache_dir = Path(config["cachedir"]) _root_cache_directory = short_cache_dir.expanduser().resolve() - _sync_api_config() - try: cache_exists = _root_cache_directory.exists() # create the cache subdirectory @@ -421,8 +460,6 @@ def set_field_in_config_file(field: str, value: Any) -> None: if value is not None: fh.write(f"{f} = {value}\n") - _sync_api_config() - def _parse_config(config_file: str | Path) -> _Config: """Parse the config file, set up defaults.""" @@ -452,6 +489,8 @@ def _parse_config(config_file: str | Path) -> _Config: def get_config_as_dict() -> _Config: return { + "api_version": api_version, + "fallback_api_version": fallback_api_version, "apikey": apikey, "server": server, "cachedir": _root_cache_directory, @@ -511,8 +550,6 @@ def set_root_cache_directory(root_cache_directory: str | Path) -> None: global _root_cache_directory # noqa: PLW0603 _root_cache_directory = Path(root_cache_directory) - _sync_api_config() - start_using_configuration_for_example = ( ConfigurationForExamples.start_using_configuration_for_example @@ -532,28 +569,6 @@ def overwrite_config_context(config: dict[str, Any]) -> Iterator[_Config]: _setup(existing_config) -def _sync_api_config() -> None: - 
"""Sync the new API config with the legacy config in this file.""" - from ._api import APIBackend - - p = urlparse(server) - v1_server = f"{p.scheme}://{p.netloc}/" - v1_base_url = p.path.rstrip("/") + "/" # requirement for urllib.parse.urljoin - connection_retry_policy = RetryPolicy.HUMAN if retry_policy == "human" else RetryPolicy.ROBOT - cache_dir = str(_root_cache_directory) - - APIBackend.set_config_values( - { - "cache_dir": cache_dir, - "api_configs.v1.server": v1_server, - "api_configs.v1.base_url": v1_base_url, - "api_configs.v1.api_key": apikey, - "connection.retry_policy": connection_retry_policy, - "connection.retries": connection_n_retries, - } - ) - - __all__ = [ "get_cache_directory", "get_config_as_dict", diff --git a/openml/testing.py b/openml/testing.py index 00492e624..9d81bc6d6 100644 --- a/openml/testing.py +++ b/openml/testing.py @@ -16,7 +16,7 @@ import openml from openml._api import API_REGISTRY, HTTPCache, HTTPClient, MinIOClient, ResourceAPI -from openml.enums import APIVersion, ResourceType, RetryPolicy +from openml.enums import APIVersion, ResourceType from openml.exceptions import OpenMLServerException from openml.tasks import TaskType @@ -114,39 +114,13 @@ def setUp(self, n_levels: int = 1, tmpdir_suffix: str = "") -> None: self.retry_policy = openml.config.retry_policy self.connection_n_retries = openml.config.connection_n_retries openml.config.set_retry_policy("robot", n_retries=20) - openml.config._sync_api_config() - # migration-specific attributes - retries = self.connection_n_retries - retry_policy = RetryPolicy.HUMAN if self.retry_policy == "human" else RetryPolicy.ROBOT - cache_dir = self.static_cache_dir - - v1_server = self.test_server.split("api/")[0] - v1_base_url = self.test_server.replace(v1_server, "").rstrip("/") + "/" - v1_api_key = self.user_key - - self.cache = HTTPCache( - path=cache_dir, - ) + self.cache = HTTPCache() self.http_clients = { - APIVersion.V1: HTTPClient( - server=v1_server, - base_url=v1_base_url, - 
api_key=v1_api_key, - retries=retries, - retry_policy=retry_policy, - cache=self.cache, - ), - APIVersion.V2: HTTPClient( - server="http://localhost:8002/", - base_url="", - api_key="", - retries=retries, - retry_policy=retry_policy, - cache=self.cache, - ), + APIVersion.V1: HTTPClient(api_version=APIVersion.V1), + APIVersion.V2: HTTPClient(api_version=APIVersion.V2), } - self.minio_client = MinIOClient(path=cache_dir) + self.minio_client = MinIOClient() def use_production_server(self) -> None: """ @@ -156,7 +130,6 @@ def use_production_server(self) -> None: """ openml.config.server = self.production_server openml.config.apikey = "" - openml.config._sync_api_config() def tearDown(self) -> None: """Tear down the test""" @@ -170,7 +143,6 @@ def tearDown(self) -> None: openml.config.connection_n_retries = self.connection_n_retries openml.config.retry_policy = self.retry_policy - openml.config._sync_api_config() @classmethod def _mark_entity_for_removal( diff --git a/tests/conftest.py b/tests/conftest.py index 5f1e0e743..2a7a6dcc7 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -99,7 +99,6 @@ def delete_remote_files(tracker, flow_names) -> None: """ openml.config.server = TestBase.test_server openml.config.apikey = TestBase.user_key - openml.config._sync_api_config() # reordering to delete sub flows at the end of flows # sub-flows have shorter names, hence, sorting by descending order of flow name length @@ -276,12 +275,10 @@ def with_server(request): if "production_server" in request.keywords: openml.config.server = "https://www.openml.org/api/v1/xml" openml.config.apikey = None - openml.config._sync_api_config() yield return openml.config.server = f"{openml.config.TEST_SERVER_URL}/api/v1/xml" openml.config.apikey = TestBase.user_key - openml.config._sync_api_config() yield diff --git a/tests/test_api/test_http.py b/tests/test_api/test_http.py index 5ad3685a3..cf8b8d9e5 100644 --- a/tests/test_api/test_http.py +++ b/tests/test_api/test_http.py @@ -8,6 +8,7 
@@ from openml.enums import APIVersion from openml.exceptions import OpenMLAuthenticationError from openml._api import HTTPClient +import openml class TestHTTPClient(TestBase): @@ -19,8 +20,7 @@ def setUp(self): def _prepare_url(self, path: str | None = None) -> str: server = self.http_client.server - base_url = self.http_client.base_url - return urljoin(server, urljoin(base_url, path)) + return urljoin(server, path) def test_cache(self): path = "task/31" @@ -28,16 +28,15 @@ def test_cache(self): url = self._prepare_url(path=path) - server_keys = urlparse(self.http_client.server).netloc.split(".")[::-1] - base_url_keys = self.http_client.base_url.strip("/").split("/") - path_keys = path.split("/") + parsed_url = urlparse(url) + netloc_parts = parsed_url.netloc.split(".")[::-1] + path_parts = parsed_url.path.strip("/").split("/") params_key = "&".join([f"{k}={v}" for k, v in params.items()]) key = self.cache.get_key(url, params) expected_key = os.path.join( - *server_keys, - *base_url_keys, - *path_keys, + *netloc_parts, + *path_parts, params_key, ) @@ -133,13 +132,13 @@ def test_get_with_api_key(self): @pytest.mark.uses_test_server() def test_get_without_api_key_raises(self): - api_key = self.http_client.api_key - self.http_client.api_key = None + api_key = openml.config.SERVERS[APIVersion.V1]["api_key"] + openml.config.SERVERS[APIVersion.V1]["api_key"] = None with pytest.raises(OpenMLAuthenticationError): self.http_client.get("task/1", use_api_key=True) - self.http_client.api_key = api_key + openml.config.SERVERS[APIVersion.V1]["api_key"] = api_key @pytest.mark.uses_test_server() def test_download_creates_file(self): @@ -207,7 +206,7 @@ def test_post(self): mock_request.assert_called_once_with( method="POST", - url=self.http_client.server + self.http_client.base_url + resource_name, + url=urljoin(self.http_client.server, resource_name), params={}, data={'api_key': self.http_client.api_key}, headers=self.http_client.headers, diff --git 
a/tests/test_datasets/test_dataset_functions.py b/tests/test_datasets/test_dataset_functions.py index 573d1e6b7..151a9ac23 100644 --- a/tests/test_datasets/test_dataset_functions.py +++ b/tests/test_datasets/test_dataset_functions.py @@ -158,7 +158,6 @@ def test_check_datasets_active(self): [79], ) openml.config.server = self.test_server - openml.config._sync_api_config() @pytest.mark.test_server() def test_illegal_character_tag(self): @@ -187,7 +186,6 @@ def test__name_to_id_with_deactivated(self): # /d/1 was deactivated assert openml.datasets.functions._name_to_id("anneal") == 2 openml.config.server = self.test_server - openml.config._sync_api_config() @pytest.mark.production_server() def test__name_to_id_with_multiple_active(self): @@ -440,7 +438,6 @@ def test__getarff_md5_issue(self): } n = openml.config.connection_n_retries openml.config.connection_n_retries = 1 - openml.config._sync_api_config() self.assertRaisesRegex( OpenMLHashException, @@ -451,7 +448,6 @@ def test__getarff_md5_issue(self): ) openml.config.connection_n_retries = n - openml.config._sync_api_config() @pytest.mark.test_server() def test__get_dataset_features(self): @@ -618,7 +614,6 @@ def test_data_status(self): # admin key for test server (only admins can activate datasets. 
# all users can deactivate their own datasets) openml.config.apikey = TestBase.admin_key - openml.config._sync_api_config() openml.datasets.status_update(did, "active") self._assert_status_of_dataset(did=did, status="active") @@ -1558,7 +1553,6 @@ def test_list_datasets_with_high_size_parameter(self): # Reverting to test server openml.config.server = self.test_server - openml.config._sync_api_config() assert len(datasets_a) == len(datasets_b) diff --git a/tests/test_openml/test_config.py b/tests/test_openml/test_config.py index 13b06223a..2ecafc4c3 100644 --- a/tests/test_openml/test_config.py +++ b/tests/test_openml/test_config.py @@ -15,6 +15,7 @@ import openml.config import openml.testing from openml.testing import TestBase +from openml.enums import APIVersion, ServerType @contextmanager @@ -77,6 +78,9 @@ def test_get_config_as_dict(self): """Checks if the current configuration is returned accurately as a dict.""" config = openml.config.get_config_as_dict() _config = {} + _config["api_version"] = APIVersion.V1 + _config["fallback_api_version"] = None + _config["server_type"] = ServerType.PRODUCTION _config["apikey"] = TestBase.user_key _config["server"] = f"{openml.config.TEST_SERVER_URL}/api/v1/xml" _config["cachedir"] = self.workdir @@ -85,12 +89,15 @@ def test_get_config_as_dict(self): _config["retry_policy"] = "robot" _config["show_progress"] = False assert isinstance(config, dict) - assert len(config) == 7 + assert len(config) == 10 self.assertDictEqual(config, _config) def test_setup_with_config(self): """Checks if the OpenML configuration can be updated using _setup().""" _config = {} + _config["api_version"] = APIVersion.V1 + _config["fallback_api_version"] = None + _config["server_type"] = ServerType.PRODUCTION _config["apikey"] = TestBase.user_key _config["server"] = "https://www.openml.org/api/v1/xml" _config["cachedir"] = self.workdir From aba3d3e9ca98faa4c4068d736306b320c27f2c6f Mon Sep 17 00:00:00 2001 From: geetu040 Date: Wed, 25 Feb 2026 02:31:41 
+0500 Subject: [PATCH 139/156] update _config.py --- openml/_api/clients/http.py | 12 +-- openml/_config.py | 125 ++++++++++++++++++++++++------- openml/testing.py | 2 +- tests/test_openml/test_config.py | 12 +-- 4 files changed, 110 insertions(+), 41 deletions(-) diff --git a/openml/_api/clients/http.py b/openml/_api/clients/http.py index f1ed20e7c..829abc769 100644 --- a/openml/_api/clients/http.py +++ b/openml/_api/clients/http.py @@ -212,21 +212,21 @@ def __init__( @property def server(self) -> str: - server = openml.config.SERVERS[self.api_version]["server"] + server = openml.config.servers[self.api_version]["server"] if server is None: + servers_repr = {k.value: v for k, v in openml.config.servers} raise ValueError( - f"server found to be None for api_version={self.api_version}" - f" in {openml.config.SERVERS}" + f'server found to be None for api_version="{self.api_version}" in {servers_repr}' ) - return server + return cast("str", server) @property def api_key(self) -> str | None: - return openml.config.SERVERS[self.api_version]["apikey"] + return cast("str | None", openml.config.SERVERS[self.api_version]["apikey"]) @property def retries(self) -> int: - return openml.config.connection_n_retries + return cast("int", openml.config.connection_n_retries) @property def retry_policy(self) -> RetryPolicy: diff --git a/openml/_config.py b/openml/_config.py index a897f17fc..3e07b2e0a 100644 --- a/openml/_config.py +++ b/openml/_config.py @@ -18,10 +18,46 @@ from typing import Any, ClassVar, Literal, cast from urllib.parse import urlparse +from openml.enums import APIVersion + logger = logging.getLogger(__name__) openml_logger = logging.getLogger("openml") +SERVERS_REGISTRY: dict[str, dict[APIVersion, dict[str, str | None]]] = { + "production": { + APIVersion.V1: { + "server": "https://www.openml.org/api/v1/xml/", + "apikey": None, + }, + APIVersion.V2: { + "server": None, + "apikey": None, + }, + }, + "test": { + APIVersion.V1: { + "server": 
"https://test.openml.org/api/v1/xml/", + "apikey": "normaluser", + }, + APIVersion.V2: { + "server": None, + "apikey": None, + }, + }, + "local": { + APIVersion.V1: { + "server": "http://localhost:8000/api/v1/xml/", + "apikey": "normaluser", + }, + APIVersion.V2: { + "server": "http://localhost:8002/api/v1/xml/", + "apikey": "normaluser", + }, + }, +} + + def _resolve_default_cache_dir() -> Path: user_defined_cache_dir = os.environ.get("OPENML_CACHE_DIR") if user_defined_cache_dir is not None: @@ -57,19 +93,38 @@ def _resolve_default_cache_dir() -> Path: class OpenMLConfig: """Dataclass storing the OpenML configuration.""" - apikey: str | None = "" - server: str = "https://www.openml.org/api/v1/xml" + servers: dict[APIVersion, dict[str, str | None]] = field( + default_factory=lambda: SERVERS_REGISTRY["production"] + ) + api_version: APIVersion = APIVersion.V1 + fallback_api_version: APIVersion | None = None cachedir: Path = field(default_factory=_resolve_default_cache_dir) avoid_duplicate_runs: bool = False retry_policy: Literal["human", "robot"] = "human" connection_n_retries: int = 5 show_progress: bool = False - def __setattr__(self, name: str, value: Any) -> None: - if name == "apikey" and value is not None and not isinstance(value, str): - raise ValueError("apikey must be a string or None") + @property + def server(self) -> str: + server = self.servers[self.api_version]["server"] + if server is None: + servers_repr = {k.value: v for k, v in self.servers.items()} + raise ValueError( + f'server found to be None for api_version="{self.api_version}" in {servers_repr}' + ) + return server + + @server.setter + def server(self, value: str | None) -> None: + self.servers[self.api_version]["server"] = value + + @property + def apikey(self) -> str | None: + return self.servers[self.api_version]["apikey"] - super().__setattr__(name, value) + @apikey.setter + def apikey(self, value: str | None) -> None: + self.servers[self.api_version]["apikey"] = value class 
OpenMLConfigManager: @@ -79,11 +134,14 @@ def __init__(self) -> None: self.console_handler: logging.StreamHandler | None = None self.file_handler: logging.handlers.RotatingFileHandler | None = None + server_test_v1_apikey = SERVERS_REGISTRY["test"][APIVersion.V1]["apikey"] + server_test_v1_server = SERVERS_REGISTRY["test"][APIVersion.V1]["server"] + self.OPENML_CACHE_DIR_ENV_VAR = "OPENML_CACHE_DIR" self.OPENML_SKIP_PARQUET_ENV_VAR = "OPENML_SKIP_PARQUET" - self._TEST_SERVER_NORMAL_USER_KEY = "normaluser" + self._TEST_SERVER_NORMAL_USER_KEY = server_test_v1_apikey self.OPENML_TEST_SERVER_ADMIN_KEY_ENV_VAR = "OPENML_TEST_SERVER_ADMIN_KEY" - self.TEST_SERVER_URL = "https://test.openml.org" + self.TEST_SERVER_URL = cast("str", server_test_v1_server).split("/api/v1/xml")[0] self._config: OpenMLConfig = OpenMLConfig() # for legacy test `test_non_writable_home` @@ -127,6 +185,10 @@ def __setattr__(self, name: str, value: Any) -> None: object.__setattr__(self, "_config", replace(self._config, **{name: value})) return None + if name in ["server", "apikey"]: + setattr(self._config, name, value) + return None + object.__setattr__(self, name, value) return None @@ -190,6 +252,21 @@ def get_server_base_url(self) -> str: domain, _ = self._config.server.split("/api", maxsplit=1) return domain.replace("api", "www") + def set_server_mode(self, mode: str) -> None: + if mode not in SERVERS_REGISTRY: + raise ValueError( + f'invalid mode="{mode}" allowed modes: {", ".join(list(SERVERS_REGISTRY.keys()))}' + ) + self._config = replace(self._config, servers=SERVERS_REGISTRY[mode]) + + def set_api_version(self, api_version: APIVersion) -> None: + if api_version not in APIVersion: + raise ValueError( + f'invalid api_version="{api_version}" ' + f"allowed versions: {', '.join(list(APIVersion))}" + ) + self._config = replace(self._config, api_version=api_version) + def set_retry_policy( self, value: Literal["human", "robot"], n_retries: int | None = None ) -> None: @@ -317,13 +394,18 @@ def 
_setup(self, config: dict[str, Any] | None = None) -> None: self._config = replace( self._config, - apikey=config["apikey"], - server=config["server"], + servers=config["servers"], + api_version=config["api_version"], + fallback_api_version=config["fallback_api_version"], show_progress=config["show_progress"], avoid_duplicate_runs=config["avoid_duplicate_runs"], retry_policy=config["retry_policy"], connection_n_retries=int(config["connection_n_retries"]), ) + if "server" in config: + self._config.server = config["server"] + if "apikey" in config: + self._config.apikey = config["apikey"] user_defined_cache_dir = os.environ.get(self.OPENML_CACHE_DIR_ENV_VAR) if user_defined_cache_dir is not None: @@ -393,14 +475,12 @@ def overwrite_config_context(self, config: dict[str, Any]) -> Iterator[dict[str, class ConfigurationForExamples: """Allows easy switching to and from a test configuration, used for examples.""" - _last_used_server = None - _last_used_key = None + _last_used_servers = None _start_last_called = False def __init__(self, manager: OpenMLConfigManager): self._manager = manager - self._test_apikey = manager._TEST_SERVER_NORMAL_USER_KEY - self._test_server = f"{manager.TEST_SERVER_URL}/api/v1/xml" + self._test_servers = SERVERS_REGISTRY["test"] def start_using_configuration_for_example(self) -> None: """Sets the configuration to connect to the test server with valid apikey. @@ -408,27 +488,21 @@ def start_using_configuration_for_example(self) -> None: To configuration as was before this call is stored, and can be recovered by using the `stop_use_example_configuration` method. """ - if ( - self._start_last_called - and self._manager._config.server == self._test_server - and self._manager._config.apikey == self._test_apikey - ): + if self._start_last_called and self._manager._config.servers == self._test_servers: # Method is called more than once in a row without modifying the server or apikey. 
# We don't want to save the current test configuration as a last used configuration. return - self._last_used_server = self._manager._config.server - self._last_used_key = self._manager._config.apikey + self._last_used_servers = self._manager._config.servers type(self)._start_last_called = True # Test server key for examples self._manager._config = replace( self._manager._config, - server=self._test_server, - apikey=self._test_apikey, + servers=self._test_servers, ) warnings.warn( - f"Switching to the test server {self._test_server} to not upload results to " + f"Switching to the test servers {self._test_servers} to not upload results to " "the live server. Using the test server may result in reduced performance of the " "API!", stacklevel=2, @@ -446,8 +520,7 @@ def stop_using_configuration_for_example(self) -> None: self._manager._config = replace( self._manager._config, - server=cast("str", self._last_used_server), - apikey=cast("str", self._last_used_key), + servers=cast("dict[APIVersion, dict[str, str | None]]", self._last_used_servers), ) type(self)._start_last_called = False diff --git a/openml/testing.py b/openml/testing.py index 9d81bc6d6..76b84b9f3 100644 --- a/openml/testing.py +++ b/openml/testing.py @@ -49,7 +49,7 @@ class TestBase(unittest.TestCase): "user": [], } flow_name_tracker: ClassVar[list[str]] = [] - test_server = f"{openml.config.TEST_SERVER_URL}/api/v1/xml" + test_server = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/" admin_key = os.environ.get(openml.config.OPENML_TEST_SERVER_ADMIN_KEY_ENV_VAR) user_key = openml.config._TEST_SERVER_NORMAL_USER_KEY diff --git a/tests/test_openml/test_config.py b/tests/test_openml/test_config.py index 62ff082f3..74e06d21f 100644 --- a/tests/test_openml/test_config.py +++ b/tests/test_openml/test_config.py @@ -15,7 +15,7 @@ import openml import openml.testing from openml.testing import TestBase -from openml.enums import APIVersion, ServerType +from openml.enums import APIVersion @contextmanager @@ -80,16 
+80,14 @@ def test_get_config_as_dict(self): _config = {} _config["api_version"] = APIVersion.V1 _config["fallback_api_version"] = None - _config["server_type"] = ServerType.PRODUCTION - _config["apikey"] = TestBase.user_key - _config["server"] = f"{openml.config.TEST_SERVER_URL}/api/v1/xml" + _config["servers"] = openml._config.SERVERS_REGISTRY['production'] _config["cachedir"] = self.workdir _config["avoid_duplicate_runs"] = False _config["connection_n_retries"] = 20 _config["retry_policy"] = "robot" _config["show_progress"] = False assert isinstance(config, dict) - assert len(config) == 10 + assert len(config) == 8 self.assertDictEqual(config, _config) def test_setup_with_config(self): @@ -97,9 +95,7 @@ def test_setup_with_config(self): _config = {} _config["api_version"] = APIVersion.V1 _config["fallback_api_version"] = None - _config["server_type"] = ServerType.PRODUCTION - _config["apikey"] = TestBase.user_key - _config["server"] = "https://www.openml.org/api/v1/xml" + _config["servers"] = openml._config.SERVERS_REGISTRY['production'] _config["cachedir"] = self.workdir _config["avoid_duplicate_runs"] = True _config["retry_policy"] = "human" From d99d54d98dff0034b64ac1e6d03bffb56b87aa79 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Wed, 25 Feb 2026 02:38:26 +0500 Subject: [PATCH 140/156] update test_openml_cache_dir_env_var --- tests/test_openml/test_config.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/test_openml/test_config.py b/tests/test_openml/test_config.py index 74e06d21f..85f93f39e 100644 --- a/tests/test_openml/test_config.py +++ b/tests/test_openml/test_config.py @@ -9,6 +9,7 @@ from typing import Any, Iterator from pathlib import Path import platform +from urllib.parse import urlparse import pytest @@ -192,6 +193,10 @@ def test_openml_cache_dir_env_var(tmp_path: Path) -> None: expected_path = tmp_path / "test-cache" with safe_environ_patcher("OPENML_CACHE_DIR", str(expected_path)): + server_parts = 
urlparse(openml.config.server).netloc + server_parts = server_parts.split(".")[::-1] + server_parts = "/".join(server_parts) + openml.config._setup() assert openml.config._root_cache_directory == expected_path - assert openml.config.get_cache_directory() == str(expected_path / "org" / "openml" / "www") + assert openml.config.get_cache_directory() == str(expected_path / server_parts) From dc22e3ace3ae13c1971e5b5802744cae0180d871 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Wed, 25 Feb 2026 13:01:32 +0500 Subject: [PATCH 141/156] fix mutable SERVERS_REGISTRY --- openml/_config.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/openml/_config.py b/openml/_config.py index 3e07b2e0a..ad71141c7 100644 --- a/openml/_config.py +++ b/openml/_config.py @@ -12,6 +12,7 @@ import warnings from collections.abc import Iterator from contextlib import contextmanager +from copy import deepcopy from dataclasses import dataclass, field, fields, replace from io import StringIO from pathlib import Path @@ -94,7 +95,7 @@ class OpenMLConfig: """Dataclass storing the OpenML configuration.""" servers: dict[APIVersion, dict[str, str | None]] = field( - default_factory=lambda: SERVERS_REGISTRY["production"] + default_factory=lambda: deepcopy(SERVERS_REGISTRY["production"]) ) api_version: APIVersion = APIVersion.V1 fallback_api_version: APIVersion | None = None @@ -134,8 +135,8 @@ def __init__(self) -> None: self.console_handler: logging.StreamHandler | None = None self.file_handler: logging.handlers.RotatingFileHandler | None = None - server_test_v1_apikey = SERVERS_REGISTRY["test"][APIVersion.V1]["apikey"] - server_test_v1_server = SERVERS_REGISTRY["test"][APIVersion.V1]["server"] + server_test_v1_apikey = self.get_servers("test")[APIVersion.V1]["apikey"] + server_test_v1_server = self.get_servers("test")[APIVersion.V1]["server"] self.OPENML_CACHE_DIR_ENV_VAR = "OPENML_CACHE_DIR" self.OPENML_SKIP_PARQUET_ENV_VAR = "OPENML_SKIP_PARQUET" @@ -252,12 +253,16 
@@ def get_server_base_url(self) -> str: domain, _ = self._config.server.split("/api", maxsplit=1) return domain.replace("api", "www") - def set_server_mode(self, mode: str) -> None: + def get_servers(self, mode: str) -> dict[APIVersion, dict[str, str | None]]: if mode not in SERVERS_REGISTRY: raise ValueError( f'invalid mode="{mode}" allowed modes: {", ".join(list(SERVERS_REGISTRY.keys()))}' ) - self._config = replace(self._config, servers=SERVERS_REGISTRY[mode]) + return deepcopy(SERVERS_REGISTRY[mode]) + + def set_servers(self, mode: str) -> None: + servers = self.get_servers(mode) + self._config = replace(self._config, servers=servers) def set_api_version(self, api_version: APIVersion) -> None: if api_version not in APIVersion: @@ -480,7 +485,7 @@ class ConfigurationForExamples: def __init__(self, manager: OpenMLConfigManager): self._manager = manager - self._test_servers = SERVERS_REGISTRY["test"] + self._test_servers = manager.get_servers("test") def start_using_configuration_for_example(self) -> None: """Sets the configuration to connect to the test server with valid apikey. 
From 731857353d4a0a2935208b908155a089718ed669 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Wed, 25 Feb 2026 13:02:49 +0500 Subject: [PATCH 142/156] update set_api_version for fallback --- openml/_config.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/openml/_config.py b/openml/_config.py index ad71141c7..18c4a3185 100644 --- a/openml/_config.py +++ b/openml/_config.py @@ -264,13 +264,28 @@ def set_servers(self, mode: str) -> None: servers = self.get_servers(mode) self._config = replace(self._config, servers=servers) - def set_api_version(self, api_version: APIVersion) -> None: + def set_api_version( + self, + api_version: APIVersion, + fallback_api_version: APIVersion | None = None, + ) -> None: if api_version not in APIVersion: raise ValueError( f'invalid api_version="{api_version}" ' f"allowed versions: {', '.join(list(APIVersion))}" ) - self._config = replace(self._config, api_version=api_version) + + if fallback_api_version is not None and fallback_api_version not in APIVersion: + raise ValueError( + f'invalid fallback_api_version="{fallback_api_version}" ' + f"allowed versions: {', '.join(list(APIVersion))}" + ) + + self._config = replace( + self._config, + api_version=api_version, + fallback_api_version=fallback_api_version, + ) def set_retry_policy( self, value: Literal["human", "robot"], n_retries: int | None = None From 29ef1879875a544a9079e58e05c4b64308fe142e Mon Sep 17 00:00:00 2001 From: geetu040 Date: Wed, 25 Feb 2026 13:03:21 +0500 Subject: [PATCH 143/156] minor fix --- openml/_api/clients/http.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openml/_api/clients/http.py b/openml/_api/clients/http.py index 829abc769..59a8bc1f2 100644 --- a/openml/_api/clients/http.py +++ b/openml/_api/clients/http.py @@ -222,7 +222,7 @@ def server(self) -> str: @property def api_key(self) -> str | None: - return cast("str | None", openml.config.SERVERS[self.api_version]["apikey"]) + return cast("str | 
None", openml.config.servers[self.api_version]["apikey"]) @property def retries(self) -> int: From cf94c891f60ad0b54070eb733e408ff57fedd41e Mon Sep 17 00:00:00 2001 From: geetu040 Date: Wed, 25 Feb 2026 13:03:56 +0500 Subject: [PATCH 144/156] fixes for test_config --- tests/test_openml/test_config.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/test_openml/test_config.py b/tests/test_openml/test_config.py index 85f93f39e..dc42f9588 100644 --- a/tests/test_openml/test_config.py +++ b/tests/test_openml/test_config.py @@ -81,7 +81,7 @@ def test_get_config_as_dict(self): _config = {} _config["api_version"] = APIVersion.V1 _config["fallback_api_version"] = None - _config["servers"] = openml._config.SERVERS_REGISTRY['production'] + _config["servers"] = openml.config.get_servers("production") _config["cachedir"] = self.workdir _config["avoid_duplicate_runs"] = False _config["connection_n_retries"] = 20 @@ -96,7 +96,7 @@ def test_setup_with_config(self): _config = {} _config["api_version"] = APIVersion.V1 _config["fallback_api_version"] = None - _config["servers"] = openml._config.SERVERS_REGISTRY['production'] + _config["servers"] = openml.config.get_servers("production") _config["cachedir"] = self.workdir _config["avoid_duplicate_runs"] = True _config["retry_policy"] = "human" @@ -193,10 +193,11 @@ def test_openml_cache_dir_env_var(tmp_path: Path) -> None: expected_path = tmp_path / "test-cache" with safe_environ_patcher("OPENML_CACHE_DIR", str(expected_path)): + openml.config._setup() + server_parts = urlparse(openml.config.server).netloc server_parts = server_parts.split(".")[::-1] server_parts = "/".join(server_parts) - openml.config._setup() assert openml.config._root_cache_directory == expected_path assert openml.config.get_cache_directory() == str(expected_path / server_parts) From 298fbdae81758133e1834854adef375131e47911 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Wed, 25 Feb 2026 13:04:19 +0500 Subject: [PATCH 145/156] fixes in 
conftest urls --- tests/conftest.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 2a7a6dcc7..5839ef8e2 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -273,11 +273,11 @@ def as_robot() -> Iterator[None]: @pytest.fixture(autouse=True) def with_server(request): if "production_server" in request.keywords: - openml.config.server = "https://www.openml.org/api/v1/xml" + openml.config.server = "https://www.openml.org/api/v1/xml/" openml.config.apikey = None yield return - openml.config.server = f"{openml.config.TEST_SERVER_URL}/api/v1/xml" + openml.config.server = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/" openml.config.apikey = TestBase.user_key yield From 9870502be1da55bf66e20a0f58d16f08f8e2f24a Mon Sep 17 00:00:00 2001 From: geetu040 Date: Wed, 25 Feb 2026 13:04:51 +0500 Subject: [PATCH 146/156] update test_http.py --- tests/test_api/test_http.py | 372 ++++++++++++++++++------------------ 1 file changed, 189 insertions(+), 183 deletions(-) diff --git a/tests/test_api/test_http.py b/tests/test_api/test_http.py index cf8b8d9e5..e0a9bd5b6 100644 --- a/tests/test_api/test_http.py +++ b/tests/test_api/test_http.py @@ -7,227 +7,233 @@ from urllib.parse import urljoin, urlparse from openml.enums import APIVersion from openml.exceptions import OpenMLAuthenticationError -from openml._api import HTTPClient +from openml._api import HTTPClient, HTTPCache import openml -class TestHTTPClient(TestBase): - http_client: HTTPClient +@pytest.fixture +def cache() -> HTTPCache: + return HTTPCache() - def setUp(self): - super().setUp() - self.http_client = self.http_clients[APIVersion.V1] - def _prepare_url(self, path: str | None = None) -> str: - server = self.http_client.server - return urljoin(server, path) +@pytest.fixture +def http_client() -> HTTPClient: + return HTTPClient(api_version=APIVersion.V1) - def test_cache(self): - path = "task/31" - params = {"param1": "value1", "param2": "value2"} - url = 
self._prepare_url(path=path) +@pytest.fixture +def sample_path() -> str: + return "task/1" - parsed_url = urlparse(url) - netloc_parts = parsed_url.netloc.split(".")[::-1] - path_parts = parsed_url.path.strip("/").split("/") - params_key = "&".join([f"{k}={v}" for k, v in params.items()]) - key = self.cache.get_key(url, params) - expected_key = os.path.join( - *netloc_parts, - *path_parts, - params_key, - ) +@pytest.fixture +def sample_url(sample_path) -> str: + return urljoin(openml.config.server, sample_path) - # validate key - self.assertEqual(key, expected_key) - - # create mock response - req = Request("GET", url).prepare() - response = Response() - response.status_code = 200 - response.url = url - response.reason = "OK" - response._content = b"test" - response.headers = {"Content-Type": "text/xml"} - response.encoding = "utf-8" - response.request = req - response.elapsed = type("Elapsed", (), {"total_seconds": lambda x: 0.1})() - - # save to cache - self.cache.save(key, response) - - # load from cache - cached_response = self.cache.load(key) - - # validate loaded response - self.assertEqual(cached_response.status_code, 200) - self.assertEqual(cached_response.url, url) - self.assertEqual(cached_response.content, b"test") - self.assertEqual( - cached_response.headers["Content-Type"], "text/xml" - ) - @pytest.mark.uses_test_server() - def test_get(self): - response = self.http_client.get("task/1") +@pytest.fixture +def sample_download_url() -> str: + server = openml.config.server.split("api/")[0] + endpoint = "data/v1/download/1/anneal.arff" + url = server + endpoint + return url - self.assertEqual(response.status_code, 200) - self.assertIn(b"test" + response.headers = {"Content-Type": "text/xml"} + response.encoding = "utf-8" + response.request = req + response.elapsed = type("Elapsed", (), {"total_seconds": lambda x: 0.1})() - @pytest.mark.uses_test_server() - def test_get_refresh_cache(self): - path = "task/1" + cache.save(key, response) + cached = 
cache.load(key) - url = self._prepare_url(path=path) - key = self.cache.get_key(url, {}) - cache_path = self.cache._key_to_path(key) / "meta.json" + assert cached.status_code == 200 + assert cached.url == sample_url + assert cached.content == b"test" + assert cached.headers["Content-Type"] == "text/xml" - response1 = self.http_client.get(path, enable_cache=True) - response1_cache_time_stamp = cache_path.stat().st_mtime - response2 = self.http_client.get(path, enable_cache=True, refresh_cache=True) - response2_cache_time_stamp = cache_path.stat().st_mtime +@pytest.mark.uses_test_server() +def test_get(http_client): + response = http_client.get("task/1") - self.assertNotEqual(response1_cache_time_stamp, response2_cache_time_stamp) - self.assertEqual(response2.status_code, 200) - self.assertEqual(response1.content, response2.content) + assert response.status_code == 200 + assert b" Date: Wed, 25 Feb 2026 13:14:49 +0500 Subject: [PATCH 147/156] undo changes with test_openml_cache_dir_env_var --- tests/test_openml/test_config.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/tests/test_openml/test_config.py b/tests/test_openml/test_config.py index dc42f9588..66c60dea0 100644 --- a/tests/test_openml/test_config.py +++ b/tests/test_openml/test_config.py @@ -195,9 +195,5 @@ def test_openml_cache_dir_env_var(tmp_path: Path) -> None: with safe_environ_patcher("OPENML_CACHE_DIR", str(expected_path)): openml.config._setup() - server_parts = urlparse(openml.config.server).netloc - server_parts = server_parts.split(".")[::-1] - server_parts = "/".join(server_parts) - assert openml.config._root_cache_directory == expected_path - assert openml.config.get_cache_directory() == str(expected_path / server_parts) + assert openml.config.get_cache_directory() == str(expected_path / "org" / "openml" / "www") From 76b92bb3ee4a46cf203b9b279f3e4137ff69ba65 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Wed, 25 Feb 2026 13:15:35 +0500 Subject: [PATCH 148/156] fix server 
mode in test_config.py --- tests/test_openml/test_config.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_openml/test_config.py b/tests/test_openml/test_config.py index 66c60dea0..0cd642fe7 100644 --- a/tests/test_openml/test_config.py +++ b/tests/test_openml/test_config.py @@ -81,7 +81,7 @@ def test_get_config_as_dict(self): _config = {} _config["api_version"] = APIVersion.V1 _config["fallback_api_version"] = None - _config["servers"] = openml.config.get_servers("production") + _config["servers"] = openml.config.get_servers("test") _config["cachedir"] = self.workdir _config["avoid_duplicate_runs"] = False _config["connection_n_retries"] = 20 @@ -96,7 +96,7 @@ def test_setup_with_config(self): _config = {} _config["api_version"] = APIVersion.V1 _config["fallback_api_version"] = None - _config["servers"] = openml.config.get_servers("production") + _config["servers"] = openml.config.get_servers("test") _config["cachedir"] = self.workdir _config["avoid_duplicate_runs"] = True _config["retry_policy"] = "human" From 419edcb7c71debc8a35710213bb29259961e3921 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Wed, 25 Feb 2026 14:45:58 +0500 Subject: [PATCH 149/156] move _HEADERS to confing --- openml/_api/clients/http.py | 9 +-------- openml/_api/clients/minio.py | 7 ------- openml/_config.py | 4 ++++ tests/test_api/test_http.py | 5 ++--- 4 files changed, 7 insertions(+), 18 deletions(-) diff --git a/openml/_api/clients/http.py b/openml/_api/clients/http.py index 59a8bc1f2..da6cdda09 100644 --- a/openml/_api/clients/http.py +++ b/openml/_api/clients/http.py @@ -17,7 +17,6 @@ from requests import Response import openml -from openml.__version__ import __version__ from openml.enums import APIVersion, RetryPolicy from openml.exceptions import ( OpenMLAuthenticationError, @@ -27,8 +26,6 @@ OpenMLServerNoResult, ) -_HEADERS: dict[str, str] = {"user-agent": f"openml-python/{__version__}"} - class HTTPCache: """ @@ -236,10 +233,6 @@ def 
retry_policy(self) -> RetryPolicy: def retry_func(self) -> Callable: return self._human_delay if self.retry_policy == RetryPolicy.HUMAN else self._robot_delay - @property - def headers(self) -> dict[str, str]: - return _HEADERS - def _robot_delay(self, n: int) -> float: """ Compute delay for automated retry policy. @@ -594,7 +587,7 @@ def _request( # noqa: PLR0913, C901 # prepare headers headers = request_kwargs.pop("headers", {}).copy() - headers.update(_HEADERS) + headers.update(openml.config._HEADERS) files = request_kwargs.pop("files", None) diff --git a/openml/_api/clients/minio.py b/openml/_api/clients/minio.py index baaf91abd..920b485e0 100644 --- a/openml/_api/clients/minio.py +++ b/openml/_api/clients/minio.py @@ -3,9 +3,6 @@ from pathlib import Path import openml -from openml.__version__ import __version__ - -_HEADERS: dict[str, str] = {"user-agent": f"openml-python/{__version__}"} class MinIOClient: @@ -29,7 +26,3 @@ class MinIOClient: @property def path(self) -> Path: return Path(openml.config.get_cache_directory()) - - @property - def headers(self) -> dict[str, str]: - return _HEADERS diff --git a/openml/_config.py b/openml/_config.py index 18c4a3185..f50372a21 100644 --- a/openml/_config.py +++ b/openml/_config.py @@ -21,6 +21,8 @@ from openml.enums import APIVersion +from .__version__ import __version__ + logger = logging.getLogger(__name__) openml_logger = logging.getLogger("openml") @@ -141,6 +143,7 @@ def __init__(self) -> None: self.OPENML_CACHE_DIR_ENV_VAR = "OPENML_CACHE_DIR" self.OPENML_SKIP_PARQUET_ENV_VAR = "OPENML_SKIP_PARQUET" self._TEST_SERVER_NORMAL_USER_KEY = server_test_v1_apikey + self._HEADERS: dict[str, str] = {"user-agent": f"openml-python/{__version__}"} self.OPENML_TEST_SERVER_ADMIN_KEY_ENV_VAR = "OPENML_TEST_SERVER_ADMIN_KEY" self.TEST_SERVER_URL = cast("str", server_test_v1_server).split("/api/v1/xml")[0] @@ -176,6 +179,7 @@ def __setattr__(self, name: str, value: Any) -> None: "OPENML_CACHE_DIR_ENV_VAR", 
"OPENML_SKIP_PARQUET_ENV_VAR", "_TEST_SERVER_NORMAL_USER_KEY", + "_HEADERS", }: return object.__setattr__(self, name, value) diff --git a/tests/test_api/test_http.py b/tests/test_api/test_http.py index e0a9bd5b6..95863bfbb 100644 --- a/tests/test_api/test_http.py +++ b/tests/test_api/test_http.py @@ -1,7 +1,6 @@ from requests import Response, Request, Session from unittest.mock import patch import pytest -from openml.testing import TestBase import os from pathlib import Path from urllib.parse import urljoin, urlparse @@ -209,7 +208,7 @@ def test_post(http_client): url=urljoin(openml.config.server, resource_name), params={}, data={"api_key": openml.config.apikey}, - headers=http_client.headers, + headers=openml.config._HEADERS, files=resource_files, ) @@ -234,6 +233,6 @@ def test_delete(http_client): ), params={"api_key": openml.config.apikey}, data={}, - headers=http_client.headers, + headers=openml.config._HEADERS, files=None, ) From cb6d937e68b1d6a2068abdddee5736c533ed8049 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Wed, 25 Feb 2026 14:46:16 +0500 Subject: [PATCH 150/156] add fixtures for migration tests --- tests/conftest.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/tests/conftest.py b/tests/conftest.py index 5839ef8e2..c8455334b 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -34,6 +34,8 @@ from pathlib import Path import pytest import openml_sklearn +from openml._api import HTTPClient, MinIOClient +from openml.enums import APIVersion import openml from openml.testing import TestBase @@ -307,3 +309,28 @@ def workdir(tmp_path): os.chdir(tmp_path) yield tmp_path os.chdir(original_cwd) + + +@pytest.fixture +def use_api_v1() -> None: + openml.config.set_api_version(api_version=APIVersion.V1) + + +@pytest.fixture +def use_api_v2() -> None: + openml.config.set_api_version(api_version=APIVersion.V2) + + +@pytest.fixture +def http_client_v1() -> HTTPClient: + return HTTPClient(api_version=APIVersion.V1) + + 
+@pytest.fixture +def http_client_v2() -> HTTPClient: + return HTTPClient(api_version=APIVersion.V2) + + +@pytest.fixture +def minio_client() -> MinIOClient: + return MinIOClient() From 8544c8aa80e0bbd87159a6a3344ff9579bbf88ed Mon Sep 17 00:00:00 2001 From: geetu040 Date: Wed, 25 Feb 2026 14:46:35 +0500 Subject: [PATCH 151/156] update test_http.py with fixtures --- tests/test_api/test_http.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test_api/test_http.py b/tests/test_api/test_http.py index 95863bfbb..e2150f5b0 100644 --- a/tests/test_api/test_http.py +++ b/tests/test_api/test_http.py @@ -11,13 +11,13 @@ @pytest.fixture -def cache() -> HTTPCache: - return HTTPCache() +def cache(http_client_v1) -> HTTPCache: + return http_client_v1.cache @pytest.fixture -def http_client() -> HTTPClient: - return HTTPClient(api_version=APIVersion.V1) +def http_client(http_client_v1) -> HTTPClient: + return http_client_v1 @pytest.fixture From d4c413bf499f682e5a75ac7bc4bb55df12516725 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Wed, 25 Feb 2026 14:46:52 +0500 Subject: [PATCH 152/156] update test_versions.py --- tests/test_api/test_versions.py | 347 ++++++++++++++++++-------------- 1 file changed, 193 insertions(+), 154 deletions(-) diff --git a/tests/test_api/test_versions.py b/tests/test_api/test_versions.py index a31595457..8f0b17c75 100644 --- a/tests/test_api/test_versions.py +++ b/tests/test_api/test_versions.py @@ -1,179 +1,218 @@ import pytest from requests import Session, Response from unittest.mock import patch -from openml.testing import TestBase -from openml._api import FallbackProxy, ResourceAPI -from openml.enums import ResourceType, APIVersion +from openml._api import FallbackProxy, ResourceAPI, ResourceV1API, ResourceV2API, TaskAPI +from openml.enums import ResourceType from openml.exceptions import OpenMLNotSupportedError +import openml -class TestResourceAPIBase(TestBase): - resource: ResourceAPI | FallbackProxy - - @property - 
def http_client(self): - return self.resource._http - - def _publish(self): - resource_name = "task" - resource_files = {"description": """Resource Description File"""} - resource_id = 123 - - with patch.object(Session, "request") as mock_request: - mock_request.return_value = Response() - mock_request.return_value.status_code = 200 - mock_request.return_value._content = f'\n\t{resource_id}\n\n'.encode("utf-8") - - published_resource_id = self.resource.publish( - resource_name, - files=resource_files, - ) - - self.assertEqual(resource_id, published_resource_id) - - mock_request.assert_called_once_with( - method="POST", - url=self.http_client.server + self.http_client.base_url + resource_name, - params={}, - data={'api_key': self.http_client.api_key}, - headers=self.http_client.headers, - files=resource_files, - ) - - def _delete(self): - resource_name = "task" - resource_id = 123 - - with patch.object(Session, "request") as mock_request: - mock_request.return_value = Response() - mock_request.return_value.status_code = 200 - mock_request.return_value._content = f'\n {resource_id}\n\n'.encode("utf-8") - - self.resource.delete(resource_id) - - mock_request.assert_called_once_with( - method="DELETE", - url=self.http_client.server + self.http_client.base_url + resource_name + "/" + str(resource_id), - params={'api_key': self.http_client.api_key}, - data={}, - headers=self.http_client.headers, - files=None, - ) - - def _tag(self): - resource_id = 123 - resource_tag = "TAG" - - with patch.object(Session, "request") as mock_request: - mock_request.return_value = Response() - mock_request.return_value.status_code = 200 - mock_request.return_value._content = f'{resource_id}{resource_tag}'.encode("utf-8") - - tags = self.resource.tag(resource_id, resource_tag) - self.assertIn(resource_tag, tags) - - mock_request.assert_called_once_with( - method="POST", - url=self.http_client.server + self.http_client.base_url + self.resource.resource_type + "/tag", - params={}, - 
data={'api_key': self.http_client.api_key, 'task_id': resource_id, 'tag': resource_tag}, - headers=self.http_client.headers, - files=None, - ) - - def _untag(self): - resource_id = 123 - resource_tag = "TAG" - - with patch.object(Session, "request") as mock_request: - mock_request.return_value = Response() - mock_request.return_value.status_code = 200 - mock_request.return_value._content = f'{resource_id}'.encode("utf-8") - - tags = self.resource.untag(resource_id, resource_tag) - self.assertNotIn(resource_tag, tags) - - mock_request.assert_called_once_with( - method="POST", - url=self.http_client.server + self.http_client.base_url + self.resource.resource_type + "/untag", - params={}, - data={'api_key': self.http_client.api_key, 'task_id': resource_id, 'tag': resource_tag}, - headers=self.http_client.headers, - files=None, - ) - -class TestResourceV1API(TestResourceAPIBase): - def setUp(self): - super().setUp() - self.resource = self._create_resource( - api_version=APIVersion.V1, - resource_type=ResourceType.TASK, - ) +class DummyTaskAPI(ResourceAPI): + resource_type: ResourceType = ResourceType.TASK - def test_publish(self): - self._publish() - def test_delete(self): - self._delete() +class DummyTaskV1API(ResourceV1API, TaskAPI): + pass - def test_tag(self): - self._tag() - def test_untag(self): - self._untag() +class DummyTaskV2API(ResourceV2API, TaskAPI): + pass -class TestResourceV2API(TestResourceAPIBase): - def setUp(self): - super().setUp() - self.resource = self._create_resource( - api_version=APIVersion.V2, - resource_type=ResourceType.TASK, - ) +@pytest.fixture +def dummy_task_v1(http_client_v1, minio_client) -> DummyTaskV1API: + return DummyTaskV1API(http=http_client_v1, minio=minio_client) + - def test_publish(self): - with pytest.raises(OpenMLNotSupportedError): - self._publish() +@pytest.fixture +def dummy_task_v2(http_client_v2, minio_client) -> DummyTaskV1API: + return DummyTaskV2API(http=http_client_v2, minio=minio_client) - def test_delete(self): 
- with pytest.raises(OpenMLNotSupportedError): - self._delete() - def test_tag(self): - with pytest.raises(OpenMLNotSupportedError): - self._tag() +@pytest.fixture +def dummy_task_fallback(dummy_task_v1, dummy_task_v2) -> DummyTaskV1API: + return FallbackProxy(dummy_task_v2, dummy_task_v1) - def test_untag(self): - with pytest.raises(OpenMLNotSupportedError): - self._untag() +def _publish(resource): + resource_name = resource.resource_type.value + resource_files = {"description": "Resource Description File"} + resource_id = 123 -class TestResourceFallbackAPI(TestResourceAPIBase): - @property - def http_client(self): - # since these methods are not implemented for v2, they will fallback to v1 api - return self.http_clients[APIVersion.V1] + with patch.object(Session, "request") as mock_request: + mock_request.return_value = Response() + mock_request.return_value.status_code = 200 + mock_request.return_value._content = ( + f'\n' + f"\t{resource_id}\n" + f"\n" + ).encode("utf-8") - def setUp(self): - super().setUp() - resource_v1 = self._create_resource( - api_version=APIVersion.V1, - resource_type=ResourceType.TASK, + published_resource_id = resource.publish( + resource_name, + files=resource_files, ) - resource_v2 = self._create_resource( - api_version=APIVersion.V2, - resource_type=ResourceType.TASK, + + assert resource_id == published_resource_id + + mock_request.assert_called_once_with( + method="POST", + url=openml.config.server + resource_name, + params={}, + data={"api_key": openml.config.apikey}, + headers=openml.config._HEADERS, + files=resource_files, ) - self.resource = FallbackProxy(resource_v2, resource_v1) - def test_publish(self): - self._publish() - def test_delete(self): - self._delete() +def _delete(resource): + resource_name = resource.resource_type.value + resource_id = 123 + + with patch.object(Session, "request") as mock_request: + mock_request.return_value = Response() + mock_request.return_value.status_code = 200 + 
mock_request.return_value._content = ( + f'\n' + f" {resource_id}\n" + f"\n" + ).encode("utf-8") + + resource.delete(resource_id) + + mock_request.assert_called_once_with( + method="DELETE", + url=( + openml.config.server + + resource_name + + "/" + + str(resource_id) + ), + params={"api_key": openml.config.apikey}, + data={}, + headers=openml.config._HEADERS, + files=None, + ) + +def _tag(resource): + resource_id = 123 + resource_tag = "TAG" + + with patch.object(Session, "request") as mock_request: + mock_request.return_value = Response() + mock_request.return_value.status_code = 200 + mock_request.return_value._content = ( + f'' + f"{resource_id}" + f"{resource_tag}" + f"" + ).encode("utf-8") + + tags = resource.tag(resource_id, resource_tag) + + assert resource_tag in tags + + mock_request.assert_called_once_with( + method="POST", + url=( + openml.config.server + + resource.resource_type + + "/tag" + ), + params={}, + data={ + "api_key": openml.config.apikey, + "task_id": resource_id, + "tag": resource_tag, + }, + headers=openml.config._HEADERS, + files=None, + ) + + +def _untag(resource): + resource_id = 123 + resource_tag = "TAG" + + with patch.object(Session, "request") as mock_request: + mock_request.return_value = Response() + mock_request.return_value.status_code = 200 + mock_request.return_value._content = ( + f'' + f"{resource_id}" + f"" + ).encode("utf-8") + + tags = resource.untag(resource_id, resource_tag) + + assert resource_tag not in tags + + mock_request.assert_called_once_with( + method="POST", + url=( + openml.config.server + + resource.resource_type + + "/untag" + ), + params={}, + data={ + "api_key": openml.config.apikey, + "task_id": resource_id, + "tag": resource_tag, + }, + headers=openml.config._HEADERS, + files=None, + ) + + + +def test_v1_publish(dummy_task_v1, use_api_v1): + _publish(dummy_task_v1) + + +def test_v1_delete(dummy_task_v1, use_api_v1): + _delete(dummy_task_v1) + + +def test_v1_tag(dummy_task_v1, use_api_v1): + 
_tag(dummy_task_v1) + + +def test_v1_untag(dummy_task_v1, use_api_v1): + _untag(dummy_task_v1) + + +def test_v2_publish_not_supported(dummy_task_v2, use_api_v2): + with pytest.raises(OpenMLNotSupportedError): + _publish(dummy_task_v2) + + +def test_v2_delete_not_supported(dummy_task_v2, use_api_v2): + with pytest.raises(OpenMLNotSupportedError): + _delete(dummy_task_v2) + + +def test_v2_tag_not_supported(dummy_task_v2, use_api_v2): + with pytest.raises(OpenMLNotSupportedError): + _tag(dummy_task_v2) + + +def test_v2_untag_not_supported(dummy_task_v2, use_api_v2): + with pytest.raises(OpenMLNotSupportedError): + _untag(dummy_task_v2) + + +def test_fallback_publish(dummy_task_fallback, use_api_v1): + _publish(dummy_task_fallback) + + +def test_fallback_delete(dummy_task_fallback, use_api_v1): + _delete(dummy_task_fallback) + + +def test_fallback_tag(dummy_task_fallback, use_api_v1): + _tag(dummy_task_fallback) - def test_tag(self): - self._tag() - def test_untag(self): - self._untag() +def test_fallback_untag(dummy_task_fallback, use_api_v1): + _untag(dummy_task_fallback) From fab1a15472b1981483a5451f291d841fbe0ee961 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Wed, 25 Feb 2026 15:06:15 +0500 Subject: [PATCH 153/156] update test_versions.py --- tests/test_api/test_versions.py | 74 ++++++++++++++++----------------- 1 file changed, 37 insertions(+), 37 deletions(-) diff --git a/tests/test_api/test_versions.py b/tests/test_api/test_versions.py index 8f0b17c75..c533ead75 100644 --- a/tests/test_api/test_versions.py +++ b/tests/test_api/test_versions.py @@ -34,7 +34,8 @@ def dummy_task_fallback(dummy_task_v1, dummy_task_v2) -> DummyTaskV1API: return FallbackProxy(dummy_task_v2, dummy_task_v1) -def _publish(resource): +def test_v1_publish(dummy_task_v1, use_api_v1): + resource = dummy_task_v1 resource_name = resource.resource_type.value resource_files = {"description": "Resource Description File"} resource_id = 123 @@ -65,7 +66,8 @@ def _publish(resource): ) -def 
_delete(resource): +def test_v1_delete(dummy_task_v1, use_api_v1): + resource = dummy_task_v1 resource_name = resource.resource_type.value resource_id = 123 @@ -94,7 +96,9 @@ def _delete(resource): files=None, ) -def _tag(resource): + +def test_v1_tag(dummy_task_v1, use_api_v1): + resource = dummy_task_v1 resource_id = 123 resource_tag = "TAG" @@ -130,7 +134,8 @@ def _tag(resource): ) -def _untag(resource): +def test_v1_untag(dummy_task_v1, use_api_v1): + resource = dummy_task_v1 resource_id = 123 resource_tag = "TAG" @@ -165,54 +170,49 @@ def _untag(resource): ) - -def test_v1_publish(dummy_task_v1, use_api_v1): - _publish(dummy_task_v1) - - -def test_v1_delete(dummy_task_v1, use_api_v1): - _delete(dummy_task_v1) - - -def test_v1_tag(dummy_task_v1, use_api_v1): - _tag(dummy_task_v1) - - -def test_v1_untag(dummy_task_v1, use_api_v1): - _untag(dummy_task_v1) - - -def test_v2_publish_not_supported(dummy_task_v2, use_api_v2): +def test_v2_publish(dummy_task_v2, use_api_v2): with pytest.raises(OpenMLNotSupportedError): - _publish(dummy_task_v2) + dummy_task_v2.publish(path=None, files=None) -def test_v2_delete_not_supported(dummy_task_v2, use_api_v2): +def test_v2_delete(dummy_task_v2, use_api_v2): with pytest.raises(OpenMLNotSupportedError): - _delete(dummy_task_v2) + dummy_task_v2.delete(resource_id=None) -def test_v2_tag_not_supported(dummy_task_v2, use_api_v2): +def test_v2_tag(dummy_task_v2, use_api_v2): with pytest.raises(OpenMLNotSupportedError): - _tag(dummy_task_v2) + dummy_task_v2.tag(resource_id=None, tag=None) -def test_v2_untag_not_supported(dummy_task_v2, use_api_v2): +def test_v2_untag(dummy_task_v2, use_api_v2): with pytest.raises(OpenMLNotSupportedError): - _untag(dummy_task_v2) + dummy_task_v2.untag(resource_id=None, tag=None) -def test_fallback_publish(dummy_task_fallback, use_api_v1): - _publish(dummy_task_fallback) +def test_fallback_publish(dummy_task_fallback): + with patch.object(ResourceV1API, "publish") as mock_publish: + 
mock_publish.return_value = None + dummy_task_fallback.publish(path=None, files=None) + mock_publish.assert_called_once_with(path=None, files=None) -def test_fallback_delete(dummy_task_fallback, use_api_v1): - _delete(dummy_task_fallback) +def test_fallback_delete(dummy_task_fallback): + with patch.object(ResourceV1API, "delete") as mock_delete: + mock_delete.return_value = None + dummy_task_fallback.delete(resource_id=None) + mock_delete.assert_called_once_with(resource_id=None) -def test_fallback_tag(dummy_task_fallback, use_api_v1): - _tag(dummy_task_fallback) +def test_fallback_tag(dummy_task_fallback): + with patch.object(ResourceV1API, "tag") as mock_tag: + mock_tag.return_value = None + dummy_task_fallback.tag(resource_id=None, tag=None) + mock_tag.assert_called_once_with(resource_id=None, tag=None) -def test_fallback_untag(dummy_task_fallback, use_api_v1): - _untag(dummy_task_fallback) +def test_fallback_untag(dummy_task_fallback): + with patch.object(ResourceV1API, "untag") as mock_untag: + mock_untag.return_value = None + dummy_task_fallback.untag(resource_id=None, tag=None) + mock_untag.assert_called_once_with(resource_id=None, tag=None) From 276324a03cc01860049718bcf0bad5824af93317 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Thu, 26 Feb 2026 06:58:26 +0500 Subject: [PATCH 154/156] fix error message in HTTPClient.server --- openml/_api/clients/http.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openml/_api/clients/http.py b/openml/_api/clients/http.py index da6cdda09..913d3dd00 100644 --- a/openml/_api/clients/http.py +++ b/openml/_api/clients/http.py @@ -211,7 +211,7 @@ def __init__( def server(self) -> str: server = openml.config.servers[self.api_version]["server"] if server is None: - servers_repr = {k.value: v for k, v in openml.config.servers} + servers_repr = {k.value: v for k, v in openml.config.servers.items()} raise ValueError( f'server found to be None for api_version="{self.api_version}" in {servers_repr}' ) From 
73f759401cc27bfd81a5df18dc6e572d68b32eb7 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Thu, 26 Feb 2026 07:04:27 +0500 Subject: [PATCH 155/156] fixes in test_versions.py: use DummyTaskAPI instead of TaskAPI --- tests/test_api/test_versions.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_api/test_versions.py b/tests/test_api/test_versions.py index c533ead75..58ca3c91b 100644 --- a/tests/test_api/test_versions.py +++ b/tests/test_api/test_versions.py @@ -1,7 +1,7 @@ import pytest from requests import Session, Response from unittest.mock import patch -from openml._api import FallbackProxy, ResourceAPI, ResourceV1API, ResourceV2API, TaskAPI +from openml._api import FallbackProxy, ResourceAPI, ResourceV1API, ResourceV2API from openml.enums import ResourceType from openml.exceptions import OpenMLNotSupportedError import openml @@ -11,11 +11,11 @@ class DummyTaskAPI(ResourceAPI): resource_type: ResourceType = ResourceType.TASK -class DummyTaskV1API(ResourceV1API, TaskAPI): +class DummyTaskV1API(ResourceV1API, DummyTaskAPI): pass -class DummyTaskV2API(ResourceV2API, TaskAPI): +class DummyTaskV2API(ResourceV2API, DummyTaskAPI): pass From 2ee7fa351952bcf71f65edac64efab7357079e13 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Thu, 26 Feb 2026 07:40:01 +0500 Subject: [PATCH 156/156] add clients in openml._backend --- openml/_api/setup/backend.py | 13 +++++++++++++ openml/_api/setup/builder.py | 30 +++++++++++++++++++++++++----- 2 files changed, 38 insertions(+), 5 deletions(-) diff --git a/openml/_api/setup/backend.py b/openml/_api/setup/backend.py index dd94a4a79..8ed37714d 100644 --- a/openml/_api/setup/backend.py +++ b/openml/_api/setup/backend.py @@ -7,6 +7,7 @@ from .builder import APIBackendBuilder if TYPE_CHECKING: + from openml._api.clients import HTTPClient, MinIOClient from openml._api.resources import ( DatasetAPI, EstimationProcedureAPI, @@ -111,6 +112,18 @@ def run(self) -> RunAPI: def setup(self) -> SetupAPI: return 
cast("SetupAPI", self._backend.setup) + @property + def http_client(self) -> HTTPClient: + return cast("HTTPClient", self._backend.http_client) + + @property + def fallback_http_client(self) -> HTTPClient | None: + return cast("HTTPClient | None", self._backend.fallback_http_client) + + @property + def minio_client(self) -> MinIOClient: + return cast("MinIOClient", self._backend.minio_client) + @classmethod def get_instance(cls) -> APIBackend: """ diff --git a/openml/_api/setup/builder.py b/openml/_api/setup/builder.py index 0d55de85f..573129316 100644 --- a/openml/_api/setup/builder.py +++ b/openml/_api/setup/builder.py @@ -1,10 +1,15 @@ from __future__ import annotations from collections.abc import Mapping +from typing import TYPE_CHECKING from openml._api.clients import HTTPClient, MinIOClient -from openml._api.resources import API_REGISTRY, FallbackProxy, ResourceAPI -from openml.enums import APIVersion, ResourceType +from openml._api.resources import API_REGISTRY, FallbackProxy +from openml.enums import ResourceType + +if TYPE_CHECKING: + from openml._api.resources import ResourceAPI + from openml.enums import APIVersion class APIBackendBuilder: @@ -41,10 +46,17 @@ class APIBackendBuilder: API interface for run resources. setup : ResourceAPI | FallbackProxy API interface for setup resources. + http_client : HTTPClient + Client for HTTP Communication. + fallback_http_client : HTTPClient | None + Fallback Client for HTTP Communication. + minio_client : MinIOClient + Client for MinIO Communication. 
""" def __init__( self, + clients: Mapping[str, HTTPClient | MinIOClient | None], resource_apis: Mapping[ResourceType, ResourceAPI | FallbackProxy], ): self.dataset = resource_apis[ResourceType.DATASET] @@ -56,6 +68,9 @@ def __init__( self.study = resource_apis[ResourceType.STUDY] self.run = resource_apis[ResourceType.RUN] self.setup = resource_apis[ResourceType.SETUP] + self.http_client = clients["http_client"] + self.fallback_http_client = clients["fallback_http_client"] + self.minio_client = clients["minio_client"] @classmethod def build( @@ -82,17 +97,22 @@ def build( Builder instance with all resource API interfaces initialized. """ minio_client = MinIOClient() - primary_http_client = HTTPClient(api_version=api_version) + clients: dict[str, HTTPClient | MinIOClient | None] = { + "http_client": primary_http_client, + "fallback_http_client": None, + "minio_client": minio_client, + } resource_apis: dict[ResourceType, ResourceAPI] = {} for resource_type, resource_api_cls in API_REGISTRY[api_version].items(): resource_apis[resource_type] = resource_api_cls(primary_http_client, minio_client) if fallback_api_version is None: - return cls(resource_apis) + return cls(clients, resource_apis) fallback_http_client = HTTPClient(api_version=fallback_api_version) + clients["fallback_http_client"] = fallback_http_client fallback_resource_apis: dict[ResourceType, ResourceAPI] = {} for resource_type, resource_api_cls in API_REGISTRY[fallback_api_version].items(): @@ -105,4 +125,4 @@ def build( for name in resource_apis } - return cls(merged) + return cls(clients, merged)