diff --git a/scripts/check_async_docstrings.py b/scripts/check_async_docstrings.py index 311a6a46..b3defbc5 100755 --- a/scripts/check_async_docstrings.py +++ b/scripts/check_async_docstrings.py @@ -11,6 +11,12 @@ found_issues = False +# Methods where the async docstring is intentionally different from the sync one +# (e.g. because they accept different parameter types). +SKIPPED_METHODS = { + 'with_custom_client', +} + # Get the directory of the source files clients_path = Path(__file__).parent.resolve() / '../src/apify_client' @@ -37,6 +43,10 @@ if len(async_method.decorators) and str(async_method.decorators[0].value) == 'ignore_docs': continue + # Skip methods whose docstrings are intentionally different + if async_method.name in SKIPPED_METHODS: + continue + # If the sync method has a docstring, check if it matches the async dostring if sync_method and isinstance(sync_method.value[0].value, str): sync_docstring = sync_method.value[0].value diff --git a/scripts/fix_async_docstrings.py b/scripts/fix_async_docstrings.py index 92849e20..136bc9fb 100755 --- a/scripts/fix_async_docstrings.py +++ b/scripts/fix_async_docstrings.py @@ -6,6 +6,12 @@ from redbaron import RedBaron from utils import sync_to_async_docstring +# Methods where the async docstring is intentionally different from the sync one +# (e.g. because they accept different parameter types). +SKIPPED_METHODS = { + 'with_custom_client', +} + # Get the directory of the source files clients_path = Path(__file__).parent.resolve() / '../src/apify_client' @@ -34,6 +40,10 @@ if len(async_method.decorators) and str(async_method.decorators[0].value) == 'ignore_docs': continue + # Skip methods whose docstrings are intentionally different + if async_method.name in SKIPPED_METHODS: + continue + # Skip methods that don't exist in the sync class if sync_method is None: continue diff --git a/src/apify_client/__init__.py b/src/apify_client/__init__.py index e2e6081b..dec2fc2d 100644 --- a/src/apify_client/__init__.py +++ b/src/apify_client/__init__.py @@ -1,7 +1,23 @@ from importlib import metadata from ._apify_client import ApifyClient, ApifyClientAsync +from ._http_clients import ( + HttpClient, + HttpClientAsync, + HttpResponse, + ImpitHttpClient, + ImpitHttpClientAsync, +) __version__ = metadata.version('apify-client') -__all__ = ['ApifyClient', 'ApifyClientAsync', '__version__'] +__all__ = [ + 'ApifyClient', + 'ApifyClientAsync', + 'HttpClient', + 'HttpClientAsync', + 'HttpResponse', + 'ImpitHttpClient', + 'ImpitHttpClientAsync', + '__version__', +] diff --git a/src/apify_client/_apify_client.py b/src/apify_client/_apify_client.py index 17651f15..b6f7880c 100644 --- a/src/apify_client/_apify_client.py +++ b/src/apify_client/_apify_client.py @@ -1,8 +1,7 @@ from __future__ import annotations -import warnings from functools import cached_property -from typing import TYPE_CHECKING, ClassVar +from typing import TYPE_CHECKING from apify_client._client_registry import ClientRegistry, ClientRegistryAsync from apify_client._consts import ( @@ -13,7 +12,7 @@ DEFAULT_TIMEOUT, ) from apify_client._docs import docs_group -from apify_client._http_clients import HttpClient, HttpClientAsync +from apify_client._http_clients import HttpClient, HttpClientAsync, ImpitHttpClient, ImpitHttpClientAsync from apify_client._resource_clients import ( ActorClient, ActorClientAsync, @@ -71,6 +70,7 @@ WebhookDispatchCollectionClientAsync, ) from apify_client._statistics import ClientStatistics +from apify_client._utils import check_custom_headers if TYPE_CHECKING: from datetime import timedelta @@ -94,8 +94,8 @@ class ApifyClient: client = ApifyClient(token='MY-APIFY-TOKEN') # Start an Actor and wait for it to finish. - actor_client = client.actor('username/my-actor') - run = actor_client.call(run_input={'query': 'web scraping'}) + actor_client = client.actor('apify/python-example') + run = actor_client.call(run_input={'first_number': 1, 'second_number': 2}) # Fetch results from the run's default dataset. if run is not None: @@ -106,8 +106,6 @@ class ApifyClient: ``` """ - _OVERRIDABLE_DEFAULT_HEADERS: ClassVar[set[str]] = {'Accept', 'Authorization', 'Accept-Encoding', 'User-Agent'} - def __init__( self, token: str | None = None, @@ -121,6 +119,8 @@ def __init__( ) -> None: """Initialize the Apify API client. + To use a custom HTTP client, use the `with_custom_client` class method instead. + Args: token: The Apify API token. You can find your token on the [Integrations](https://console.apify.com/account/integrations) page in the Apify Console. @@ -129,9 +129,10 @@ def __init__( as well. api_public_url: The globally accessible URL of the Apify API server. Should be set only if `api_url` is an internal URL that is not globally accessible. Defaults to https://api.apify.com. - max_retries: Maximum number of retry attempts for failed requests. - min_delay_between_retries: Minimum delay between retries (increases exponentially with each attempt). - timeout: Timeout for HTTP requests sent to the Apify API. + max_retries: How many times to retry a failed request at most. + min_delay_between_retries: How long will the client wait between retrying requests + (increases exponentially from this value). + timeout: The socket timeout of the HTTP requests sent to the Apify API. headers: Additional HTTP headers to include in all API requests. """ # We need to do this because of mocking in tests and default mutable arguments. @@ -139,7 +140,7 @@ def __init__( api_public_url = DEFAULT_API_URL if api_public_url is None else api_public_url if headers: - self._check_custom_headers(headers) + check_custom_headers(self.__class__.__name__, headers) self._token = token """Apify API token for authentication.""" @@ -153,15 +154,8 @@ def __init__( self._statistics = ClientStatistics() """Collector for client request statistics.""" - self._http_client = HttpClient( - token=self._token, - timeout=timeout, - max_retries=max_retries, - min_delay_between_retries=min_delay_between_retries, - statistics=self._statistics, - headers=headers, - ) - """HTTP client used to communicate with the Apify API.""" + self._http_client: HttpClient | None = None + """HTTP client used to communicate with the Apify API. Lazily initialized on first access.""" self._client_registry = ClientRegistry( actor_client=ActorClient, @@ -194,33 +188,86 @@ def __init__( ) """Registry of resource client classes used for dependency injection.""" + # Configuration for the default HTTP client (used if a custom client is not provided). + self._max_retries = max_retries + self._min_delay_between_retries = min_delay_between_retries + self._timeout = timeout + self._headers = headers + + @classmethod + def with_custom_client( + cls, + token: str | None = None, + *, + api_url: str = DEFAULT_API_URL, + api_public_url: str | None = DEFAULT_API_URL, + http_client: HttpClient, + ) -> ApifyClient: + """Create an `ApifyClient` instance with a custom HTTP client. + + Use this alternative constructor when you want to provide your own HTTP client implementation + instead of the default one. The custom client is responsible for its own configuration + (retries, timeouts, headers, etc.). + + ### Usage + + ```python + from apify_client import ApifyClient, HttpClient, HttpResponse + + class MyHttpClient(HttpClient): + def call(self, *, method, url, **kwargs) -> HttpResponse: + ... + + client = ApifyClient.with_custom_client( + token='MY-APIFY-TOKEN', + http_client=MyHttpClient(), + ) + ``` + + Args: + token: The Apify API token. + api_url: The URL of the Apify API server to connect to. Defaults to https://api.apify.com. + api_public_url: The globally accessible URL of the Apify API server. Defaults to https://api.apify.com. + http_client: A custom HTTP client instance extending `HttpClient`. + """ + instance = cls(token=token, api_url=api_url, api_public_url=api_public_url) + instance._http_client = http_client + return instance + + @property + def token(self) -> str | None: + """The Apify API token used by the client.""" + return self._token + + @property + def http_client(self) -> HttpClient: + """The HTTP client instance used for API communication. + + Returns the custom HTTP client if one was provided via `with_custom_client`, + or the default `ImpitHttpClient` otherwise (lazily created on first access). + """ + if self._http_client is None: + self._http_client = ImpitHttpClient( + token=self._token, + timeout=self._timeout, + max_retries=self._max_retries, + min_delay_between_retries=self._min_delay_between_retries, + statistics=self._statistics, + headers=self._headers, + ) + + return self._http_client + @cached_property def _base_kwargs(self) -> dict: """Base keyword arguments for resource client construction.""" return { 'base_url': self._base_url, 'public_base_url': self._public_base_url, - 'http_client': self._http_client, + 'http_client': self.http_client, 'client_registry': self._client_registry, } - def _check_custom_headers(self, headers: dict[str, str]) -> None: - """Warn if custom headers override important default headers.""" - overwrite_headers = [key for key in headers if key.title() in self._OVERRIDABLE_DEFAULT_HEADERS] - if overwrite_headers: - warnings.warn( - f'{", ".join(overwrite_headers)} headers of {self.__class__.__name__} was overridden with an ' - 'explicit value. A wrong header value can lead to API errors, it is recommended to use the default ' - f'value for following headers: {", ".join(self._OVERRIDABLE_DEFAULT_HEADERS)}.', - category=UserWarning, - stacklevel=3, - ) - - @property - def token(self) -> str | None: - """The Apify API token used by the client.""" - return self._token - def actor(self, actor_id: str) -> ActorClient: """Get the sub-client for a specific Actor. @@ -385,8 +432,8 @@ async def main() -> None: client = ApifyClientAsync(token='MY-APIFY-TOKEN') # Start an Actor and wait for it to finish. - actor_client = client.actor('username/my-actor') - run = await actor_client.call(run_input={'query': 'web scraping'}) + actor_client = client.actor('apify/python-example') + run = await actor_client.call(run_input={'first_number': 1, 'second_number': 2}) # Fetch results from the run's default dataset. if run is not None: @@ -400,8 +447,6 @@ async def main() -> None: ``` """ - _OVERRIDABLE_DEFAULT_HEADERS: ClassVar[set[str]] = {'Accept', 'Authorization', 'Accept-Encoding', 'User-Agent'} - def __init__( self, token: str | None = None, @@ -415,6 +460,8 @@ def __init__( ) -> None: """Initialize the Apify API client. + To use a custom HTTP client, use the `with_custom_client` class method instead. + Args: token: The Apify API token. You can find your token on the [Integrations](https://console.apify.com/account/integrations) page in the Apify Console. @@ -423,9 +470,10 @@ def __init__( as well. api_public_url: The globally accessible URL of the Apify API server. Should be set only if `api_url` is an internal URL that is not globally accessible. Defaults to https://api.apify.com. - max_retries: Maximum number of retry attempts for failed requests. - min_delay_between_retries: Minimum delay between retries (increases exponentially with each attempt). - timeout: Timeout for HTTP requests sent to the Apify API. + max_retries: How many times to retry a failed request at most. + min_delay_between_retries: How long will the client wait between retrying requests + (increases exponentially from this value). + timeout: The socket timeout of the HTTP requests sent to the Apify API. headers: Additional HTTP headers to include in all API requests. """ # We need to do this because of mocking in tests and default mutable arguments. @@ -433,7 +481,7 @@ def __init__( api_public_url = DEFAULT_API_URL if api_public_url is None else api_public_url if headers: - self._check_custom_headers(headers) + check_custom_headers(self.__class__.__name__, headers) self._token = token """Apify API token for authentication.""" @@ -447,15 +495,8 @@ def __init__( self._statistics = ClientStatistics() """Collector for client request statistics.""" - self._http_client = HttpClientAsync( - token=self._token, - timeout=timeout, - max_retries=max_retries, - min_delay_between_retries=min_delay_between_retries, - statistics=self._statistics, - headers=headers, - ) - """HTTP client used to communicate with the Apify API.""" + self._http_client: HttpClientAsync | None = None + """HTTP client used to communicate with the Apify API. Lazily initialized on first access.""" self._client_registry = ClientRegistryAsync( actor_client=ActorClientAsync, @@ -488,33 +529,85 @@ def __init__( ) """Registry of resource client classes used for dependency injection.""" + # Configuration for the default HTTP client (used if a custom client is not provided). + self._max_retries = max_retries + self._min_delay_between_retries = min_delay_between_retries + self._timeout = timeout + self._headers = headers + + @classmethod + def with_custom_client( + cls, + token: str | None = None, + *, + api_url: str = DEFAULT_API_URL, + api_public_url: str | None = DEFAULT_API_URL, + http_client: HttpClientAsync, + ) -> ApifyClientAsync: + """Create an `ApifyClientAsync` instance with a custom HTTP client. + + Use this alternative constructor when you want to provide your own HTTP client implementation + instead of the default one. The custom client is responsible for its own configuration + (retries, timeouts, headers, etc.). + + ### Usage + + ```python + from apify_client import ApifyClientAsync, HttpClientAsync, HttpResponse + + class MyHttpClient(HttpClientAsync): + async def call(self, *, method, url, **kwargs) -> HttpResponse: + ... + + client = ApifyClientAsync.with_custom_client( + token='MY-APIFY-TOKEN', + http_client=MyHttpClient(), + ) + ``` + + Args: + token: The Apify API token. + api_url: The URL of the Apify API server to connect to. Defaults to https://api.apify.com. + api_public_url: The globally accessible URL of the Apify API server. Defaults to https://api.apify.com. + http_client: A custom HTTP client instance extending `HttpClientAsync`. + """ + instance = cls(token=token, api_url=api_url, api_public_url=api_public_url) + instance._http_client = http_client + return instance + + @property + def token(self) -> str | None: + """The Apify API token used by the client.""" + return self._token + + @property + def http_client(self) -> HttpClientAsync: + """The HTTP client instance used for API communication. + + Returns the custom HTTP client if one was provided via `with_custom_client`, + or the default `ImpitHttpClientAsync` otherwise (lazily created on first access). + """ + if self._http_client is None: + self._http_client = ImpitHttpClientAsync( + token=self._token, + timeout=self._timeout, + max_retries=self._max_retries, + min_delay_between_retries=self._min_delay_between_retries, + statistics=self._statistics, + headers=self._headers, + ) + return self._http_client + @cached_property def _base_kwargs(self) -> dict: """Base keyword arguments for resource client construction.""" return { 'base_url': self._base_url, 'public_base_url': self._public_base_url, - 'http_client': self._http_client, + 'http_client': self.http_client, 'client_registry': self._client_registry, } - def _check_custom_headers(self, headers: dict[str, str]) -> None: - """Warn if custom headers override important default headers.""" - overwrite_headers = [key for key in headers if key.title() in self._OVERRIDABLE_DEFAULT_HEADERS] - if overwrite_headers: - warnings.warn( - f'{", ".join(overwrite_headers)} headers of {self.__class__.__name__} was overridden with an ' - 'explicit value. A wrong header value can lead to API errors, it is recommended to use the default ' - f'value for following headers: {", ".join(self._OVERRIDABLE_DEFAULT_HEADERS)}.', - category=UserWarning, - stacklevel=3, - ) - - @property - def token(self) -> str | None: - """The Apify API token used by the client.""" - return self._token - def actor(self, actor_id: str) -> ActorClientAsync: """Get the sub-client for a specific Actor. diff --git a/src/apify_client/_consts.py b/src/apify_client/_consts.py index e61491f1..32d32909 100644 --- a/src/apify_client/_consts.py +++ b/src/apify_client/_consts.py @@ -47,3 +47,6 @@ } ) """Set of terminal Actor job statuses that indicate the job has finished.""" + +OVERRIDABLE_DEFAULT_HEADERS = {'Accept', 'Authorization', 'Accept-Encoding', 'User-Agent'} +"""Headers that can be overridden by users, but will trigger a warning if they do so, as it may lead to API errors.""" diff --git a/src/apify_client/_http_clients/__init__.py b/src/apify_client/_http_clients/__init__.py index 8d096835..1c821677 100644 --- a/src/apify_client/_http_clients/__init__.py +++ b/src/apify_client/_http_clients/__init__.py @@ -1,6 +1,10 @@ -from ._http_client import HttpClient, HttpClientAsync +from ._base import HttpClient, HttpClientAsync, HttpResponse +from ._impit import ImpitHttpClient, ImpitHttpClientAsync __all__ = [ 'HttpClient', 'HttpClientAsync', + 'HttpResponse', + 'ImpitHttpClient', + 'ImpitHttpClientAsync', ] diff --git a/src/apify_client/_http_clients/_base.py b/src/apify_client/_http_clients/_base.py index a91de5cf..cb070ebf 100644 --- a/src/apify_client/_http_clients/_base.py +++ b/src/apify_client/_http_clients/_base.py @@ -4,26 +4,81 @@ import json as jsonlib import os import sys +from abc import ABC, abstractmethod from datetime import UTC, datetime, timedelta from importlib import metadata -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING, Any, Protocol, runtime_checkable from urllib.parse import urlencode -import impit - from apify_client._consts import DEFAULT_MAX_RETRIES, DEFAULT_MIN_DELAY_BETWEEN_RETRIES, DEFAULT_TIMEOUT +from apify_client._docs import docs_group from apify_client._statistics import ClientStatistics from apify_client._utils import to_seconds -from apify_client.errors import InvalidResponseBodyError if TYPE_CHECKING: + from collections.abc import AsyncIterator, Iterator, Mapping + from apify_client._consts import JsonSerializable -class BaseHttpClient: - """Base class for HTTP clients with shared configuration and utilities. +@docs_group('HTTP clients') +@runtime_checkable +class HttpResponse(Protocol): + """Protocol for HTTP response objects returned by HTTP clients. + + Any object that has the required attributes and methods can be used as an HTTP response + (e.g., `impit.Response`). This enables custom HTTP client implementations to return + their own response types. + """ + + @property + def status_code(self) -> int: + """HTTP status code of the response.""" + + @property + def text(self) -> str: + """Response body decoded as text.""" + + @property + def content(self) -> bytes: + """Raw response body as bytes.""" + + @property + def headers(self) -> Mapping[str, str]: + """Response headers as a mapping.""" + + def json(self) -> Any: + """Parse response body as JSON.""" + + def read(self) -> bytes: + """Read the entire response body.""" + + async def aread(self) -> bytes: + """Read the entire response body asynchronously.""" + + def close(self) -> None: + """Close the response and release the connection.""" + + async def aclose(self) -> None: + """Close the response and release the connection asynchronously.""" - Subclasses should call `super().__init__()` and create their specific impit client using the `_headers` attribute. + def iter_bytes(self) -> Iterator[bytes]: + """Iterate over the response body in bytes chunks.""" + + def aiter_bytes(self) -> AsyncIterator[bytes]: + """Iterate over the response body in bytes chunks asynchronously.""" + + +@docs_group('HTTP clients') +class HttpClientBase: + """Shared configuration and utilities for HTTP clients. + + Provides common functionality for both sync and async HTTP clients including: + header construction, parameter parsing, request body preparation, URL building, + and timeout calculation. + + Subclasses should call `super().__init__()` to initialize shared configuration. + The helper methods are then available for use in the `call()` implementation. """ def __init__( @@ -36,7 +91,7 @@ def __init__( statistics: ClientStatistics | None = None, headers: dict[str, str] | None = None, ) -> None: - """Initialize the base HTTP client. + """Initialize the HTTP client base. Args: token: Apify API token for authentication. @@ -51,7 +106,7 @@ def __init__( self._min_delay_between_retries = min_delay_between_retries self._statistics = statistics or ClientStatistics() - # Build headers for subclasses to use when creating their impit clients. + # Build default headers. default_headers: dict[str, str] = {'Accept': 'application/json, */*'} workflow_key = os.getenv('APIFY_WORKFLOW_KEY') @@ -94,22 +149,6 @@ def _parse_params(params: dict[str, Any] | None) -> dict[str, Any] | None: return parsed_params - @staticmethod - def _is_retryable_error(exc: Exception) -> bool: - """Check if an exception represents a transient error that should be retried. - - All ``impit.HTTPError`` subclasses are considered retryable because they represent transport-level failures - (network issues, timeouts, protocol errors, body decoding errors) that are typically transient. HTTP status - code errors are handled separately in ``_make_request`` based on the response status code, not here. - """ - return isinstance( - exc, - ( - InvalidResponseBodyError, - impit.HTTPError, - ), - ) - def _prepare_request_call( self, headers: dict[str, str] | None = None, @@ -158,3 +197,90 @@ def _calculate_timeout(self, attempt: int, timeout: timedelta | None = None) -> timeout_secs = to_seconds(timeout or self._timeout) client_timeout_secs = to_seconds(self._timeout) return min(client_timeout_secs, timeout_secs * 2 ** (attempt - 1)) + + +@docs_group('HTTP clients') +class HttpClient(HttpClientBase, ABC): + """Abstract base class for synchronous HTTP clients used by `ApifyClient`. + + Extend this class to create a custom synchronous HTTP client. Override the `call` method + with your implementation. Helper methods from the base class are available for request + preparation, URL building, and parameter parsing. + """ + + @abstractmethod + def call( + self, + *, + method: str, + url: str, + headers: dict[str, str] | None = None, + params: dict[str, Any] | None = None, + data: str | bytes | bytearray | None = None, + json: Any = None, + stream: bool | None = None, + timeout: timedelta | None = None, + ) -> HttpResponse: + """Make an HTTP request. + + Args: + method: HTTP method (GET, POST, PUT, DELETE, etc.). + url: Full URL to make the request to. + headers: Additional headers to include in this request. + params: Query parameters to append to the URL. + data: Raw request body data. Cannot be used together with json. + json: JSON-serializable data for the request body. Cannot be used together with data. + stream: Whether to stream the response body. + timeout: Timeout for this specific request. + + Returns: + The HTTP response object. + + Raises: + ApifyApiError: If the request fails after all retries or returns a non-retryable error status. + ValueError: If both json and data are provided. + """ + ... + + +@docs_group('HTTP clients') +class HttpClientAsync(HttpClientBase, ABC): + """Abstract base class for asynchronous HTTP clients used by `ApifyClientAsync`. + + Extend this class to create a custom asynchronous HTTP client. See `HttpClient` + for details on the expected behavior. + """ + + @abstractmethod + async def call( + self, + *, + method: str, + url: str, + headers: dict[str, str] | None = None, + params: dict[str, Any] | None = None, + data: str | bytes | bytearray | None = None, + json: Any = None, + stream: bool | None = None, + timeout: timedelta | None = None, + ) -> HttpResponse: + """Make an HTTP request. + + Args: + method: HTTP method (GET, POST, PUT, DELETE, etc.). + url: Full URL to make the request to. + headers: Additional headers to include in this request. + params: Query parameters to append to the URL. + data: Raw request body data. Cannot be used together with json. + json: JSON-serializable data for the request body. Cannot be used together with data. + stream: Whether to stream the response body. + timeout: Timeout for this specific request. + + Returns: + The HTTP response object. + + Raises: + ApifyApiError: If the request fails after all retries or returns a non-retryable error status. + ValueError: If both json and data are provided. + """ + ... diff --git a/src/apify_client/_http_clients/_http_client.py b/src/apify_client/_http_clients/_impit.py similarity index 89% rename from src/apify_client/_http_clients/_http_client.py rename to src/apify_client/_http_clients/_impit.py index 7fc10cc2..3aed250f 100644 --- a/src/apify_client/_http_clients/_http_client.py +++ b/src/apify_client/_http_clients/_impit.py @@ -12,15 +12,16 @@ from apify_client._consts import DEFAULT_MAX_RETRIES, DEFAULT_MIN_DELAY_BETWEEN_RETRIES, DEFAULT_TIMEOUT from apify_client._docs import docs_group -from apify_client._http_clients._base import BaseHttpClient +from apify_client._http_clients import HttpClient, HttpClientAsync from apify_client._logging import log_context, logger_name from apify_client._utils import to_seconds -from apify_client.errors import ApifyApiError +from apify_client.errors import ApifyApiError, InvalidResponseBodyError if TYPE_CHECKING: from collections.abc import Awaitable, Callable from apify_client._consts import JsonSerializable + from apify_client._http_clients import HttpResponse from apify_client._statistics import ClientStatistics T = TypeVar('T') @@ -28,13 +29,29 @@ logger = logging.getLogger(logger_name) +def _is_retryable_error(exc: Exception) -> bool: + """Check if an exception represents a transient error that should be retried. + + All `impit.HTTPError` subclasses are considered retryable because they represent transport-level failures + (network issues, timeouts, protocol errors, body decoding errors) that are typically transient. HTTP status + code errors are handled separately in `_make_request` based on the response status code, not here. + """ + return isinstance( + exc, + ( + InvalidResponseBodyError, + impit.HTTPError, + ), + ) + + @docs_group('HTTP clients') -class HttpClient(BaseHttpClient): - """Synchronous HTTP client for the Apify API. +class ImpitHttpClient(HttpClient): + """Synchronous HTTP client for the Apify API built on top of [Impit](https://github.com/apify/impit). - Handles authentication, request serialization, and automatic retries with exponential backoff - for rate-limited (HTTP 429) and server error (HTTP 5xx) responses. Non-retryable errors - (e.g. HTTP 4xx client errors) are raised immediately. + Impit is a high-performance HTTP client written in Rust that provides browser-like TLS fingerprints, + automatic header ordering, and HTTP/2 support. This client wraps `impit.Client` and adds automatic retries + with exponential backoff for rate-limited (HTTP 429) and server error (HTTP 5xx) responses. """ def __init__( @@ -47,7 +64,7 @@ def __init__( statistics: ClientStatistics | None = None, headers: dict[str, str] | None = None, ) -> None: - """Initialize the synchronous HTTP client. + """Initialize the Impit-based synchronous HTTP client. Args: token: Apify API token for authentication. @@ -83,7 +100,7 @@ def call( json: JsonSerializable | None = None, stream: bool | None = None, timeout: timedelta | None = None, - ) -> impit.Response: + ) -> HttpResponse: """Make an HTTP request with automatic retry and exponential backoff. Args: @@ -184,7 +201,7 @@ def _make_request( except Exception as exc: logger.debug('Request threw exception', exc_info=exc) - if not self._is_retryable_error(exc): + if not _is_retryable_error(exc): logger.debug('Exception is not retryable', exc_info=exc) stop_retrying() raise @@ -255,12 +272,12 @@ def stop_retrying() -> None: @docs_group('HTTP clients') -class HttpClientAsync(BaseHttpClient): - """Asynchronous HTTP client for the Apify API. +class ImpitHttpClientAsync(HttpClientAsync): + """Asynchronous HTTP client for the Apify API built on top of [Impit](https://github.com/apify/impit). - Handles authentication, request serialization, and automatic retries with exponential backoff - for rate-limited (HTTP 429) and server error (HTTP 5xx) responses. Non-retryable errors - (e.g. HTTP 4xx client errors) are raised immediately. + Impit is a high-performance HTTP client written in Rust that provides browser-like TLS fingerprints, + automatic header ordering, and HTTP/2 support. This client wraps `impit.AsyncClient` and adds automatic retries + with exponential backoff for rate-limited (HTTP 429) and server error (HTTP 5xx) responses. """ def __init__( @@ -273,7 +290,7 @@ def __init__( statistics: ClientStatistics | None = None, headers: dict[str, str] | None = None, ) -> None: - """Initialize the asynchronous HTTP client. + """Initialize the Impit-based asynchronous HTTP client. Args: token: Apify API token for authentication. @@ -309,7 +326,7 @@ async def call( json: JsonSerializable | None = None, stream: bool | None = None, timeout: timedelta | None = None, - ) -> impit.Response: + ) -> HttpResponse: """Make an HTTP request with automatic retry and exponential backoff. Args: @@ -410,7 +427,7 @@ async def _make_request( except Exception as exc: logger.debug('Request threw exception', exc_info=exc) - if not self._is_retryable_error(exc): + if not _is_retryable_error(exc): logger.debug('Exception is not retryable', exc_info=exc) stop_retrying() raise diff --git a/src/apify_client/_resource_clients/dataset.py b/src/apify_client/_resource_clients/dataset.py index 2720a6cc..f87f13f0 100644 --- a/src/apify_client/_resource_clients/dataset.py +++ b/src/apify_client/_resource_clients/dataset.py @@ -23,9 +23,8 @@ from collections.abc import AsyncIterator, Iterator from datetime import timedelta - import impit - from apify_client._consts import JsonSerializable + from apify_client._http_clients import HttpResponse from apify_client._models import GeneralAccess @@ -490,7 +489,7 @@ def stream_items( xml_root: str | None = None, xml_row: str | None = None, signature: str | None = None, - ) -> Iterator[impit.Response]: + ) -> Iterator[HttpResponse]: """Retrieve the items in the dataset as a stream. https://docs.apify.com/api/v2#/reference/datasets/item-collection/get-items @@ -1017,7 +1016,7 @@ async def stream_items( xml_root: str | None = None, xml_row: str | None = None, signature: str | None = None, - ) -> AsyncIterator[impit.Response]: + ) -> AsyncIterator[HttpResponse]: """Retrieve the items in the dataset as a stream. https://docs.apify.com/api/v2#/reference/datasets/item-collection/get-items diff --git a/src/apify_client/_resource_clients/key_value_store.py b/src/apify_client/_resource_clients/key_value_store.py index 03119c5f..f3278f4d 100644 --- a/src/apify_client/_resource_clients/key_value_store.py +++ b/src/apify_client/_resource_clients/key_value_store.py @@ -30,12 +30,11 @@ from collections.abc import AsyncIterator, Iterator from datetime import timedelta - from impit import Response - + from apify_client._http_clients import HttpResponse from apify_client._models import GeneralAccess -def _parse_get_record_response(response: Response) -> Any: +def _parse_get_record_response(response: HttpResponse) -> Any: """Parse an HTTP response based on its content type. Args: diff --git a/src/apify_client/_resource_clients/log.py b/src/apify_client/_resource_clients/log.py index 07493ab9..9a19d1a8 100644 --- a/src/apify_client/_resource_clients/log.py +++ b/src/apify_client/_resource_clients/log.py @@ -11,7 +11,7 @@ if TYPE_CHECKING: from collections.abc import AsyncIterator, Iterator - import impit + from apify_client._http_clients import HttpResponse @docs_group('Resource clients') @@ -77,7 +77,7 @@ def get_as_bytes(self, *, raw: bool = False) -> bytes | None: return None @contextmanager - def stream(self, *, raw: bool = False) -> Iterator[impit.Response | None]: + def stream(self, *, raw: bool = False) -> Iterator[HttpResponse | None]: """Retrieve the log as a stream. https://docs.apify.com/api/v2#/reference/logs/log/get-log @@ -169,7 +169,7 @@ async def get_as_bytes(self, *, raw: bool = False) -> bytes | None: return None @asynccontextmanager - async def stream(self, *, raw: bool = False) -> AsyncIterator[impit.Response | None]: + async def stream(self, *, raw: bool = False) -> AsyncIterator[HttpResponse | None]: """Retrieve the log as a stream. https://docs.apify.com/api/v2#/reference/logs/log/get-log diff --git a/src/apify_client/_resource_clients/task.py b/src/apify_client/_resource_clients/task.py index 04d76883..4e91a7a6 100644 --- a/src/apify_client/_resource_clients/task.py +++ b/src/apify_client/_resource_clients/task.py @@ -168,7 +168,7 @@ def start( the Actor run which can be used to receive a notification, e.g. when the Actor finished or failed. If you already have a webhook set up for the Actor or task, you do not have to add it again here. Each webhook is represented by a dictionary containing these items: - * `event_types`: List of ``WebhookEventType`` values which trigger the webhook. + * `event_types`: List of `WebhookEventType` values which trigger the webhook. * `request_url`: URL to which to send the webhook HTTP request. * `payload_template`: Optional template for the request payload. @@ -461,7 +461,7 @@ async def start( the Actor run which can be used to receive a notification, e.g. when the Actor finished or failed. If you already have a webhook set up for the Actor or task, you do not have to add it again here. Each webhook is represented by a dictionary containing these items: - * `event_types`: List of ``WebhookEventType`` values which trigger the webhook. + * `event_types`: List of `WebhookEventType` values which trigger the webhook. * `request_url`: URL to which to send the webhook HTTP request. * `payload_template`: Optional template for the request payload. diff --git a/src/apify_client/_utils.py b/src/apify_client/_utils.py index 00eb297e..83eb536b 100644 --- a/src/apify_client/_utils.py +++ b/src/apify_client/_utils.py @@ -6,6 +6,7 @@ import json import string import time +import warnings from base64 import b64encode, urlsafe_b64encode from enum import Enum from http import HTTPStatus @@ -13,13 +14,13 @@ import impit +from apify_client._consts import OVERRIDABLE_DEFAULT_HEADERS from apify_client.errors import InvalidResponseBodyError if TYPE_CHECKING: from datetime import timedelta - from impit import Response - + from apify_client._http_clients import HttpResponse from apify_client.errors import ApifyApiError T = TypeVar('T') @@ -210,9 +211,9 @@ def enum_to_value(value: Any) -> Any: def is_retryable_error(exc: Exception) -> bool: """Check if the given error is retryable. - All ``impit.HTTPError`` subclasses are considered retryable because they represent transport-level failures + All `impit.HTTPError` subclasses are considered retryable because they represent transport-level failures (network issues, timeouts, protocol errors, body decoding errors) that are typically transient. HTTP status - code errors are handled separately in ``_make_request`` based on the response status code, not here. + code errors are handled separately in `_make_request` based on the response status code, not here. """ return isinstance( exc, @@ -235,7 +236,7 @@ def to_safe_id(id: str) -> str: return id.replace('/', '~') -def response_to_dict(response: Response) -> dict: +def response_to_dict(response: HttpResponse) -> dict: """Parse the API response as a dictionary and validate its type. Args: @@ -255,7 +256,7 @@ def response_to_dict(response: Response) -> dict: raise ValueError(f'The response is not a dictionary. Got: {type(data).__name__}') -def response_to_list(response: Response) -> list: +def response_to_list(response: HttpResponse) -> list: """Parse the API response as a list and validate its type. Args: @@ -347,3 +348,17 @@ def create_storage_content_signature( base64url_encoded_payload = urlsafe_b64encode(f'{version}.{expires_at}.{hmac_sig}'.encode()) return base64url_encoded_payload.decode('utf-8') + + +def check_custom_headers(class_name: str, headers: dict[str, str]) -> None: + """Warn if custom headers override important default headers.""" + overwrite_headers = [key for key in headers if key.title() in OVERRIDABLE_DEFAULT_HEADERS] + + if overwrite_headers: + warnings.warn( + f'{", ".join(overwrite_headers)} headers of {class_name} was overridden with an ' + 'explicit value. A wrong header value can lead to API errors, it is recommended to use the default ' + f'value for following headers: {", ".join(OVERRIDABLE_DEFAULT_HEADERS)}.', + category=UserWarning, + stacklevel=3, + ) diff --git a/src/apify_client/errors.py b/src/apify_client/errors.py index ba167d33..90c2b147 100644 --- a/src/apify_client/errors.py +++ b/src/apify_client/errors.py @@ -5,7 +5,7 @@ from apify_client._docs import docs_group if TYPE_CHECKING: - import impit + from apify_client._http_clients import HttpResponse @docs_group('Errors') @@ -35,7 +35,7 @@ class ApifyApiError(ApifyClientError): data: Additional error data from the API response. """ - def __init__(self, response: impit.Response, attempt: int, method: str = 'GET') -> None: + def __init__(self, response: HttpResponse, attempt: int, method: str = 'GET') -> None: """Initialize the API error from a failed response. Args: @@ -82,7 +82,7 @@ class InvalidResponseBodyError(ApifyClientError): is only raised after all retry attempts have been exhausted. """ - def __init__(self, response: impit.Response) -> None: + def __init__(self, response: HttpResponse) -> None: """Initialize the error from an unparsable response. Args: diff --git a/tests/unit/test_client_errors.py b/tests/unit/test_client_errors.py index b236f533..03af5d57 100644 --- a/tests/unit/test_client_errors.py +++ b/tests/unit/test_client_errors.py @@ -1,18 +1,15 @@ from __future__ import annotations import json -import time from typing import TYPE_CHECKING import pytest from werkzeug import Response -from apify_client._http_clients import HttpClient, HttpClientAsync +from apify_client._http_clients import ImpitHttpClient, ImpitHttpClientAsync from apify_client.errors import ApifyApiError if TYPE_CHECKING: - from collections.abc import Iterator - from pytest_httpserver import HTTPServer from werkzeug import Request @@ -44,12 +41,6 @@ def test_endpoint(httpserver: HTTPServer) -> str: def streaming_handler(_request: Request) -> Response: """Handler for streaming log requests.""" - - def generate_response() -> Iterator[bytes]: - for i in range(len(RAW_ERROR)): - yield RAW_ERROR[i : i + 1] - time.sleep(0.01) - return Response( response=(RAW_ERROR[i : i + 1] for i in range(len(RAW_ERROR))), status=403, @@ -60,7 +51,7 @@ def generate_response() -> Iterator[bytes]: def test_client_apify_api_error_with_data(test_endpoint: str) -> None: """Test that client correctly throws ApifyApiError with error data from response.""" - client = HttpClient() + client = ImpitHttpClient() with pytest.raises(ApifyApiError) as exc: client.call(method='GET', url=test_endpoint) @@ -72,7 +63,7 @@ def test_client_apify_api_error_with_data(test_endpoint: str) -> None: async def test_async_client_apify_api_error_with_data(test_endpoint: str) -> None: """Test that async client correctly throws ApifyApiError with error data from response.""" - client = HttpClientAsync() + client = ImpitHttpClientAsync() with pytest.raises(ApifyApiError) as exc: await client.call(method='GET', url=test_endpoint) @@ -87,7 +78,7 @@ def test_client_apify_api_error_streamed(httpserver: HTTPServer) -> None: error = json.loads(RAW_ERROR.decode()) - client = HttpClient() + client = ImpitHttpClient() httpserver.expect_request('/stream_error').respond_with_handler(streaming_handler) @@ -103,7 +94,7 @@ async def test_async_client_apify_api_error_streamed(httpserver: HTTPServer) -> error = json.loads(RAW_ERROR.decode()) - client = HttpClientAsync() + client = ImpitHttpClientAsync() httpserver.expect_request('/stream_error').respond_with_handler(streaming_handler) diff --git a/tests/unit/test_client_headers.py b/tests/unit/test_client_headers.py index cf95c5a6..75c02687 100644 --- a/tests/unit/test_client_headers.py +++ b/tests/unit/test_client_headers.py @@ -6,11 +6,9 @@ from importlib import metadata from typing import TYPE_CHECKING -import pytest from werkzeug import Request, Response -from apify_client import ApifyClient, ApifyClientAsync -from apify_client._http_clients import HttpClient, HttpClientAsync +from apify_client._http_clients import ImpitHttpClient, ImpitHttpClientAsync if TYPE_CHECKING: from pytest_httpserver import HTTPServer @@ -33,7 +31,7 @@ def _get_user_agent() -> str: async def test_default_headers_async(httpserver: HTTPServer) -> None: """Test that default headers are sent with each request.""" - client = HttpClientAsync(token='placeholder_token') + client = ImpitHttpClientAsync(token='placeholder_token') httpserver.expect_request('/').respond_with_handler(_header_handler) api_url = httpserver.url_for('/').removesuffix('/') @@ -52,7 +50,7 @@ async def test_default_headers_async(httpserver: HTTPServer) -> None: def test_default_headers_sync(httpserver: HTTPServer) -> None: """Test that default headers are sent with each request.""" - client = HttpClient(token='placeholder_token') + client = ImpitHttpClient(token='placeholder_token') httpserver.expect_request('/').respond_with_handler(_header_handler) api_url = httpserver.url_for('/').removesuffix('/') @@ -71,7 +69,7 @@ def test_default_headers_sync(httpserver: HTTPServer) -> None: async def test_headers_async(httpserver: HTTPServer) -> None: """Test that custom headers are sent with each request.""" - client = HttpClientAsync( + client = ImpitHttpClientAsync( token='placeholder_token', headers={'Test-Header': 'blah', 'User-Agent': 'CustomUserAgent/1.0', 'Authorization': 'strange_value'}, ) @@ -94,7 +92,7 @@ async def test_headers_async(httpserver: HTTPServer) -> None: def test_headers_sync(httpserver: HTTPServer) -> None: """Test that custom headers are sent with each request.""" - client = HttpClient( + client = ImpitHttpClient( token='placeholder_token', headers={ 'Test-Header': 'blah', @@ -117,27 +115,3 @@ def test_headers_sync(httpserver: HTTPServer) -> None: 'Accept-Encoding': 'gzip, br, zstd, deflate', 'Host': f'{httpserver.host}:{httpserver.port}', } - - -def test_warning_on_overridden_headers_sync() -> None: - """Test that warning is raised when default headers are overridden.""" - with pytest.warns(UserWarning, match='User-Agent, Authorization headers of ApifyClient'): - ApifyClient( - token='placeholder_token', - headers={ - 'User-Agent': 'CustomUserAgent/1.0', - 'Authorization': 'strange_value', - }, - ) - - -async def test_warning_on_overridden_headers_async() -> None: - """Test that warning is raised when default headers are overridden.""" - with pytest.warns(UserWarning, match='User-Agent, Authorization headers of ApifyClientAsync'): - ApifyClientAsync( - token='placeholder_token', - headers={ - 'User-Agent': 'CustomUserAgent/1.0', - 'Authorization': 'strange_value', - }, - ) diff --git a/tests/unit/test_client_timeouts.py b/tests/unit/test_client_timeouts.py index 0cd90aad..356c2829 100644 --- a/tests/unit/test_client_timeouts.py +++ b/tests/unit/test_client_timeouts.py @@ -7,7 +7,7 @@ import pytest from impit import HTTPError, Response, TimeoutException -from apify_client._http_clients import HttpClient, HttpClientAsync +from apify_client._http_clients import ImpitHttpClient, ImpitHttpClientAsync if TYPE_CHECKING: from collections.abc import Iterator @@ -58,7 +58,7 @@ async def mock_request(*_args: Any, **kwargs: Any) -> Response: monkeypatch.setattr('impit.AsyncClient.request', mock_request) - response = await HttpClientAsync(timeout=timedelta(seconds=client_timeout)).call( + response = await ImpitHttpClientAsync(timeout=timedelta(seconds=client_timeout)).call( method='GET', url='http://placeholder.url/async_timeout', timeout=timedelta(seconds=call_timeout) ) @@ -89,7 +89,7 @@ async def mock_request(*_args: Any, **_kwargs: Any) -> Response: monkeypatch.setattr('impit.AsyncClient.request', mock_request) - response = await HttpClientAsync(timeout=timedelta(seconds=5)).call( + response = await ImpitHttpClientAsync(timeout=timedelta(seconds=5)).call( method='GET', url='http://placeholder.url/http_error' ) @@ -122,7 +122,7 @@ def mock_request(*_args: Any, **kwargs: Any) -> Response: monkeypatch.setattr('impit.Client.request', mock_request) - response = HttpClient(timeout=timedelta(seconds=client_timeout)).call( + response = ImpitHttpClient(timeout=timedelta(seconds=client_timeout)).call( method='GET', url='http://placeholder.url/sync_timeout', timeout=timedelta(seconds=call_timeout) ) diff --git a/tests/unit/test_http_clients.py b/tests/unit/test_http_clients.py index 2f60c7f5..0f1218f1 100644 --- a/tests/unit/test_http_clients.py +++ b/tests/unit/test_http_clients.py @@ -8,12 +8,26 @@ import impit import pytest -from apify_client._http_clients import HttpClient, HttpClientAsync -from apify_client._http_clients._base import BaseHttpClient +from apify_client._http_clients import HttpClient, HttpClientAsync, HttpResponse, ImpitHttpClient, ImpitHttpClientAsync +from apify_client._http_clients._impit import _is_retryable_error from apify_client._statistics import ClientStatistics from apify_client.errors import InvalidResponseBodyError +class _ConcreteHttpClient(HttpClient): + """Minimal concrete HttpClient for testing base class helpers.""" + + def call(self, *, method: str, url: str, **kwargs: Any) -> HttpResponse: + raise NotImplementedError + + +class _ConcreteHttpClientAsync(HttpClientAsync): + """Minimal concrete HttpClientAsync for testing base class helpers.""" + + async def call(self, *, method: str, url: str, **kwargs: Any) -> HttpResponse: + raise NotImplementedError + + def test_retry_with_exp_backoff() -> None: attempt_counter = 0 @@ -43,19 +57,19 @@ def bails_on_third_attempt(stop_retrying: Callable, attempt: int) -> Any: # Returns the correct result after the correct time (should take 100 + 200 + 400 + 800 = 1500 ms) start = time.time() - result = HttpClient._retry_with_exp_backoff( + result = ImpitHttpClient._retry_with_exp_backoff( returns_on_fifth_attempt, backoff_base=timedelta(milliseconds=100), backoff_factor=2, random_factor=0 ) elapsed_time_seconds = time.time() - start assert result == 'SUCCESS' assert attempt_counter == 5 assert elapsed_time_seconds > 1.4 - assert elapsed_time_seconds < 2.0 + assert elapsed_time_seconds < 3.0 # Stops retrying when failed for max_retries times attempt_counter = 0 with pytest.raises(RetryableError): - HttpClient._retry_with_exp_backoff( + ImpitHttpClient._retry_with_exp_backoff( returns_on_fifth_attempt, max_retries=3, backoff_base=timedelta(milliseconds=1) ) assert attempt_counter == 4 @@ -63,7 +77,7 @@ def bails_on_third_attempt(stop_retrying: Callable, attempt: int) -> Any: # Bails when the bail function is called attempt_counter = 0 with pytest.raises(NonRetryableError): - HttpClient._retry_with_exp_backoff(bails_on_third_attempt, backoff_base=timedelta(milliseconds=1)) + ImpitHttpClient._retry_with_exp_backoff(bails_on_third_attempt, backoff_base=timedelta(milliseconds=1)) assert attempt_counter == 3 @@ -96,19 +110,19 @@ async def bails_on_third_attempt(stop_retrying: Callable, attempt: int) -> Any: # Returns the correct result after the correct time (should take 100 + 200 + 400 + 800 = 1500 ms) start = time.time() - result = await HttpClientAsync._retry_with_exp_backoff( + result = await ImpitHttpClientAsync._retry_with_exp_backoff( returns_on_fifth_attempt, backoff_base=timedelta(milliseconds=100), backoff_factor=2, random_factor=0 ) elapsed_time_seconds = time.time() - start assert result == 'SUCCESS' assert attempt_counter == 5 assert elapsed_time_seconds > 1.4 - assert elapsed_time_seconds < 2.0 + assert elapsed_time_seconds < 3.0 # Stops retrying when failed for max_retries times attempt_counter = 0 with pytest.raises(RetryableError): - await HttpClientAsync._retry_with_exp_backoff( + await ImpitHttpClientAsync._retry_with_exp_backoff( returns_on_fifth_attempt, max_retries=3, backoff_base=timedelta(milliseconds=1) ) assert attempt_counter == 4 @@ -116,15 +130,17 @@ async def bails_on_third_attempt(stop_retrying: Callable, attempt: int) -> Any: # Bails when the bail function is called attempt_counter = 0 with pytest.raises(NonRetryableError): - await HttpClientAsync._retry_with_exp_backoff(bails_on_third_attempt, backoff_base=timedelta(milliseconds=1)) + await ImpitHttpClientAsync._retry_with_exp_backoff( + bails_on_third_attempt, backoff_base=timedelta(milliseconds=1) + ) assert attempt_counter == 3 def test_base_http_client_initialization() -> None: - """Test BaseHttpClient initialization with various configurations.""" + """Test HttpClient initialization with various configurations.""" statistics = ClientStatistics() - client = BaseHttpClient( + client = _ConcreteHttpClient( token='test_token', timeout=timedelta(seconds=30), max_retries=5, @@ -139,13 +155,13 @@ def test_base_http_client_initialization() -> None: assert client._headers['Authorization'] == 'Bearer test_token' # Test without statistics (should create default) - client2 = BaseHttpClient(token='test_token') + client2 = _ConcreteHttpClient(token='test_token') assert isinstance(client2._statistics, ClientStatistics) def test_http_client_creates_sync_impit_client() -> None: - """Test that HttpClient creates sync impit client correctly.""" - client = HttpClient(token='test_token_123') + """Test that ImpitHttpClient creates sync impit client correctly.""" + client = ImpitHttpClient(token='test_token_123') # Check that sync impit client is created assert client._impit_client is not None @@ -153,8 +169,8 @@ def test_http_client_creates_sync_impit_client() -> None: def test_http_client_async_creates_async_impit_client() -> None: - """Test that HttpClientAsync creates async impit client correctly.""" - client = HttpClientAsync(token='test_token_123') + """Test that ImpitHttpClientAsync creates async impit client correctly.""" + client = ImpitHttpClientAsync(token='test_token_123') # Check that async impit client is created assert client._impit_async_client is not None @@ -163,38 +179,38 @@ def test_http_client_async_creates_async_impit_client() -> None: def test_parse_params_none() -> None: """Test _parse_params with None input.""" - assert BaseHttpClient._parse_params(None) is None + assert HttpClient._parse_params(None) is None def test_parse_params_boolean() -> None: """Test _parse_params converts booleans to integers.""" - result = BaseHttpClient._parse_params({'flag': True, 'disabled': False}) + result = HttpClient._parse_params({'flag': True, 'disabled': False}) assert result == {'flag': 1, 'disabled': 0} def test_parse_params_list() -> None: """Test _parse_params converts lists to comma-separated strings.""" - result = BaseHttpClient._parse_params({'ids': ['id1', 'id2', 'id3']}) + result = HttpClient._parse_params({'ids': ['id1', 'id2', 'id3']}) assert result == {'ids': 'id1,id2,id3'} def test_parse_params_datetime() -> None: """Test _parse_params converts datetime to Zulu format.""" dt = datetime(2024, 1, 15, 10, 30, 45, 123000, tzinfo=UTC) - result = BaseHttpClient._parse_params({'created_at': dt}) + result = HttpClient._parse_params({'created_at': dt}) assert result == {'created_at': '2024-01-15T10:30:45.123Z'} def test_parse_params_none_values_filtered() -> None: """Test _parse_params filters out None values.""" - result = BaseHttpClient._parse_params({'a': 1, 'b': None, 'c': 'value'}) + result = HttpClient._parse_params({'a': 1, 'b': None, 'c': 'value'}) assert result == {'a': 1, 'c': 'value'} def test_parse_params_mixed() -> None: """Test _parse_params with mixed types.""" dt = datetime(2024, 1, 15, 10, 30, 45, 123000, tzinfo=UTC) - result = BaseHttpClient._parse_params( + result = HttpClient._parse_params( { 'limit': 10, 'offset': 0, @@ -218,20 +234,20 @@ def test_parse_params_mixed() -> None: def test_is_retryable_error() -> None: """Test _is_retryable_error correctly identifies retryable errors.""" mock_response = Mock() - assert BaseHttpClient._is_retryable_error(InvalidResponseBodyError(mock_response)) - assert BaseHttpClient._is_retryable_error(impit.NetworkError('test')) - assert BaseHttpClient._is_retryable_error(impit.TimeoutException('test')) - assert BaseHttpClient._is_retryable_error(impit.RemoteProtocolError('test')) + assert _is_retryable_error(InvalidResponseBodyError(mock_response)) + assert _is_retryable_error(impit.NetworkError('test')) + assert _is_retryable_error(impit.TimeoutException('test')) + assert _is_retryable_error(impit.RemoteProtocolError('test')) # Non-retryable errors - assert not BaseHttpClient._is_retryable_error(ValueError('test')) - assert not BaseHttpClient._is_retryable_error(RuntimeError('test')) - assert not BaseHttpClient._is_retryable_error(Exception('test')) + assert not _is_retryable_error(ValueError('test')) + assert not _is_retryable_error(RuntimeError('test')) + assert not _is_retryable_error(Exception('test')) def test_prepare_request_call_basic() -> None: """Test _prepare_request_call with basic parameters.""" - client = BaseHttpClient() + client = _ConcreteHttpClient() headers, params, data = client._prepare_request_call() assert headers == {} @@ -241,7 +257,7 @@ def test_prepare_request_call_basic() -> None: def test_prepare_request_call_with_json() -> None: """Test _prepare_request_call with JSON data.""" - client = BaseHttpClient() + client = _ConcreteHttpClient() json_data = {'key': 'value', 'number': 42} headers, _params, data = client._prepare_request_call(json=json_data) @@ -254,7 +270,7 @@ def test_prepare_request_call_with_json() -> None: def test_prepare_request_call_with_empty_dict_json() -> None: """Test _prepare_request_call with empty dict JSON (falsy but valid).""" - client = BaseHttpClient() + client = _ConcreteHttpClient() headers, _params, data = client._prepare_request_call(json={}) @@ -269,7 +285,7 @@ def test_prepare_request_call_with_empty_dict_json() -> None: def test_prepare_request_call_with_empty_list_json() -> None: """Test _prepare_request_call with empty list JSON (falsy but valid).""" - client = BaseHttpClient() + client = _ConcreteHttpClient() headers, _params, data = client._prepare_request_call(json=[]) @@ -284,7 +300,7 @@ def test_prepare_request_call_with_empty_list_json() -> None: def test_prepare_request_call_with_zero_json() -> None: """Test _prepare_request_call with zero JSON (falsy but valid).""" - client = BaseHttpClient() + client = _ConcreteHttpClient() headers, _params, data = client._prepare_request_call(json=0) @@ -299,7 +315,7 @@ def test_prepare_request_call_with_zero_json() -> None: def test_prepare_request_call_with_false_json() -> None: """Test _prepare_request_call with False JSON (falsy but valid).""" - client = BaseHttpClient() + client = _ConcreteHttpClient() headers, _params, data = client._prepare_request_call(json=False) @@ -314,7 +330,7 @@ def test_prepare_request_call_with_false_json() -> None: def test_prepare_request_call_with_empty_string_json() -> None: """Test _prepare_request_call with empty string JSON (falsy but valid).""" - client = BaseHttpClient() + client = _ConcreteHttpClient() headers, _params, data = client._prepare_request_call(json='') @@ -329,7 +345,7 @@ def test_prepare_request_call_with_empty_string_json() -> None: def test_prepare_request_call_with_string_data() -> None: """Test _prepare_request_call with string data.""" - client = BaseHttpClient() + client = _ConcreteHttpClient() headers, _params, data = client._prepare_request_call(data='test string') @@ -339,7 +355,7 @@ def test_prepare_request_call_with_string_data() -> None: def test_prepare_request_call_with_bytes_data() -> None: """Test _prepare_request_call with bytes data.""" - client = BaseHttpClient() + client = _ConcreteHttpClient() headers, _params, data = client._prepare_request_call(data=b'test bytes') @@ -349,7 +365,7 @@ def test_prepare_request_call_with_bytes_data() -> None: def test_prepare_request_call_json_and_data_error() -> None: """Test _prepare_request_call raises error when both json and data are provided.""" - client = BaseHttpClient() + client = _ConcreteHttpClient() with pytest.raises(ValueError, match='Cannot pass both "json" and "data" parameters'): client._prepare_request_call(json={'key': 'value'}, data='string') @@ -357,7 +373,7 @@ def test_prepare_request_call_json_and_data_error() -> None: def test_prepare_request_call_with_params() -> None: """Test _prepare_request_call parses params correctly.""" - client = BaseHttpClient() + client = _ConcreteHttpClient() _headers, params, _data = client._prepare_request_call(params={'limit': 10, 'flag': True}) @@ -366,7 +382,7 @@ def test_prepare_request_call_with_params() -> None: def test_build_url_with_params_none() -> None: """Test _build_url_with_params with None params.""" - client = BaseHttpClient() + client = _ConcreteHttpClient() url = client._build_url_with_params('https://api.test.com/endpoint') assert url == 'https://api.test.com/endpoint' @@ -374,7 +390,7 @@ def test_build_url_with_params_none() -> None: def test_build_url_with_params_simple() -> None: """Test _build_url_with_params with simple params.""" - client = BaseHttpClient() + client = _ConcreteHttpClient() url = client._build_url_with_params('https://api.test.com/endpoint', {'key': 'value', 'limit': 10}) assert 'key=value' in url @@ -384,7 +400,7 @@ def test_build_url_with_params_simple() -> None: def test_build_url_with_params_list() -> None: """Test _build_url_with_params with list values.""" - client = BaseHttpClient() + client = _ConcreteHttpClient() url = client._build_url_with_params('https://api.test.com/endpoint', {'tags': ['tag1', 'tag2', 'tag3']}) assert 'tags=tag1' in url @@ -394,7 +410,7 @@ def test_build_url_with_params_list() -> None: def test_build_url_with_params_mixed() -> None: """Test _build_url_with_params with mixed param types.""" - client = BaseHttpClient() + client = _ConcreteHttpClient() url = client._build_url_with_params( 'https://api.test.com/endpoint', {'limit': 10, 'tags': ['a', 'b'], 'name': 'test'} diff --git a/tests/unit/test_pluggable_http_client.py b/tests/unit/test_pluggable_http_client.py new file mode 100644 index 00000000..aac131c7 --- /dev/null +++ b/tests/unit/test_pluggable_http_client.py @@ -0,0 +1,425 @@ +from __future__ import annotations + +from dataclasses import dataclass, field +from typing import TYPE_CHECKING, Any + +import pytest + +import apify_client as apify_client_module +from apify_client import ( + ApifyClient, + ApifyClientAsync, + HttpClient, + HttpClientAsync, + HttpResponse, +) +from apify_client._http_clients import ImpitHttpClient, ImpitHttpClientAsync +from apify_client.errors import ApifyApiError + +if TYPE_CHECKING: + from collections.abc import AsyncIterator, Iterator + from datetime import timedelta + + from pytest_httpserver import HTTPServer + + +# -- Test response and client implementations -- + + +@dataclass +class FakeResponse: + """A minimal response object that satisfies the HttpResponse protocol.""" + + status_code: int = 200 + text: str = '' + content: bytes = b'' + headers: dict[str, str] = field(default_factory=dict) + _json: Any = field(default=None, repr=False) + + def json(self) -> Any: + return self._json + + def read(self) -> bytes: + return self.content + + async def aread(self) -> bytes: + return self.content + + def close(self) -> None: + pass + + async def aclose(self) -> None: + pass + + def iter_bytes(self) -> Iterator[bytes]: + yield self.content + + async def aiter_bytes(self) -> AsyncIterator[bytes]: + yield self.content + + +def _make_fake_response() -> FakeResponse: + """Create a standard fake response for testing.""" + return FakeResponse( + status_code=200, + text='{"data": {"id": "test123"}}', + content=b'{"data": {"id": "test123"}}', + headers={'content-type': 'application/json'}, + _json={'data': {'id': 'test123'}}, + ) + + +class FakeHttpClient(HttpClient): + """A custom sync HTTP client for testing.""" + + def __init__(self) -> None: + super().__init__() + self.calls: list[dict[str, Any]] = [] + + def call( + self, + *, + method: str, + url: str, + headers: dict[str, str] | None = None, + params: dict[str, Any] | None = None, + data: str | bytes | bytearray | None = None, + json: Any = None, + stream: bool | None = None, + timeout: timedelta | None = None, + ) -> FakeResponse: + self.calls.append( + { + 'method': method, + 'url': url, + 'headers': headers, + 'params': params, + 'data': data, + 'json': json, + 'stream': stream, + 'timeout': timeout, + } + ) + return _make_fake_response() + + +class FakeHttpClientAsync(HttpClientAsync): + """A custom async HTTP client for testing.""" + + def __init__(self) -> None: + super().__init__() + self.calls: list[dict[str, Any]] = [] + + async def call( + self, + *, + method: str, + url: str, + headers: dict[str, str] | None = None, + params: dict[str, Any] | None = None, + data: str | bytes | bytearray | None = None, + json: Any = None, + stream: bool | None = None, + timeout: timedelta | None = None, + ) -> FakeResponse: + self.calls.append( + { + 'method': method, + 'url': url, + 'headers': headers, + 'params': params, + 'data': data, + 'json': json, + 'stream': stream, + 'timeout': timeout, + } + ) + return _make_fake_response() + + +# -- Protocol / ABC conformance tests -- + + +def test_fake_response_satisfies_http_response_protocol() -> None: + """Test that FakeResponse satisfies the HttpResponse protocol.""" + response = FakeResponse() + assert isinstance(response, HttpResponse) + + +def test_fake_http_client_is_http_client() -> None: + """Test that FakeHttpClient is an instance of HttpClient.""" + client = FakeHttpClient() + assert isinstance(client, HttpClient) + + +def test_fake_http_client_async_is_http_client_async() -> None: + """Test that FakeHttpClientAsync is an instance of HttpClientAsync.""" + client = FakeHttpClientAsync() + assert isinstance(client, HttpClientAsync) + + +def test_apify_http_client_is_http_client() -> None: + """Test that ImpitHttpClient is an instance of HttpClient.""" + client = ImpitHttpClient() + assert isinstance(client, HttpClient) + + +def test_apify_http_client_async_is_http_client_async() -> None: + """Test that ImpitHttpClientAsync is an instance of HttpClientAsync.""" + client = ImpitHttpClientAsync() + assert isinstance(client, HttpClientAsync) + + +async def test_fake_response_async_methods() -> None: + """Test that FakeResponse async methods work correctly.""" + response = FakeResponse(content=b'hello') + assert await response.aread() == b'hello' + await response.aclose() # should not raise + chunks = [chunk async for chunk in response.aiter_bytes()] + assert chunks == [b'hello'] + + +def test_http_client_abc_not_instantiable() -> None: + """Test that HttpClient cannot be instantiated directly (it's abstract).""" + with pytest.raises(TypeError, match='abstract method'): + HttpClient() + + +def test_http_client_async_abc_not_instantiable() -> None: + """Test that HttpClientAsync cannot be instantiated directly (it's abstract).""" + with pytest.raises(TypeError, match='abstract method'): + HttpClientAsync() + + +# -- ApifyClient with custom http_client via classmethod -- + + +def test_apify_client_with_custom_client() -> None: + """Test that ApifyClient.with_custom_client accepts a custom http_client.""" + fake_client = FakeHttpClient() + client = ApifyClient.with_custom_client(token='test_token', http_client=fake_client) + + assert client.http_client is fake_client + + +def test_apify_client_uses_default_http_client_when_none_provided() -> None: + """Test that ApifyClient creates default ImpitHttpClient when no http_client is provided.""" + client = ApifyClient(token='test_token') + + assert isinstance(client.http_client, ImpitHttpClient) + + +def test_apify_client_custom_http_client_receives_requests() -> None: + """Test that requests flow through the custom HTTP client.""" + fake_client = FakeHttpClient() + client = ApifyClient.with_custom_client(token='test_token', http_client=fake_client) + + # Use _get() via the dataset client to avoid Pydantic model validation + # (actor.get() would try to validate against ActorResponse model) + result = client.dataset('test-dataset')._get() + + assert len(fake_client.calls) == 1 + call = fake_client.calls[0] + assert call['method'] == 'GET' + assert 'test-dataset' in call['url'] + assert result == {'data': {'id': 'test123'}} + + +def test_apify_client_with_custom_client_accepts_url_params() -> None: + """Test that with_custom_client can be combined with token, api_url, and api_public_url.""" + fake_client = FakeHttpClient() + client = ApifyClient.with_custom_client( + token='test_token', + api_url='https://custom.api.example.com', + api_public_url='https://public.api.example.com', + http_client=fake_client, + ) + assert client.http_client is fake_client + + +# -- ApifyClientAsync with custom http_client via classmethod -- + + +async def test_apify_client_async_with_custom_client() -> None: + """Test that ApifyClientAsync.with_custom_client accepts a custom http_client.""" + fake_client = FakeHttpClientAsync() + client = ApifyClientAsync.with_custom_client(token='test_token', http_client=fake_client) + + assert client.http_client is fake_client + + +async def test_apify_client_async_uses_default_http_client_when_none_provided() -> None: + """Test that ApifyClientAsync creates default ImpitHttpClientAsync when no http_client is provided.""" + client = ApifyClientAsync(token='test_token') + + assert isinstance(client.http_client, ImpitHttpClientAsync) + + +async def test_apify_client_async_custom_http_client_receives_requests() -> None: + """Test that async requests flow through the custom HTTP client.""" + fake_client = FakeHttpClientAsync() + client = ApifyClientAsync.with_custom_client(token='test_token', http_client=fake_client) + + # Use _get() via the dataset client to avoid Pydantic model validation + result = await client.dataset('test-dataset')._get() + + assert len(fake_client.calls) == 1 + call = fake_client.calls[0] + assert call['method'] == 'GET' + assert 'test-dataset' in call['url'] + assert result == {'data': {'id': 'test123'}} + + +async def test_apify_client_async_with_custom_client_accepts_url_params() -> None: + """Test that async with_custom_client can be combined with token, api_url, and api_public_url.""" + fake_client = FakeHttpClientAsync() + client = ApifyClientAsync.with_custom_client( + token='test_token', + api_url='https://custom.api.example.com', + api_public_url='https://public.api.example.com', + http_client=fake_client, + ) + assert client.http_client is fake_client + + +# -- Public exports -- + + +def test_public_exports() -> None: + """Test that all HTTP client types are available from the public API.""" + assert hasattr(apify_client_module, 'HttpClient') + assert hasattr(apify_client_module, 'HttpClientAsync') + assert hasattr(apify_client_module, 'HttpResponse') + assert hasattr(apify_client_module, 'ImpitHttpClient') + assert hasattr(apify_client_module, 'ImpitHttpClientAsync') + + +# -- http_client property -- + + +def test_apify_client_http_client_property_returns_correct_type() -> None: + """Test that http_client property returns the correct type.""" + # With default + client = ApifyClient(token='test') + assert isinstance(client.http_client, HttpClient) + + # With custom + fake = FakeHttpClient() + client2 = ApifyClient.with_custom_client(token='test', http_client=fake) + assert client2.http_client is fake + + +async def test_apify_client_async_http_client_property_returns_correct_type() -> None: + """Test that http_client property returns the correct type for async client.""" + # With default + client = ApifyClientAsync(token='test') + assert isinstance(client.http_client, HttpClientAsync) + + # With custom + fake = FakeHttpClientAsync() + client2 = ApifyClientAsync.with_custom_client(token='test', http_client=fake) + assert client2.http_client is fake + + +# -- Error handling with custom http_client -- + + +class ErrorRaisingHttpClient(HttpClient): + """A custom HTTP client that raises ApifyApiError.""" + + def call(self, *, method: str, **_kwargs: Any) -> FakeResponse: + error_response = FakeResponse( + status_code=404, + text='{"error": {"message": "Actor not found", "type": "record-not-found"}}', + _json={'error': {'message': 'Actor not found', 'type': 'record-not-found'}}, + ) + raise ApifyApiError(error_response, attempt=1, method=method) + + +def test_custom_http_client_error_handling() -> None: + """Test that ApifyApiError from custom client is handled correctly by resource clients.""" + error_client = ErrorRaisingHttpClient() + client = ApifyClient.with_custom_client(token='test', http_client=error_client) + + # _get() should catch 404 record-not-found and return None + result = client.actor('nonexistent').get() + assert result is None + + +class ErrorRaisingHttpClientAsync(HttpClientAsync): + """A custom async HTTP client that raises ApifyApiError.""" + + async def call(self, *, method: str, **_kwargs: Any) -> FakeResponse: + error_response = FakeResponse( + status_code=404, + text='{"error": {"message": "Actor not found", "type": "record-not-found"}}', + _json={'error': {'message': 'Actor not found', 'type': 'record-not-found'}}, + ) + raise ApifyApiError(error_response, attempt=1, method=method) + + +async def test_custom_http_client_async_error_handling() -> None: + """Test that ApifyApiError from async custom client is handled correctly by resource clients.""" + error_client = ErrorRaisingHttpClientAsync() + client = ApifyClientAsync.with_custom_client(token='test', http_client=error_client) + + # _get() should catch 404 record-not-found and return None + result = await client.actor('nonexistent').get() + assert result is None + + +# -- Integration with real HTTP server -- + + +def test_custom_http_client_with_real_server(httpserver: HTTPServer) -> None: + """Test that a custom HTTP client wrapping ImpitHttpClient works with a real server.""" + httpserver.expect_request('/v2/datasets/test-dataset').respond_with_json( + {'data': {'id': 'test-dataset', 'name': 'My Dataset'}}, + ) + + # Create a wrapping client that adds custom headers + inner_client = ImpitHttpClient(token='test_token') + + class WrappingHttpClient(HttpClient): + def call(self, *, method: str, url: str, **kwargs: Any) -> HttpResponse: + if kwargs.get('headers') is None: + kwargs['headers'] = {} + kwargs['headers']['X-Custom-Header'] = 'custom-value' + return inner_client.call(method=method, url=url, **kwargs) + + api_url = httpserver.url_for('/').removesuffix('/') + client = ApifyClient.with_custom_client(token='test_token', api_url=api_url, http_client=WrappingHttpClient()) + + # Use _get() to test the raw request flow without Pydantic validation + result = client.dataset('test-dataset')._get() + + assert result is not None + assert result['data']['id'] == 'test-dataset' + + +async def test_custom_http_client_async_with_real_server(httpserver: HTTPServer) -> None: + """Test that a custom async HTTP client wrapping ImpitHttpClientAsync works with a real server.""" + httpserver.expect_request('/v2/datasets/test-dataset').respond_with_json( + {'data': {'id': 'test-dataset', 'name': 'My Dataset'}}, + ) + + # Create a wrapping client that adds custom headers + inner_client = ImpitHttpClientAsync(token='test_token') + + class WrappingHttpClientAsync(HttpClientAsync): + async def call(self, *, method: str, url: str, **kwargs: Any) -> HttpResponse: + if kwargs.get('headers') is None: + kwargs['headers'] = {} + kwargs['headers']['X-Custom-Header'] = 'custom-value' + return await inner_client.call(method=method, url=url, **kwargs) + + api_url = httpserver.url_for('/').removesuffix('/') + client = ApifyClientAsync.with_custom_client( + token='test_token', api_url=api_url, http_client=WrappingHttpClientAsync() + ) + + # Use _get() to test the raw request flow without Pydantic validation + result = await client.dataset('test-dataset')._get() + + assert result is not None + assert result['data']['id'] == 'test-dataset'