From 8cad430f57ad5699d03ef038ba8418ea2bb7474f Mon Sep 17 00:00:00 2001
From: David Omrai <david.omrai@seznam.cz>
Date: Mon, 20 Apr 2026 16:12:59 +0200
Subject: [PATCH 01/63] feat: implement apifyclient wrapper

---
 langchain_apify/_client.py | 205 +++++++++++++++++++++++++++++++++++++
 1 file changed, 205 insertions(+)
 create mode 100644 langchain_apify/_client.py

diff --git a/langchain_apify/_client.py b/langchain_apify/_client.py
new file mode 100644
index 0000000..6ab49be
--- /dev/null
+++ b/langchain_apify/_client.py
@@ -0,0 +1,205 @@
+from __future__ import annotations
+
+import os
+
+from apify_client import ApifyClient
+
+from langchain_apify.error_messages import ERROR_APIFY_TOKEN_ENV_VAR_NOT_SET
+from langchain_apify.utils import create_apify_client
+
+_SCRAPE_ACTOR_ID = 'apify/website-content-crawler'
+
+
+class ApifyToolsClient:
+    """Internal helper that wraps ``ApifyClient`` for the tools layer.
+
+    One convenience method per tool operation. All methods are synchronous and
+    block until the Actor run finishes.
+
+    Args:
+        apify_api_token: Apify API token. Falls back to the ``APIFY_API_TOKEN``
+            environment variable when *None* or empty.
+
+    Raises:
+        ValueError: If no token is provided and the env var is not set.
+    """
+
+    def __init__(self, apify_api_token: str | None = None) -> None:
+        token = apify_api_token or os.getenv('APIFY_API_TOKEN')
+        if not token:
+            msg = ERROR_APIFY_TOKEN_ENV_VAR_NOT_SET
+            raise ValueError(msg)
+        self._client = create_apify_client(ApifyClient, token)
+
+    def run_actor(
+        self,
+        actor_id: str,
+        run_input: dict | None = None,
+        timeout_secs: int = 300,
+        memory_mbytes: int | None = None,
+    ) -> dict:
+        """Start an Actor and block until it finishes.
+
+        Args:
+            actor_id: Actor ID or name (e.g. ``"apify/python-example"``).
+            run_input: JSON-serialisable input for the Actor.
+            timeout_secs: Maximum time to wait for the run to finish.
+            memory_mbytes: Memory limit for the run, or *None* for Actor default.
+
+        Returns:
+            Full run-details dict returned by the Apify API.
+
+        Raises:
+            RuntimeError: If the run does not finish with status ``SUCCEEDED``.
+        """
+        call_kwargs: dict = {'run_input': run_input, 'timeout_secs': timeout_secs}
+        if memory_mbytes is not None:
+            call_kwargs['memory_mbytes'] = memory_mbytes
+
+        run = self._client.actor(actor_id).call(**call_kwargs)
+        self._check_run_status(run)
+        return run
+
+    def get_dataset_items(self, dataset_id: str, limit: int = 100, offset: int = 0) -> list[dict]:
+        """Fetch items from an existing dataset.
+
+        Args:
+            dataset_id: Apify dataset ID.
+            limit: Maximum number of items to return.
+            offset: Number of items to skip from the start.
+
+        Returns:
+            List of dataset item dicts (may be empty).
+        """
+        return self._client.dataset(dataset_id).list_items(limit=limit, offset=offset, clean=True).items
+
+    def run_actor_and_get_items(
+        self,
+        actor_id: str,
+        run_input: dict | None = None,
+        timeout_secs: int = 300,
+        memory_mbytes: int | None = None,
+        dataset_items_limit: int = 100,
+    ) -> tuple[dict, list[dict]]:
+        """Run an Actor, then fetch items from its default dataset.
+
+        Args:
+            actor_id: Actor ID or name.
+            run_input: JSON-serialisable input for the Actor.
+            timeout_secs: Maximum time to wait for the run to finish.
+            memory_mbytes: Memory limit for the run, or *None* for Actor default.
+            dataset_items_limit: Maximum number of dataset items to return.
+
+        Returns:
+            A ``(run_details, items)`` tuple.
+
+        Raises:
+            RuntimeError: If the run does not finish with status ``SUCCEEDED``.
+        """
+        run = self.run_actor(actor_id, run_input, timeout_secs, memory_mbytes)
+        dataset_id = run.get('defaultDatasetId', '')
+        items = self._client.dataset(dataset_id).list_items(limit=dataset_items_limit, clean=True).items
+        return run, items
+
+    def run_task(
+        self,
+        task_id: str,
+        task_input: dict | None = None,
+        timeout_secs: int = 300,
+        memory_mbytes: int | None = None,
+    ) -> dict:
+        """Start a saved Actor task and block until it finishes.
+
+        Args:
+            task_id: Task ID or name (e.g. ``"user/my-task"``).
+            task_input: JSON-serialisable input that overrides the task's
+                pre-saved input.
+            timeout_secs: Maximum time to wait for the run to finish.
+            memory_mbytes: Memory limit for the run, or *None* for task default.
+
+        Returns:
+            Full run-details dict returned by the Apify API.
+
+        Raises:
+            RuntimeError: If the run does not finish with status ``SUCCEEDED``.
+        """
+        call_kwargs: dict = {'task_input': task_input, 'timeout_secs': timeout_secs}
+        if memory_mbytes is not None:
+            call_kwargs['memory_mbytes'] = memory_mbytes
+
+        run = self._client.task(task_id).call(**call_kwargs)
+        self._check_run_status(run)
+        return run
+
+    def run_task_and_get_items(
+        self,
+        task_id: str,
+        task_input: dict | None = None,
+        timeout_secs: int = 300,
+        memory_mbytes: int | None = None,
+        dataset_items_limit: int = 100,
+    ) -> tuple[dict, list[dict]]:
+        """Run a saved Actor task, then fetch items from its default dataset.
+
+        Args:
+            task_id: Task ID or name.
+            task_input: JSON-serialisable input that overrides the task's
+                pre-saved input.
+            timeout_secs: Maximum time to wait for the run to finish.
+            memory_mbytes: Memory limit for the run, or *None* for task default.
+            dataset_items_limit: Maximum number of dataset items to return.
+
+        Returns:
+            A ``(run_details, items)`` tuple.
+
+        Raises:
+            RuntimeError: If the run does not finish with status ``SUCCEEDED``.
+        """
+        run = self.run_task(task_id, task_input, timeout_secs, memory_mbytes)
+        dataset_id = run.get('defaultDatasetId', '')
+        items = self._client.dataset(dataset_id).list_items(limit=dataset_items_limit, clean=True).items
+        return run, items
+
+    def scrape_url(self, url: str, timeout_secs: int = 120) -> str:
+        """Scrape a single URL and return its content as markdown.
+
+        Uses ``apify/website-content-crawler`` with ``maxCrawlPages=1``.
+
+        Args:
+            url: The URL to scrape.
+            timeout_secs: Maximum time to wait for the crawl to finish.
+
+        Returns:
+            Markdown (or plain-text fallback) content of the page.
+
+        Raises:
+            RuntimeError: If the Actor run fails or no content is extracted.
+        """
+        run_input = {
+            'startUrls': [{'url': url}],
+            'maxCrawlPages': 1,
+        }
+        _, items = self.run_actor_and_get_items(
+            _SCRAPE_ACTOR_ID,
+            run_input=run_input,
+            timeout_secs=timeout_secs,
+            dataset_items_limit=1,
+        )
+        if not items:
+            msg = f'No content extracted from {url}.'
+            raise RuntimeError(msg)
+
+        content = items[0].get('markdown') or items[0].get('text') or ''
+        if not content:
+            msg = f'No content extracted from {url}.'
+            raise RuntimeError(msg)
+        return content
+
+    @staticmethod
+    def _check_run_status(run: dict) -> None:
+        """Raise if the run did not succeed."""
+        status = run.get('status')
+        if status != 'SUCCEEDED':
+            run_id = run.get('id', 'unknown')
+            msg = f'Actor run {run_id} ended with status {status}.'
+            raise RuntimeError(msg)

From 2404b9cd73faaea8c3c904b4a34e58ee4cf96a17 Mon Sep 17 00:00:00 2001
From: David Omrai <david.omrai@seznam.cz>
Date: Mon, 20 Apr 2026 16:45:59 +0200
Subject: [PATCH 02/63] feat: removed redundant const file

---
 langchain_apify/_client.py | 21 +++++++++++++--------
 langchain_apify/const.py   |  2 --
 langchain_apify/tools.py   |  7 +++----
 langchain_apify/utils.py   | 12 ++++++------
 4 files changed, 22 insertions(+), 20 deletions(-)
 delete mode 100644 langchain_apify/const.py

diff --git a/langchain_apify/_client.py b/langchain_apify/_client.py
index 6ab49be..068835b 100644
--- a/langchain_apify/_client.py
+++ b/langchain_apify/_client.py
@@ -8,6 +8,11 @@
 from langchain_apify.utils import create_apify_client
 
 _SCRAPE_ACTOR_ID = 'apify/website-content-crawler'
+_DEFAULT_RUN_TIMEOUT_SECS = 300
+_DEFAULT_SCRAPE_TIMEOUT_SECS = 120
+_DEFAULT_TASK_TIMEOUT_SECS = 300
+_DEFAULT_DATASET_ITEMS_LIMIT = 100
+_RUN_STATUS_SUCCEEDED = 'SUCCEEDED'
 
 
 class ApifyToolsClient:
@@ -35,7 +40,7 @@ def run_actor(
         self,
         actor_id: str,
         run_input: dict | None = None,
-        timeout_secs: int = 300,
+        timeout_secs: int = _DEFAULT_RUN_TIMEOUT_SECS,
         memory_mbytes: int | None = None,
     ) -> dict:
         """Start an Actor and block until it finishes.
@@ -77,9 +82,9 @@ def run_actor_and_get_items(
         self,
         actor_id: str,
         run_input: dict | None = None,
-        timeout_secs: int = 300,
+        timeout_secs: int = _DEFAULT_RUN_TIMEOUT_SECS,
         memory_mbytes: int | None = None,
-        dataset_items_limit: int = 100,
+        dataset_items_limit: int = _DEFAULT_DATASET_ITEMS_LIMIT,
     ) -> tuple[dict, list[dict]]:
         """Run an Actor, then fetch items from its default dataset.
 
@@ -105,7 +110,7 @@ def run_task(
         self,
         task_id: str,
         task_input: dict | None = None,
-        timeout_secs: int = 300,
+        timeout_secs: int = _DEFAULT_RUN_TIMEOUT_SECS,
         memory_mbytes: int | None = None,
     ) -> dict:
         """Start a saved Actor task and block until it finishes.
@@ -135,9 +140,9 @@ def run_task_and_get_items(
         self,
         task_id: str,
         task_input: dict | None = None,
-        timeout_secs: int = 300,
+        timeout_secs: int = _DEFAULT_TASK_TIMEOUT_SECS,
         memory_mbytes: int | None = None,
-        dataset_items_limit: int = 100,
+        dataset_items_limit: int = _DEFAULT_DATASET_ITEMS_LIMIT,
     ) -> tuple[dict, list[dict]]:
         """Run a saved Actor task, then fetch items from its default dataset.
 
@@ -160,7 +165,7 @@ def run_task_and_get_items(
         items = self._client.dataset(dataset_id).list_items(limit=dataset_items_limit, clean=True).items
         return run, items
 
-    def scrape_url(self, url: str, timeout_secs: int = 120) -> str:
+    def scrape_url(self, url: str, timeout_secs: int = _DEFAULT_SCRAPE_TIMEOUT_SECS) -> str:
         """Scrape a single URL and return its content as markdown.
 
         Uses ``apify/website-content-crawler`` with ``maxCrawlPages=1``.
@@ -199,7 +204,7 @@ def scrape_url(self, url: str, timeout_secs: int = 120) -> str:
     def _check_run_status(run: dict) -> None:
         """Raise if the run did not succeed."""
         status = run.get('status')
-        if status != 'SUCCEEDED':
+        if status != _RUN_STATUS_SUCCEEDED:
             run_id = run.get('id', 'unknown')
             msg = f'Actor run {run_id} ended with status {status}.'
             raise RuntimeError(msg)
diff --git a/langchain_apify/const.py b/langchain_apify/const.py
deleted file mode 100644
index 87e0d0e..0000000
--- a/langchain_apify/const.py
+++ /dev/null
@@ -1,2 +0,0 @@
-REQUESTS_TIMEOUT_SECS: float = 10.0
-MAX_DESCRIPTION_LEN: int = 350
diff --git a/langchain_apify/tools.py b/langchain_apify/tools.py
index 135314a..2afa413 100644
--- a/langchain_apify/tools.py
+++ b/langchain_apify/tools.py
@@ -10,14 +10,13 @@
 
 from langchain_apify.error_messages import ERROR_APIFY_TOKEN_ENV_VAR_NOT_SET
 from langchain_apify.utils import (
+    _MAX_DESCRIPTION_LEN,
     actor_id_to_tool_name,
     create_apify_client,
     get_actor_latest_build,
     prune_actor_input_schema,
 )
 
-from .const import MAX_DESCRIPTION_LEN
-
 if TYPE_CHECKING:
     from langchain_core.callbacks import (
         CallbackManagerForToolRun,
@@ -128,8 +127,8 @@ def _create_description(apify_client: ApifyClient, actor_id: str) -> str:
         """
         build = get_actor_latest_build(apify_client, actor_id)
         actor_description = build.get('actorDefinition', {}).get('description', '')
-        if len(actor_description) > MAX_DESCRIPTION_LEN:
-            actor_description = actor_description[:MAX_DESCRIPTION_LEN] + '...(TRUNCATED, TOO LONG)'
+        if len(actor_description) > _MAX_DESCRIPTION_LEN:
+            actor_description = actor_description[:_MAX_DESCRIPTION_LEN] + '...(TRUNCATED, TOO LONG)'
         return actor_description
 
     @staticmethod
diff --git a/langchain_apify/utils.py b/langchain_apify/utils.py
index 8cdc835..4f2e74f 100644
--- a/langchain_apify/utils.py
+++ b/langchain_apify/utils.py
@@ -7,14 +7,14 @@
 from apify_client import ApifyClientAsync
 from apify_client.client import ApifyClient
 
-from langchain_apify.const import MAX_DESCRIPTION_LEN, REQUESTS_TIMEOUT_SECS
-
-APIFY_API_ENDPOINT_GET_DEFAULT_BUILD = 'https://api.apify.com/v2/acts/{actor_id}/builds/default'
+_MAX_DESCRIPTION_LEN: int = 350
+_REQUESTS_TIMEOUT_SECS: float = 10.0
+_APIFY_API_ENDPOINT_GET_DEFAULT_BUILD = 'https://api.apify.com/v2/acts/{actor_id}/builds/default'
 
 
 def prune_actor_input_schema(
     input_schema: dict,
-    max_description_len: int = MAX_DESCRIPTION_LEN,
+    max_description_len: int = _MAX_DESCRIPTION_LEN,
 ) -> tuple[dict, list[str]]:
     """Get the input schema from the Actor build.
 
@@ -117,8 +117,8 @@ def get_actor_latest_build(apify_client: ApifyClient, actor_id: str) -> dict:
         msg = f'Failed to get the Actor object ID for {actor_id}.'
         raise ValueError(msg)
 
-    url = APIFY_API_ENDPOINT_GET_DEFAULT_BUILD.format(actor_id=actor_obj_id)
-    response = requests.request('GET', url, timeout=REQUESTS_TIMEOUT_SECS)
+    url = _APIFY_API_ENDPOINT_GET_DEFAULT_BUILD.format(actor_id=actor_obj_id)
+    response = requests.request('GET', url, timeout=_REQUESTS_TIMEOUT_SECS)
 
     build = response.json()
     if not isinstance(build, dict):

From b1a89a455602d3cd5941c5f5e1f05695899f5cd0 Mon Sep 17 00:00:00 2001
From: David Omrai <david.omrai@seznam.cz>
Date: Mon, 20 Apr 2026 17:59:49 +0200
Subject: [PATCH 03/63] feat: add few more input schemas, helpers and tool
 classes

---
 langchain_apify/_client.py                    |  10 +-
 .../{error_messages.py => _error_messages.py} |   4 +
 langchain_apify/tools.py                      | 261 +++++++++++++++++-
 langchain_apify/utils.py                      |   3 +-
 tests/integration_tests/test_utils.py         |   2 +-
 5 files changed, 270 insertions(+), 10 deletions(-)
 rename langchain_apify/{error_messages.py => _error_messages.py} (75%)

diff --git a/langchain_apify/_client.py b/langchain_apify/_client.py
index 068835b..181c6ec 100644
--- a/langchain_apify/_client.py
+++ b/langchain_apify/_client.py
@@ -4,7 +4,7 @@
 
 from apify_client import ApifyClient
 
-from langchain_apify.error_messages import ERROR_APIFY_TOKEN_ENV_VAR_NOT_SET
+from langchain_apify._error_messages import ERROR_ACTOR_RUN_FAILED, ERROR_APIFY_TOKEN_ENV_VAR_NOT_SET, ERROR_SCRAPE_EMPTY
 from langchain_apify.utils import create_apify_client
 
 _SCRAPE_ACTOR_ID = 'apify/website-content-crawler'
@@ -65,7 +65,7 @@ def run_actor(
         self._check_run_status(run)
         return run
 
-    def get_dataset_items(self, dataset_id: str, limit: int = 100, offset: int = 0) -> list[dict]:
+    def get_dataset_items(self, dataset_id: str, limit: int = _DEFAULT_DATASET_ITEMS_LIMIT, offset: int = 0) -> list[dict]:
         """Fetch items from an existing dataset.
 
         Args:
@@ -191,12 +191,12 @@ def scrape_url(self, url: str, timeout_secs: int = _DEFAULT_SCRAPE_TIMEOUT_SECS)
             dataset_items_limit=1,
         )
         if not items:
-            msg = f'No content extracted from {url}.'
+            msg = ERROR_SCRAPE_EMPTY.format(url=url)
             raise RuntimeError(msg)
 
         content = items[0].get('markdown') or items[0].get('text') or ''
         if not content:
-            msg = f'No content extracted from {url}.'
+            msg = ERROR_SCRAPE_EMPTY.format(url=url)
             raise RuntimeError(msg)
         return content
 
@@ -206,5 +206,5 @@ def _check_run_status(run: dict) -> None:
         status = run.get('status')
         if status != _RUN_STATUS_SUCCEEDED:
             run_id = run.get('id', 'unknown')
-            msg = f'Actor run {run_id} ended with status {status}.'
+            msg = ERROR_ACTOR_RUN_FAILED.format(run_id=run_id, status=status)
             raise RuntimeError(msg)
diff --git a/langchain_apify/error_messages.py b/langchain_apify/_error_messages.py
similarity index 75%
rename from langchain_apify/error_messages.py
rename to langchain_apify/_error_messages.py
index 87462b8..a87c9cb 100644
--- a/langchain_apify/error_messages.py
+++ b/langchain_apify/_error_messages.py
@@ -5,3 +5,7 @@
     ' To pass it as environment variable, you can use the following command:'
     ' `APIFY_API_TOKEN="YOUR_APIFY_API_TOKEN" python your_script.py`'
 )
+
+ERROR_ACTOR_RUN_FAILED = 'Actor run {run_id} ended with status {status}.'
+
+ERROR_SCRAPE_EMPTY = 'No content extracted from {url}.'
diff --git a/langchain_apify/tools.py b/langchain_apify/tools.py
index 2afa413..40aeeee 100644
--- a/langchain_apify/tools.py
+++ b/langchain_apify/tools.py
@@ -5,10 +5,11 @@
 from typing import TYPE_CHECKING, Any
 
 from apify_client import ApifyClient
-from langchain_core.tools import BaseTool
+from langchain_core.tools import BaseTool, ToolException
 from pydantic import BaseModel, Field, create_model
 
-from langchain_apify.error_messages import ERROR_APIFY_TOKEN_ENV_VAR_NOT_SET
+from langchain_apify._client import ApifyToolsClient
+from langchain_apify._error_messages import ERROR_APIFY_TOKEN_ENV_VAR_NOT_SET
 from langchain_apify.utils import (
     _MAX_DESCRIPTION_LEN,
     actor_id_to_tool_name,
@@ -191,3 +192,259 @@ def _run_actor(self, run_input: dict) -> list[dict]:
         run = self._apify_client.run(run_id=run_id)
 
         return run.dataset().list_items(clean=True).items
+
+
+# ---------------------------------------------------------------------------
+# Input schemas for the generic tools
+# ---------------------------------------------------------------------------
+
+
+class ApifyRunActorInput(BaseModel):
+    """Input schema for :class:`ApifyRunActorTool`."""
+
+    actor_id: str = Field(description='Actor ID or name (e.g. "apify/python-example").')
+    run_input: dict | None = Field(default=None, description='JSON-serialisable input for the Actor.')
+    timeout_secs: int = Field(default=300, description='Maximum time in seconds to wait for the run to finish.')
+    memory_mbytes: int | None = Field(default=None, description='Memory limit in MB for the run, or null for default.')
+
+
+class ApifyGetDatasetItemsInput(BaseModel):
+    """Input schema for :class:`ApifyGetDatasetItemsTool`."""
+
+    dataset_id: str = Field(description='Apify dataset ID.')
+    limit: int = Field(default=100, description='Maximum number of items to return.')
+    offset: int = Field(default=0, description='Number of items to skip from the start.')
+
+
+class ApifyRunActorAndGetItemsInput(BaseModel):
+    """Input schema for :class:`ApifyRunActorAndGetItemsTool`."""
+
+    actor_id: str = Field(description='Actor ID or name (e.g. "apify/python-example").')
+    run_input: dict | None = Field(default=None, description='JSON-serialisable input for the Actor.')
+    timeout_secs: int = Field(default=300, description='Maximum time in seconds to wait for the run to finish.')
+    memory_mbytes: int | None = Field(default=None, description='Memory limit in MB for the run, or null for default.')
+    dataset_items_limit: int = Field(default=100, description='Maximum number of dataset items to return.')
+
+
+class ApifyScrapeUrlInput(BaseModel):
+    """Input schema for :class:`ApifyScrapeUrlTool`."""
+
+    url: str = Field(description='The URL to scrape.')
+    timeout_secs: int = Field(default=120, description='Maximum time in seconds to wait for the crawl to finish.')
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _iso(value: str | None) -> str | None:
+    """Pass through an ISO timestamp or *None*."""
+    return value
+
+
+def _run_meta(run: dict) -> dict:
+    """Extract a compact metadata dict from an Apify run-details dict."""
+    return {
+        'run_id': run.get('id'),
+        'status': run.get('status'),
+        'dataset_id': run.get('defaultDatasetId'),
+        'started_at': _iso(run.get('startedAt')),
+        'finished_at': _iso(run.get('finishedAt')),
+    }
+
+
+# ---------------------------------------------------------------------------
+# Generic tools
+# ---------------------------------------------------------------------------
+
+
+class ApifyRunActorTool(BaseTool):
+    """Run any Apify Actor by ID with an arbitrary JSON input.
+
+    Returns run metadata (run ID, status, dataset ID, timestamps) as a JSON
+    string.  Use :class:`ApifyGetDatasetItemsTool` afterwards to retrieve the
+    results from the dataset.
+
+    Example:
+        .. code-block:: python
+
+            import os
+            os.environ["APIFY_API_TOKEN"] = "your-apify-api-token"
+
+            from langchain_apify import ApifyRunActorTool
+
+            tool = ApifyRunActorTool()
+            result = tool.invoke({
+                "actor_id": "apify/python-example",
+                "run_input": {"first_number": 2, "second_number": 3},
+            })
+    """
+
+    name: str = 'apify_run_actor'
+    description: str = (
+        'Run an Apify Actor synchronously and return run metadata'
+        ' (run_id, status, dataset_id, timestamps) as a JSON string.'
+    )
+    args_schema: type[BaseModel] = ApifyRunActorInput
+    handle_tool_error: bool = True
+
+    _client: ApifyToolsClient
+
+    def __init__(self, apify_api_token: str | None = None, **kwargs: Any) -> None:  # noqa: ANN401
+        super().__init__(**kwargs)
+        self._client = ApifyToolsClient(apify_api_token=apify_api_token)
+
+    def _run(
+        self,
+        actor_id: str,
+        run_input: dict | None = None,
+        timeout_secs: int = 300,
+        memory_mbytes: int | None = None,
+        run_manager: CallbackManagerForToolRun | None = None,
+    ) -> str:
+        try:
+            run = self._client.run_actor(actor_id, run_input, timeout_secs, memory_mbytes)
+        except RuntimeError as exc:
+            raise ToolException(str(exc)) from exc
+        return json.dumps(_run_meta(run))
+
+
+class ApifyGetDatasetItemsTool(BaseTool):
+    """Fetch items from an existing Apify dataset by ID.
+
+    Returns items as a JSON string.  When the dataset is empty the tool returns
+    an informative JSON message instead of raising an error.
+
+    Example:
+        .. code-block:: python
+
+            import os
+            os.environ["APIFY_API_TOKEN"] = "your-apify-api-token"
+
+            from langchain_apify import ApifyGetDatasetItemsTool
+
+            tool = ApifyGetDatasetItemsTool()
+            result = tool.invoke({"dataset_id": "abc123", "limit": 10})
+    """
+
+    name: str = 'apify_get_dataset_items'
+    description: str = 'Fetch items from an Apify dataset by ID. Returns a JSON array of items.'
+    args_schema: type[BaseModel] = ApifyGetDatasetItemsInput
+    handle_tool_error: bool = True
+
+    _client: ApifyToolsClient
+
+    def __init__(self, apify_api_token: str | None = None, **kwargs: Any) -> None:  # noqa: ANN401
+        super().__init__(**kwargs)
+        self._client = ApifyToolsClient(apify_api_token=apify_api_token)
+
+    def _run(
+        self,
+        dataset_id: str,
+        limit: int = 100,
+        offset: int = 0,
+        run_manager: CallbackManagerForToolRun | None = None,
+    ) -> str:
+        items = self._client.get_dataset_items(dataset_id, limit, offset)
+        if not items:
+            return json.dumps({'items': [], 'message': 'Dataset is empty or not found.'})
+        return json.dumps(items)
+
+
+class ApifyRunActorAndGetItemsTool(BaseTool):
+    """Run any Apify Actor and return both run metadata and dataset items.
+
+    Combines :class:`ApifyRunActorTool` and :class:`ApifyGetDatasetItemsTool`
+    into a single call.  Returns a JSON string with ``run`` (metadata) and
+    ``items`` (list of dicts) keys.
+
+    Example:
+        .. code-block:: python
+
+            import os
+            os.environ["APIFY_API_TOKEN"] = "your-apify-api-token"
+
+            from langchain_apify import ApifyRunActorAndGetItemsTool
+
+            tool = ApifyRunActorAndGetItemsTool()
+            result = tool.invoke({
+                "actor_id": "apify/python-example",
+                "run_input": {"first_number": 2, "second_number": 3},
+            })
+    """
+
+    name: str = 'apify_run_actor_and_get_items'
+    description: str = (
+        'Run an Apify Actor synchronously and return both run metadata and'
+        ' dataset items as a JSON string with "run" and "items" keys.'
+    )
+    args_schema: type[BaseModel] = ApifyRunActorAndGetItemsInput
+    handle_tool_error: bool = True
+
+    _client: ApifyToolsClient
+
+    def __init__(self, apify_api_token: str | None = None, **kwargs: Any) -> None:  # noqa: ANN401
+        super().__init__(**kwargs)
+        self._client = ApifyToolsClient(apify_api_token=apify_api_token)
+
+    def _run(
+        self,
+        actor_id: str,
+        run_input: dict | None = None,
+        timeout_secs: int = 300,
+        memory_mbytes: int | None = None,
+        dataset_items_limit: int = 100,
+        run_manager: CallbackManagerForToolRun | None = None,
+    ) -> str:
+        try:
+            run, items = self._client.run_actor_and_get_items(
+                actor_id, run_input, timeout_secs, memory_mbytes, dataset_items_limit
+            )
+        except RuntimeError as exc:
+            raise ToolException(str(exc)) from exc
+        return json.dumps({'run': _run_meta(run), 'items': items})
+
+
+class ApifyScrapeUrlTool(BaseTool):
+    """Scrape a single URL and return its content as markdown.
+
+    Uses the ``apify/website-content-crawler`` Actor under the hood with
+    ``maxCrawlPages=1``.  Returns the page content as a plain markdown string
+    (not JSON).
+
+    Example:
+        .. code-block:: python
+
+            import os
+            os.environ["APIFY_API_TOKEN"] = "your-apify-api-token"
+
+            from langchain_apify import ApifyScrapeUrlTool
+
+            tool = ApifyScrapeUrlTool()
+            markdown = tool.invoke({"url": "https://apify.com"})
+    """
+
+    name: str = 'apify_scrape_url'
+    description: str = (
+        'Scrape a single URL using Apify and return its content as markdown text.'
+    )
+    args_schema: type[BaseModel] = ApifyScrapeUrlInput
+    handle_tool_error: bool = True
+
+    _client: ApifyToolsClient
+
+    def __init__(self, apify_api_token: str | None = None, **kwargs: Any) -> None:  # noqa: ANN401
+        super().__init__(**kwargs)
+        self._client = ApifyToolsClient(apify_api_token=apify_api_token)
+
+    def _run(
+        self,
+        url: str,
+        timeout_secs: int = 120,
+        run_manager: CallbackManagerForToolRun | None = None,
+    ) -> str:
+        try:
+            return self._client.scrape_url(url, timeout_secs)
+        except RuntimeError as exc:
+            raise ToolException(str(exc)) from exc
diff --git a/langchain_apify/utils.py b/langchain_apify/utils.py
index 4f2e74f..6b9f9fd 100644
--- a/langchain_apify/utils.py
+++ b/langchain_apify/utils.py
@@ -9,8 +9,7 @@
 
 _MAX_DESCRIPTION_LEN: int = 350
 _REQUESTS_TIMEOUT_SECS: float = 10.0
-_APIFY_API_ENDPOINT_GET_DEFAULT_BUILD = 'https://api.apify.com/v2/acts/{actor_id}/builds/default'
-
+_APIFY_API_ENDPOINT_GET_DEFAULT_BUILD: str = 'https://api.apify.com/v2/acts/{actor_id}/builds/default'
 
 def prune_actor_input_schema(
     input_schema: dict,
diff --git a/tests/integration_tests/test_utils.py b/tests/integration_tests/test_utils.py
index 1107c7a..554cc2d 100644
--- a/tests/integration_tests/test_utils.py
+++ b/tests/integration_tests/test_utils.py
@@ -2,7 +2,7 @@
 
 from apify_client.client import ApifyClient
 
-from langchain_apify.error_messages import ERROR_APIFY_TOKEN_ENV_VAR_NOT_SET
+from langchain_apify._error_messages import ERROR_APIFY_TOKEN_ENV_VAR_NOT_SET
 from langchain_apify.utils import create_apify_client, get_actor_latest_build
 
 

From 0aa917582970bba0d0e50fd88cc17b8606397cfc Mon Sep 17 00:00:00 2001
From: David Omrai <david.omrai@seznam.cz>
Date: Mon, 20 Apr 2026 18:28:13 +0200
Subject: [PATCH 04/63] feat: export new tools from __init__

---
 langchain_apify/__init__.py | 35 +++++++++++++++++++++++++++++++++--
 1 file changed, 33 insertions(+), 2 deletions(-)

diff --git a/langchain_apify/__init__.py b/langchain_apify/__init__.py
index 66142be..cfedc69 100644
--- a/langchain_apify/__init__.py
+++ b/langchain_apify/__init__.py
@@ -1,19 +1,50 @@
 from importlib import metadata
 
 from langchain_apify.document_loaders import ApifyDatasetLoader
-from langchain_apify.tools import ApifyActorsTool
+from langchain_apify.tools import (
+    ApifyActorsTool,
+    ApifyGetDatasetItemsTool,
+    ApifyRunActorAndGetItemsTool,
+    ApifyRunActorTool,
+    ApifyScrapeUrlTool,
+)
 from langchain_apify.wrappers import ApifyWrapper
 
 try:
     __version__ = metadata.version(__package__)
 except metadata.PackageNotFoundError:
-    # Case where package metadata is not available.
     __version__ = ''
 del metadata  # optional, avoids polluting the results of dir(__package__)
 
+# ---------------------------------------------------------------------------
+# Convenience tool-class lists for selective agent binding.
+#
+# Binding all tools at once can overwhelm the LLM context window; pick the
+# group(s) relevant to your use case:
+#
+#   from langchain_apify import CORE_TOOLS
+#   agent = create_react_agent(model, [t() for t in CORE_TOOLS])
+# ---------------------------------------------------------------------------
+
+CORE_TOOLS: list[type] = [
+    ApifyRunActorTool,
+    ApifyGetDatasetItemsTool,
+    ApifyRunActorAndGetItemsTool,
+    ApifyScrapeUrlTool,
+]
+
 __all__ = [
+    # Existing components (backward-compatible)
     'ApifyActorsTool',
     'ApifyDatasetLoader',
     'ApifyWrapper',
+    # Core generic tools
+    'ApifyGetDatasetItemsTool',
+    'ApifyRunActorAndGetItemsTool',
+    'ApifyRunActorTool',
+    'ApifyScrapeUrlTool',
+    # Tool group lists
+    'CORE_TOOLS',
+    # Meta
     '__version__',
 ]

From 4e46d3684048e15a4867a87c457c58d9f5e4ad2a Mon Sep 17 00:00:00 2001
From: David Omrai <david.omrai@seznam.cz>
Date: Mon, 20 Apr 2026 18:35:25 +0200
Subject: [PATCH 05/63] feat: add unit tests

---
 tests/unit_tests/test_client.py | 232 ++++++++++++++++++++++++++++++++
 1 file changed, 232 insertions(+)
 create mode 100644 tests/unit_tests/test_client.py

diff --git a/tests/unit_tests/test_client.py b/tests/unit_tests/test_client.py
new file mode 100644
index 0000000..f30ed52
--- /dev/null
+++ b/tests/unit_tests/test_client.py
@@ -0,0 +1,232 @@
+from __future__ import annotations
+
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from langchain_apify._client import ApifyToolsClient
+from langchain_apify._error_messages import ERROR_ACTOR_RUN_FAILED, ERROR_APIFY_TOKEN_ENV_VAR_NOT_SET, ERROR_SCRAPE_EMPTY
+
+_SUCCEEDED_RUN: dict = {
+    'id': 'run-abc',
+    'status': 'SUCCEEDED',
+    'defaultDatasetId': 'dataset-xyz',
+    'startedAt': '2025-01-01T00:00:00.000Z',
+    'finishedAt': '2025-01-01T00:01:00.000Z',
+}
+
+_FAILED_RUN: dict = {
+    'id': 'run-fail',
+    'status': 'FAILED',
+    'defaultDatasetId': 'dataset-xyz',
+}
+
+_SAMPLE_ITEMS: list[dict] = [
+    {'text': 'item-1', 'url': 'https://example.com/1'},
+    {'text': 'item-2', 'url': 'https://example.com/2'},
+]
+
+
+@pytest.fixture
+def mock_apify_client() -> MagicMock:
+    return MagicMock()
+
+
+@pytest.fixture
+def client(mock_apify_client: MagicMock) -> ApifyToolsClient:
+    with patch('langchain_apify._client.create_apify_client', return_value=mock_apify_client):
+        return ApifyToolsClient(apify_api_token='dummy-token')
+
+
+# ---------------------------------------------------------------------------
+# __init__
+# ---------------------------------------------------------------------------
+
+
+def test_init_with_explicit_token(mock_apify_client: MagicMock) -> None:
+    with patch('langchain_apify._client.create_apify_client', return_value=mock_apify_client) as mock_create:
+        c = ApifyToolsClient(apify_api_token='my-token')
+        mock_create.assert_called_once()
+        assert c._client is mock_apify_client
+
+
+def test_init_with_env_token(monkeypatch: pytest.MonkeyPatch, mock_apify_client: MagicMock) -> None:
+    monkeypatch.setenv('APIFY_API_TOKEN', 'env-token')
+    with patch('langchain_apify._client.create_apify_client', return_value=mock_apify_client):
+        c = ApifyToolsClient()
+        assert c._client is mock_apify_client
+
+
+def test_init_missing_token_raises(monkeypatch: pytest.MonkeyPatch) -> None:
+    monkeypatch.delenv('APIFY_API_TOKEN', raising=False)
+    with pytest.raises(ValueError, match='APIFY_API_TOKEN'):
+        ApifyToolsClient()
+
+
+# ---------------------------------------------------------------------------
+# run_actor
+# ---------------------------------------------------------------------------
+
+
+def test_run_actor_success(client: ApifyToolsClient, mock_apify_client: MagicMock) -> None:
+    mock_apify_client.actor.return_value.call.return_value = _SUCCEEDED_RUN
+
+    result = client.run_actor('apify/test-actor', run_input={'key': 'val'})
+
+    mock_apify_client.actor.assert_called_once_with('apify/test-actor')
+    mock_apify_client.actor.return_value.call.assert_called_once_with(
+        run_input={'key': 'val'}, timeout_secs=300
+    )
+    assert result == _SUCCEEDED_RUN
+
+
+def test_run_actor_with_memory(client: ApifyToolsClient, mock_apify_client: MagicMock) -> None:
+    mock_apify_client.actor.return_value.call.return_value = _SUCCEEDED_RUN
+
+    client.run_actor('apify/test-actor', memory_mbytes=512)
+
+    mock_apify_client.actor.return_value.call.assert_called_once_with(
+        run_input=None, timeout_secs=300, memory_mbytes=512
+    )
+
+
+def test_run_actor_failed_status_raises(client: ApifyToolsClient, mock_apify_client: MagicMock) -> None:
+    mock_apify_client.actor.return_value.call.return_value = _FAILED_RUN
+
+    with pytest.raises(RuntimeError, match='run-fail'):
+        client.run_actor('apify/test-actor')
+
+
+# ---------------------------------------------------------------------------
+# get_dataset_items
+# ---------------------------------------------------------------------------
+
+
+def test_get_dataset_items_success(client: ApifyToolsClient, mock_apify_client: MagicMock) -> None:
+    mock_apify_client.dataset.return_value.list_items.return_value.items = _SAMPLE_ITEMS
+
+    items = client.get_dataset_items('dataset-xyz', limit=50, offset=10)
+
+    mock_apify_client.dataset.assert_called_once_with('dataset-xyz')
+    mock_apify_client.dataset.return_value.list_items.assert_called_once_with(limit=50, offset=10, clean=True)
+    assert items == _SAMPLE_ITEMS
+
+
+def test_get_dataset_items_empty(client: ApifyToolsClient, mock_apify_client: MagicMock) -> None:
+    mock_apify_client.dataset.return_value.list_items.return_value.items = []
+
+    items = client.get_dataset_items('dataset-empty')
+    assert items == []
+
+
+# ---------------------------------------------------------------------------
+# run_actor_and_get_items
+# ---------------------------------------------------------------------------
+
+
+def test_run_actor_and_get_items_success(client: ApifyToolsClient, mock_apify_client: MagicMock) -> None:
+    mock_apify_client.actor.return_value.call.return_value = _SUCCEEDED_RUN
+    mock_apify_client.dataset.return_value.list_items.return_value.items = _SAMPLE_ITEMS
+
+    run, items = client.run_actor_and_get_items('apify/test-actor', run_input={'q': '1'})
+
+    assert run == _SUCCEEDED_RUN
+    assert items == _SAMPLE_ITEMS
+    mock_apify_client.dataset.assert_called_once_with('dataset-xyz')
+
+
+# ---------------------------------------------------------------------------
+# run_task
+# ---------------------------------------------------------------------------
+
+
+def test_run_task_success(client: ApifyToolsClient, mock_apify_client: MagicMock) -> None:
+    mock_apify_client.task.return_value.call.return_value = _SUCCEEDED_RUN
+
+    result = client.run_task('user/my-task', task_input={'key': 'val'})
+
+    mock_apify_client.task.assert_called_once_with('user/my-task')
+    mock_apify_client.task.return_value.call.assert_called_once_with(
+        task_input={'key': 'val'}, timeout_secs=300
+    )
+    assert result == _SUCCEEDED_RUN
+
+
+def test_run_task_failed_status_raises(client: ApifyToolsClient, mock_apify_client: MagicMock) -> None:
+    mock_apify_client.task.return_value.call.return_value = _FAILED_RUN
+
+    with pytest.raises(RuntimeError, match='run-fail'):
+        client.run_task('user/my-task')
+
+
+# ---------------------------------------------------------------------------
+# run_task_and_get_items
+# ---------------------------------------------------------------------------
+
+
+def test_run_task_and_get_items_success(client: ApifyToolsClient, mock_apify_client: MagicMock) -> None:
+    mock_apify_client.task.return_value.call.return_value = _SUCCEEDED_RUN
+    mock_apify_client.dataset.return_value.list_items.return_value.items = _SAMPLE_ITEMS
+
+    run, items = client.run_task_and_get_items('user/my-task')
+
+    assert run == _SUCCEEDED_RUN
+    assert items == _SAMPLE_ITEMS
+
+
+# ---------------------------------------------------------------------------
+# scrape_url
+# ---------------------------------------------------------------------------
+
+
+def test_scrape_url_returns_markdown(client: ApifyToolsClient, mock_apify_client: MagicMock) -> None:
+    mock_apify_client.actor.return_value.call.return_value = _SUCCEEDED_RUN
+    mock_apify_client.dataset.return_value.list_items.return_value.items = [
+        {'markdown': '# Hello', 'text': 'Hello', 'url': 'https://example.com'},
+    ]
+
+    content = client.scrape_url('https://example.com')
+    assert content == '# Hello'
+
+
+def test_scrape_url_falls_back_to_text(client: ApifyToolsClient, mock_apify_client: MagicMock) -> None:
+    mock_apify_client.actor.return_value.call.return_value = _SUCCEEDED_RUN
+    mock_apify_client.dataset.return_value.list_items.return_value.items = [
+        {'text': 'Plain text content', 'url': 'https://example.com'},
+    ]
+
+    content = client.scrape_url('https://example.com')
+    assert content == 'Plain text content'
+
+
+def test_scrape_url_empty_items_raises(client: ApifyToolsClient, mock_apify_client: MagicMock) -> None:
+    mock_apify_client.actor.return_value.call.return_value = _SUCCEEDED_RUN
+    mock_apify_client.dataset.return_value.list_items.return_value.items = []
+
+    with pytest.raises(RuntimeError, match='No content extracted'):
+        client.scrape_url('https://example.com')
+
+
+def test_scrape_url_empty_content_raises(client: ApifyToolsClient, mock_apify_client: MagicMock) -> None:
+    mock_apify_client.actor.return_value.call.return_value = _SUCCEEDED_RUN
+    mock_apify_client.dataset.return_value.list_items.return_value.items = [
+        {'markdown': '', 'text': '', 'url': 'https://example.com'},
+    ]
+
+    with pytest.raises(RuntimeError, match='No content extracted'):
+        client.scrape_url('https://example.com')
+
+
+# ---------------------------------------------------------------------------
+# _check_run_status
+# ---------------------------------------------------------------------------
+
+
+def test_check_run_status_succeeded() -> None:
+    ApifyToolsClient._check_run_status({'id': 'run-ok', 'status': 'SUCCEEDED'})
+
+
+def test_check_run_status_failed() -> None:
+    expected_msg = ERROR_ACTOR_RUN_FAILED.format(run_id='run-bad', status='FAILED')
+    with pytest.raises(RuntimeError, match='run-bad'):
+        ApifyToolsClient._check_run_status({'id': 'run-bad', 'status': 'FAILED'})

From fc6ef1286297c1f8581b15fe475b150ee1fa6b58 Mon Sep 17 00:00:00 2001
From: David Omrai <david.omrai@seznam.cz>
Date: Tue, 21 Apr 2026 09:45:06 +0200
Subject: [PATCH 06/63] feat: implement tests and introduce tools list

---
 langchain_apify/__init__.py    |   8 +-
 tests/unit_tests/test_tools.py | 186 ++++++++++++++++++++++++++++++++-
 2 files changed, 188 insertions(+), 6 deletions(-)

diff --git a/langchain_apify/__init__.py b/langchain_apify/__init__.py
index cfedc69..21e5776 100644
--- a/langchain_apify/__init__.py
+++ b/langchain_apify/__init__.py
@@ -22,11 +22,11 @@
 # Binding all tools at once overwhelms the LLM context window; pick the
 # group(s) relevant to your use case:
 #
-#   from langchain_apify import CORE_TOOLS
-#   agent = create_react_agent(model, [t() for t in CORE_TOOLS])
+#   from langchain_apify import APIFY_CORE_TOOLS
+#   agent = create_react_agent(model, [t() for t in APIFY_CORE_TOOLS])
 # ---------------------------------------------------------------------------
 
-CORE_TOOLS: list[type] = [
+APIFY_CORE_TOOLS: list[type] = [
     ApifyRunActorTool,
     ApifyGetDatasetItemsTool,
     ApifyRunActorAndGetItemsTool,
@@ -44,7 +44,7 @@
     'ApifyRunActorTool',
     'ApifyScrapeUrlTool',
     # Tool group lists
-    'CORE_TOOLS',
+    'APIFY_CORE_TOOLS',
     # Meta
     '__version__',
 ]
diff --git a/tests/unit_tests/test_tools.py b/tests/unit_tests/test_tools.py
index b10df2f..f17572f 100644
--- a/tests/unit_tests/test_tools.py
+++ b/tests/unit_tests/test_tools.py
@@ -1,12 +1,21 @@
 from __future__ import annotations
 
+import json
 from typing import TYPE_CHECKING
-from unittest.mock import patch
+from unittest.mock import MagicMock, patch
 
 import pytest
+from langchain_core.tools import ToolException
 from pydantic import BaseModel
 
-from langchain_apify.tools import ApifyActorsTool
+from langchain_apify._client import ApifyToolsClient
+from langchain_apify.tools import (
+    ApifyActorsTool,
+    ApifyGetDatasetItemsTool,
+    ApifyRunActorAndGetItemsTool,
+    ApifyRunActorTool,
+    ApifyScrapeUrlTool,
+)
 from langchain_apify.utils import actor_id_to_tool_name
 
 if TYPE_CHECKING:
@@ -85,3 +94,176 @@ class DummyModel(BaseModel):
 
         tool = ApifyActorsTool(actor_id='apify/python-example', apify_api_token='dummy-token')
         yield tool
+
+
+# ---------------------------------------------------------------------------
+# Shared test data for generic tools
+# ---------------------------------------------------------------------------
+
+_SUCCEEDED_RUN: dict = {
+    'id': 'run-abc',
+    'status': 'SUCCEEDED',
+    'defaultDatasetId': 'dataset-xyz',
+    'startedAt': '2025-01-01T00:00:00.000Z',
+    'finishedAt': '2025-01-01T00:01:00.000Z',
+}
+
+_SAMPLE_ITEMS: list[dict] = [
+    {'text': 'item-1', 'url': 'https://example.com/1'},
+    {'text': 'item-2', 'url': 'https://example.com/2'},
+]
+
+
+@pytest.fixture
+def mock_tools_client() -> MagicMock:
+    return MagicMock(spec=ApifyToolsClient)
+
+
+def _make_tool(tool_cls: type, mock_client: MagicMock) -> ApifyRunActorTool | ApifyGetDatasetItemsTool | ApifyRunActorAndGetItemsTool | ApifyScrapeUrlTool:
+    """Instantiate a generic tool with a mocked ApifyToolsClient."""
+    with patch.object(ApifyToolsClient, '__init__', return_value=None):
+        tool = tool_cls(apify_api_token='dummy-token')
+    tool._client = mock_client
+    return tool
+
+
+# ---------------------------------------------------------------------------
+# ApifyRunActorTool
+# ---------------------------------------------------------------------------
+
+
+def test_run_actor_tool_returns_json(mock_tools_client: MagicMock) -> None:
+    mock_tools_client.run_actor.return_value = _SUCCEEDED_RUN
+    tool = _make_tool(ApifyRunActorTool, mock_tools_client)
+
+    result = tool._run(actor_id='apify/test', run_input={'key': 'val'})
+
+    parsed = json.loads(result)
+    assert parsed['run_id'] == 'run-abc'
+    assert parsed['status'] == 'SUCCEEDED'
+    assert parsed['dataset_id'] == 'dataset-xyz'
+    assert parsed['started_at'] == '2025-01-01T00:00:00.000Z'
+    assert parsed['finished_at'] == '2025-01-01T00:01:00.000Z'
+    mock_tools_client.run_actor.assert_called_once_with('apify/test', {'key': 'val'}, 300, None)
+
+
+def test_run_actor_tool_failure_raises_tool_exception(mock_tools_client: MagicMock) -> None:
+    mock_tools_client.run_actor.side_effect = RuntimeError('Actor run run-bad ended with status FAILED.')
+    tool = _make_tool(ApifyRunActorTool, mock_tools_client)
+
+    with pytest.raises(ToolException, match='FAILED'):
+        tool._run(actor_id='apify/test')
+
+
+def test_run_actor_tool_missing_token(monkeypatch: pytest.MonkeyPatch) -> None:
+    monkeypatch.delenv('APIFY_API_TOKEN', raising=False)
+    with pytest.raises(ValueError, match='APIFY_API_TOKEN'):
+        ApifyRunActorTool()
+
+
+# ---------------------------------------------------------------------------
+# ApifyGetDatasetItemsTool
+# ---------------------------------------------------------------------------
+
+
+def test_get_dataset_items_tool_returns_json_array(mock_tools_client: MagicMock) -> None:
+    mock_tools_client.get_dataset_items.return_value = _SAMPLE_ITEMS
+    tool = _make_tool(ApifyGetDatasetItemsTool, mock_tools_client)
+
+    result = tool._run(dataset_id='dataset-xyz', limit=50, offset=5)
+
+    parsed = json.loads(result)
+    assert len(parsed) == 2
+    assert parsed[0]['text'] == 'item-1'
+    mock_tools_client.get_dataset_items.assert_called_once_with('dataset-xyz', 50, 5)
+
+
+def test_get_dataset_items_tool_empty_returns_message(mock_tools_client: MagicMock) -> None:
+    mock_tools_client.get_dataset_items.return_value = []
+    tool = _make_tool(ApifyGetDatasetItemsTool, mock_tools_client)
+
+    result = tool._run(dataset_id='dataset-empty')
+
+    parsed = json.loads(result)
+    assert parsed['items'] == []
+    assert 'empty' in parsed['message'].lower()
+
+
+# ---------------------------------------------------------------------------
+# ApifyRunActorAndGetItemsTool
+# ---------------------------------------------------------------------------
+
+
+def test_run_actor_and_get_items_tool_returns_json(mock_tools_client: MagicMock) -> None:
+    mock_tools_client.run_actor_and_get_items.return_value = (_SUCCEEDED_RUN, _SAMPLE_ITEMS)
+    tool = _make_tool(ApifyRunActorAndGetItemsTool, mock_tools_client)
+
+    result = tool._run(actor_id='apify/test', run_input={'q': '1'}, dataset_items_limit=50)
+
+    parsed = json.loads(result)
+    assert parsed['run']['run_id'] == 'run-abc'
+    assert parsed['run']['status'] == 'SUCCEEDED'
+    assert len(parsed['items']) == 2
+    mock_tools_client.run_actor_and_get_items.assert_called_once_with(
+        'apify/test', {'q': '1'}, 300, None, 50
+    )
+
+
+def test_run_actor_and_get_items_tool_failure_raises_tool_exception(mock_tools_client: MagicMock) -> None:
+    mock_tools_client.run_actor_and_get_items.side_effect = RuntimeError('Actor run run-bad ended with status TIMED-OUT.')
+    tool = _make_tool(ApifyRunActorAndGetItemsTool, mock_tools_client)
+
+    with pytest.raises(ToolException, match='TIMED-OUT'):
+        tool._run(actor_id='apify/test')
+
+
+# ---------------------------------------------------------------------------
+# ApifyScrapeUrlTool
+# ---------------------------------------------------------------------------
+
+
+def test_scrape_url_tool_returns_markdown(mock_tools_client: MagicMock) -> None:
+    mock_tools_client.scrape_url.return_value = '# Hello World'
+    tool = _make_tool(ApifyScrapeUrlTool, mock_tools_client)
+
+    result = tool._run(url='https://example.com')
+
+    assert result == '# Hello World'
+    mock_tools_client.scrape_url.assert_called_once_with('https://example.com', 120)
+
+
+def test_scrape_url_tool_empty_raises_tool_exception(mock_tools_client: MagicMock) -> None:
+    mock_tools_client.scrape_url.side_effect = RuntimeError('No content extracted from https://example.com.')
+    tool = _make_tool(ApifyScrapeUrlTool, mock_tools_client)
+
+    with pytest.raises(ToolException, match='No content extracted'):
+        tool._run(url='https://example.com')
+
+
+# ---------------------------------------------------------------------------
+# Tool metadata assertions
+# ---------------------------------------------------------------------------
+
+
+def test_generic_tools_have_correct_metadata() -> None:
+    """Verify name, description, and args_schema are set on all 4 tools."""
+    with patch.object(ApifyToolsClient, '__init__', return_value=None):
+        tools = [
+            ApifyRunActorTool(apify_api_token='dummy'),
+            ApifyGetDatasetItemsTool(apify_api_token='dummy'),
+            ApifyRunActorAndGetItemsTool(apify_api_token='dummy'),
+            ApifyScrapeUrlTool(apify_api_token='dummy'),
+        ]
+
+    expected_names = [
+        'apify_run_actor',
+        'apify_get_dataset_items',
+        'apify_run_actor_and_get_items',
+        'apify_scrape_url',
+    ]
+
+    for tool, expected_name in zip(tools, expected_names):
+        assert tool.name == expected_name
+        assert tool.description
+        assert tool.args_schema is not None
+        assert tool.handle_tool_error is True

From cc5be9e887edc95719742b682e463e98b3d0ca36 Mon Sep 17 00:00:00 2001
From: David Omrai <david.omrai@seznam.cz>
Date: Tue, 21 Apr 2026 10:01:27 +0200
Subject: [PATCH 07/63] fix: lint fix

---
 langchain_apify/__init__.py     | 12 ++++--------
 langchain_apify/_client.py      | 16 ++++++++++++++--
 langchain_apify/tools.py        | 20 +++++++++-----------
 langchain_apify/utils.py        |  1 +
 tests/unit_tests/test_client.py | 10 ++--------
 tests/unit_tests/test_tools.py  | 12 ++++++------
 6 files changed, 36 insertions(+), 35 deletions(-)

diff --git a/langchain_apify/__init__.py b/langchain_apify/__init__.py
index 21e5776..1b65eef 100644
--- a/langchain_apify/__init__.py
+++ b/langchain_apify/__init__.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 from importlib import metadata
 
 from langchain_apify.document_loaders import ApifyDatasetLoader
@@ -16,15 +18,9 @@
     __version__ = ''
 del metadata  # optional, avoids polluting the results of dir(__package__)
 
-# ---------------------------------------------------------------------------
 # Convenience tool-class lists for selective agent binding.
-#
-# Binding all tools at once overwhelms the LLM context window; pick the
-# group(s) relevant to your use case:
-#
-#   from langchain_apify import APIFY_CORE_TOOLS
-#   agent = create_react_agent(model, [t() for t in APIFY_CORE_TOOLS])
-# ---------------------------------------------------------------------------
+# Binding all tools at once overwhelms the LLM context window;
+# pick the group(s) relevant to your use case.
 
 APIFY_CORE_TOOLS: list[type] = [
     ApifyRunActorTool,
diff --git a/langchain_apify/_client.py b/langchain_apify/_client.py
index 181c6ec..bf3e01a 100644
--- a/langchain_apify/_client.py
+++ b/langchain_apify/_client.py
@@ -4,7 +4,11 @@
 
 from apify_client import ApifyClient
 
-from langchain_apify._error_messages import ERROR_ACTOR_RUN_FAILED, ERROR_APIFY_TOKEN_ENV_VAR_NOT_SET, ERROR_SCRAPE_EMPTY
+from langchain_apify._error_messages import (
+    ERROR_ACTOR_RUN_FAILED,
+    ERROR_APIFY_TOKEN_ENV_VAR_NOT_SET,
+    ERROR_SCRAPE_EMPTY,
+)
 from langchain_apify.utils import create_apify_client
 
 _SCRAPE_ACTOR_ID = 'apify/website-content-crawler'
@@ -62,10 +66,15 @@ def run_actor(
             call_kwargs['memory_mbytes'] = memory_mbytes
 
         run = self._client.actor(actor_id).call(**call_kwargs)
+        if run is None:
+            msg = f'Actor {actor_id} call returned no run details.'
+            raise RuntimeError(msg)
         self._check_run_status(run)
         return run
 
-    def get_dataset_items(self, dataset_id: str, limit: int = _DEFAULT_DATASET_ITEMS_LIMIT, offset: int = 0) -> list[dict]:
+    def get_dataset_items(
+        self, dataset_id: str, limit: int = _DEFAULT_DATASET_ITEMS_LIMIT, offset: int = 0
+    ) -> list[dict]:
         """Fetch items from an existing dataset.
 
         Args:
@@ -133,6 +142,9 @@ def run_task(
             call_kwargs['memory_mbytes'] = memory_mbytes
 
         run = self._client.task(task_id).call(**call_kwargs)
+        if run is None:
+            msg = f'Task {task_id} call returned no run details.'
+            raise RuntimeError(msg)
         self._check_run_status(run)
         return run
 
diff --git a/langchain_apify/tools.py b/langchain_apify/tools.py
index 40aeeee..a751dad 100644
--- a/langchain_apify/tools.py
+++ b/langchain_apify/tools.py
@@ -259,7 +259,7 @@ def _run_meta(run: dict) -> dict:
 # ---------------------------------------------------------------------------
 
 
-class ApifyRunActorTool(BaseTool):
+class ApifyRunActorTool(BaseTool):  # type: ignore[override]
     """Run any Apify Actor by ID with an arbitrary JSON input.
 
     Returns run metadata (run ID, status, dataset ID, timestamps) as a JSON
@@ -301,7 +301,7 @@ def _run(
         run_input: dict | None = None,
         timeout_secs: int = 300,
         memory_mbytes: int | None = None,
-        run_manager: CallbackManagerForToolRun | None = None,
+        _run_manager: CallbackManagerForToolRun | None = None,
     ) -> str:
         try:
             run = self._client.run_actor(actor_id, run_input, timeout_secs, memory_mbytes)
@@ -310,7 +310,7 @@ def _run(
         return json.dumps(_run_meta(run))
 
 
-class ApifyGetDatasetItemsTool(BaseTool):
+class ApifyGetDatasetItemsTool(BaseTool):  # type: ignore[override]
     """Fetch items from an existing Apify dataset by ID.
 
     Returns items as a JSON string.  When the dataset is empty the tool returns
@@ -344,7 +344,7 @@ def _run(
         dataset_id: str,
         limit: int = 100,
         offset: int = 0,
-        run_manager: CallbackManagerForToolRun | None = None,
+        _run_manager: CallbackManagerForToolRun | None = None,
     ) -> str:
         items = self._client.get_dataset_items(dataset_id, limit, offset)
         if not items:
@@ -352,7 +352,7 @@ def _run(
         return json.dumps(items)
 
 
-class ApifyRunActorAndGetItemsTool(BaseTool):
+class ApifyRunActorAndGetItemsTool(BaseTool):  # type: ignore[override]
     """Run any Apify Actor and return both run metadata and dataset items.
 
     Combines :class:`ApifyRunActorTool` and :class:`ApifyGetDatasetItemsTool`
@@ -395,7 +395,7 @@ def _run(
         timeout_secs: int = 300,
         memory_mbytes: int | None = None,
         dataset_items_limit: int = 100,
-        run_manager: CallbackManagerForToolRun | None = None,
+        _run_manager: CallbackManagerForToolRun | None = None,
     ) -> str:
         try:
             run, items = self._client.run_actor_and_get_items(
@@ -406,7 +406,7 @@ def _run(
         return json.dumps({'run': _run_meta(run), 'items': items})
 
 
-class ApifyScrapeUrlTool(BaseTool):
+class ApifyScrapeUrlTool(BaseTool):  # type: ignore[override]
     """Scrape a single URL and return its content as markdown.
 
     Uses the ``apify/website-content-crawler`` Actor under the hood with
@@ -426,9 +426,7 @@ class ApifyScrapeUrlTool(BaseTool):
     """
 
     name: str = 'apify_scrape_url'
-    description: str = (
-        'Scrape a single URL using Apify and return its content as markdown text.'
-    )
+    description: str = 'Scrape a single URL using Apify and return its content as markdown text.'
     args_schema: type[BaseModel] = ApifyScrapeUrlInput
     handle_tool_error: bool = True
 
@@ -442,7 +440,7 @@ def _run(
         self,
         url: str,
         timeout_secs: int = 120,
-        run_manager: CallbackManagerForToolRun | None = None,
+        _run_manager: CallbackManagerForToolRun | None = None,
     ) -> str:
         try:
             return self._client.scrape_url(url, timeout_secs)
diff --git a/langchain_apify/utils.py b/langchain_apify/utils.py
index 6b9f9fd..d3a627f 100644
--- a/langchain_apify/utils.py
+++ b/langchain_apify/utils.py
@@ -11,6 +11,7 @@
 _REQUESTS_TIMEOUT_SECS: float = 10.0
 _APIFY_API_ENDPOINT_GET_DEFAULT_BUILD: str = 'https://api.apify.com/v2/acts/{actor_id}/builds/default'
 
+
 def prune_actor_input_schema(
     input_schema: dict,
     max_description_len: int = _MAX_DESCRIPTION_LEN,
diff --git a/tests/unit_tests/test_client.py b/tests/unit_tests/test_client.py
index f30ed52..89862b1 100644
--- a/tests/unit_tests/test_client.py
+++ b/tests/unit_tests/test_client.py
@@ -5,7 +5,6 @@
 import pytest
 
 from langchain_apify._client import ApifyToolsClient
-from langchain_apify._error_messages import ERROR_ACTOR_RUN_FAILED, ERROR_APIFY_TOKEN_ENV_VAR_NOT_SET, ERROR_SCRAPE_EMPTY
 
 _SUCCEEDED_RUN: dict = {
     'id': 'run-abc',
@@ -74,9 +73,7 @@ def test_run_actor_success(client: ApifyToolsClient, mock_apify_client: MagicMoc
     result = client.run_actor('apify/test-actor', run_input={'key': 'val'})
 
     mock_apify_client.actor.assert_called_once_with('apify/test-actor')
-    mock_apify_client.actor.return_value.call.assert_called_once_with(
-        run_input={'key': 'val'}, timeout_secs=300
-    )
+    mock_apify_client.actor.return_value.call.assert_called_once_with(run_input={'key': 'val'}, timeout_secs=300)
     assert result == _SUCCEEDED_RUN
 
 
@@ -146,9 +143,7 @@ def test_run_task_success(client: ApifyToolsClient, mock_apify_client: MagicMock
     result = client.run_task('user/my-task', task_input={'key': 'val'})
 
     mock_apify_client.task.assert_called_once_with('user/my-task')
-    mock_apify_client.task.return_value.call.assert_called_once_with(
-        task_input={'key': 'val'}, timeout_secs=300
-    )
+    mock_apify_client.task.return_value.call.assert_called_once_with(task_input={'key': 'val'}, timeout_secs=300)
     assert result == _SUCCEEDED_RUN
 
 
@@ -227,6 +222,5 @@ def test_check_run_status_succeeded() -> None:
 
 
 def test_check_run_status_failed() -> None:
-    expected_msg = ERROR_ACTOR_RUN_FAILED.format(run_id='run-bad', status='FAILED')
     with pytest.raises(RuntimeError, match='run-bad'):
         ApifyToolsClient._check_run_status({'id': 'run-bad', 'status': 'FAILED'})
diff --git a/tests/unit_tests/test_tools.py b/tests/unit_tests/test_tools.py
index f17572f..af43843 100644
--- a/tests/unit_tests/test_tools.py
+++ b/tests/unit_tests/test_tools.py
@@ -1,7 +1,7 @@
 from __future__ import annotations
 
 import json
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, Any
 from unittest.mock import MagicMock, patch
 
 import pytest
@@ -119,7 +119,7 @@ def mock_tools_client() -> MagicMock:
     return MagicMock(spec=ApifyToolsClient)
 
 
-def _make_tool(tool_cls: type, mock_client: MagicMock) -> ApifyRunActorTool | ApifyGetDatasetItemsTool | ApifyRunActorAndGetItemsTool | ApifyScrapeUrlTool:
+def _make_tool(tool_cls: type, mock_client: MagicMock) -> Any:  # noqa: ANN401
     """Instantiate a generic tool with a mocked ApifyToolsClient."""
     with patch.object(ApifyToolsClient, '__init__', return_value=None):
         tool = tool_cls(apify_api_token='dummy-token')
@@ -204,13 +204,13 @@ def test_run_actor_and_get_items_tool_returns_json(mock_tools_client: MagicMock)
     assert parsed['run']['run_id'] == 'run-abc'
     assert parsed['run']['status'] == 'SUCCEEDED'
     assert len(parsed['items']) == 2
-    mock_tools_client.run_actor_and_get_items.assert_called_once_with(
-        'apify/test', {'q': '1'}, 300, None, 50
-    )
+    mock_tools_client.run_actor_and_get_items.assert_called_once_with('apify/test', {'q': '1'}, 300, None, 50)
 
 
 def test_run_actor_and_get_items_tool_failure_raises_tool_exception(mock_tools_client: MagicMock) -> None:
-    mock_tools_client.run_actor_and_get_items.side_effect = RuntimeError('Actor run run-bad ended with status TIMED-OUT.')
+    mock_tools_client.run_actor_and_get_items.side_effect = RuntimeError(
+        'Actor run run-bad ended with status TIMED-OUT.'
+    )
     tool = _make_tool(ApifyRunActorAndGetItemsTool, mock_tools_client)
 
     with pytest.raises(ToolException, match='TIMED-OUT'):

From c2b9cb6c68a862fa9f602d9e669988d19611238d Mon Sep 17 00:00:00 2001
From: David Omrai <david.omrai@seznam.cz>
Date: Tue, 21 Apr 2026 10:36:30 +0200
Subject: [PATCH 08/63] feat: enhance error handling and documentation for
 apify tools

---
 langchain_apify/_client.py     | 30 +++++++++++++---
 langchain_apify/tools.py       | 63 ++++++++++++++++++++++++++++++----
 tests/unit_tests/test_tools.py | 35 +++++++++++++++++++
 3 files changed, 117 insertions(+), 11 deletions(-)

diff --git a/langchain_apify/_client.py b/langchain_apify/_client.py
index bf3e01a..8434428 100644
--- a/langchain_apify/_client.py
+++ b/langchain_apify/_client.py
@@ -65,7 +65,11 @@ def run_actor(
         if memory_mbytes is not None:
             call_kwargs['memory_mbytes'] = memory_mbytes
 
-        run = self._client.actor(actor_id).call(**call_kwargs)
+        try:
+            run = self._client.actor(actor_id).call(**call_kwargs)
+        except Exception as exc:
+            msg = f'Network error calling Actor {actor_id}: {exc}'
+            raise RuntimeError(msg) from exc
         if run is None:
             msg = f'Actor {actor_id} call returned no run details.'
             raise RuntimeError(msg)
@@ -85,7 +89,11 @@ def get_dataset_items(
         Returns:
             List of dataset item dicts (may be empty).
         """
-        return self._client.dataset(dataset_id).list_items(limit=limit, offset=offset, clean=True).items
+        try:
+            return self._client.dataset(dataset_id).list_items(limit=limit, offset=offset, clean=True).items
+        except Exception as exc:
+            msg = f'Network error fetching dataset {dataset_id}: {exc}'
+            raise RuntimeError(msg) from exc
 
     def run_actor_and_get_items(
         self,
@@ -112,7 +120,11 @@ def run_actor_and_get_items(
         """
         run = self.run_actor(actor_id, run_input, timeout_secs, memory_mbytes)
         dataset_id = run.get('defaultDatasetId', '')
-        items = self._client.dataset(dataset_id).list_items(limit=dataset_items_limit, clean=True).items
+        try:
+            items = self._client.dataset(dataset_id).list_items(limit=dataset_items_limit, clean=True).items
+        except Exception as exc:
+            msg = f'Network error fetching dataset {dataset_id}: {exc}'
+            raise RuntimeError(msg) from exc
         return run, items
 
     def run_task(
@@ -141,7 +153,11 @@ def run_task(
         if memory_mbytes is not None:
             call_kwargs['memory_mbytes'] = memory_mbytes
 
-        run = self._client.task(task_id).call(**call_kwargs)
+        try:
+            run = self._client.task(task_id).call(**call_kwargs)
+        except Exception as exc:
+            msg = f'Network error calling task {task_id}: {exc}'
+            raise RuntimeError(msg) from exc
         if run is None:
             msg = f'Task {task_id} call returned no run details.'
             raise RuntimeError(msg)
@@ -174,7 +190,11 @@ def run_task_and_get_items(
         """
         run = self.run_task(task_id, task_input, timeout_secs, memory_mbytes)
         dataset_id = run.get('defaultDatasetId', '')
-        items = self._client.dataset(dataset_id).list_items(limit=dataset_items_limit, clean=True).items
+        try:
+            items = self._client.dataset(dataset_id).list_items(limit=dataset_items_limit, clean=True).items
+        except Exception as exc:
+            msg = f'Network error fetching dataset {dataset_id}: {exc}'
+            raise RuntimeError(msg) from exc
         return run, items
 
     def scrape_url(self, url: str, timeout_secs: int = _DEFAULT_SCRAPE_TIMEOUT_SECS) -> str:
diff --git a/langchain_apify/tools.py b/langchain_apify/tools.py
index a751dad..9b433f3 100644
--- a/langchain_apify/tools.py
+++ b/langchain_apify/tools.py
@@ -266,6 +266,14 @@ class ApifyRunActorTool(BaseTool):  # type: ignore[override]
     string.  Use :class:`ApifyGetDatasetItemsTool` afterwards to retrieve the
     results from the dataset.
 
+    Args:
+        apify_api_token: Apify API token. Falls back to the ``APIFY_API_TOKEN``
+            environment variable when *None*.
+
+    Returns:
+        JSON string with keys ``run_id``, ``status``, ``dataset_id``,
+        ``started_at``, and ``finished_at``.
+
     Example:
         .. code-block:: python
 
@@ -283,8 +291,12 @@ class ApifyRunActorTool(BaseTool):  # type: ignore[override]
 
     name: str = 'apify_run_actor'
     description: str = (
-        'Run an Apify Actor synchronously and return run metadata'
-        ' (run_id, status, dataset_id, timestamps) as a JSON string.'
+        'Run an Apify Actor synchronously and return run metadata as a JSON string.'
+        ' Required: actor_id (str) — Actor ID or name (e.g. "apify/python-example").'
+        ' Optional: run_input (dict), timeout_secs (int, default 300),'
+        ' memory_mbytes (int|null).'
+        ' Returns JSON with keys: run_id, status, dataset_id, started_at, finished_at.'
+        ' Use apify_get_dataset_items with the returned dataset_id to fetch results.'
     )
     args_schema: type[BaseModel] = ApifyRunActorInput
     handle_tool_error: bool = True
@@ -316,6 +328,14 @@ class ApifyGetDatasetItemsTool(BaseTool):  # type: ignore[override]
     Returns items as a JSON string.  When the dataset is empty the tool returns
     an informative JSON message instead of raising an error.
 
+    Args:
+        apify_api_token: Apify API token. Falls back to the ``APIFY_API_TOKEN``
+            environment variable when *None*.
+
+    Returns:
+        JSON array of item dicts, or ``{"items": [], "message": "..."}`` when
+        the dataset is empty.
+
     Example:
         .. code-block:: python
 
@@ -329,7 +349,12 @@ class ApifyGetDatasetItemsTool(BaseTool):  # type: ignore[override]
     """
 
     name: str = 'apify_get_dataset_items'
-    description: str = 'Fetch items from an Apify dataset by ID. Returns a JSON array of items.'
+    description: str = (
+        'Fetch items from an Apify dataset by ID. Returns a JSON array of item dicts.'
+        ' Required: dataset_id (str) — Apify dataset ID.'
+        ' Optional: limit (int, default 100), offset (int, default 0).'
+        ' Returns an empty JSON object with a message when the dataset is empty.'
+    )
     args_schema: type[BaseModel] = ApifyGetDatasetItemsInput
     handle_tool_error: bool = True
 
@@ -359,6 +384,15 @@ class ApifyRunActorAndGetItemsTool(BaseTool):  # type: ignore[override]
     into a single call.  Returns a JSON string with ``run`` (metadata) and
     ``items`` (list of dicts) keys.
 
+    Args:
+        apify_api_token: Apify API token. Falls back to the ``APIFY_API_TOKEN``
+            environment variable when *None*.
+
+    Returns:
+        JSON string with two keys: ``run`` (dict with ``run_id``, ``status``,
+        ``dataset_id``, ``started_at``, ``finished_at``) and ``items`` (list
+        of dataset item dicts).
+
     Example:
         .. code-block:: python
 
@@ -376,8 +410,12 @@ class ApifyRunActorAndGetItemsTool(BaseTool):  # type: ignore[override]
 
     name: str = 'apify_run_actor_and_get_items'
     description: str = (
-        'Run an Apify Actor synchronously and return both run metadata and'
-        ' dataset items as a JSON string with "run" and "items" keys.'
+        'Run an Apify Actor synchronously and return both run metadata and dataset items.'
+        ' Required: actor_id (str) — Actor ID or name (e.g. "apify/python-example").'
+        ' Optional: run_input (dict), timeout_secs (int, default 300),'
+        ' memory_mbytes (int|null), dataset_items_limit (int, default 100).'
+        ' Returns JSON with keys: run (run_id, status, dataset_id, started_at, finished_at)'
+        ' and items (list of dataset item dicts).'
     )
     args_schema: type[BaseModel] = ApifyRunActorAndGetItemsInput
     handle_tool_error: bool = True
@@ -413,6 +451,14 @@ class ApifyScrapeUrlTool(BaseTool):  # type: ignore[override]
     ``maxCrawlPages=1``.  Returns the page content as a plain markdown string
     (not JSON).
 
+    Args:
+        apify_api_token: Apify API token. Falls back to the ``APIFY_API_TOKEN``
+            environment variable when *None*.
+
+    Returns:
+        Markdown string with the full text content of the scraped page, or a
+        plain-text fallback when markdown is unavailable.
+
     Example:
         .. code-block:: python
 
@@ -426,7 +472,12 @@ class ApifyScrapeUrlTool(BaseTool):  # type: ignore[override]
     """
 
     name: str = 'apify_scrape_url'
-    description: str = 'Scrape a single URL using Apify and return its content as markdown text.'
+    description: str = (
+        'Scrape a single URL using Apify and return its full content as a markdown string.'
+        ' Required: url (str) — the URL to scrape.'
+        ' Optional: timeout_secs (int, default 120).'
+        ' Returns the page content as markdown (or plain text if markdown is unavailable).'
+    )
     args_schema: type[BaseModel] = ApifyScrapeUrlInput
     handle_tool_error: bool = True
 
diff --git a/tests/unit_tests/test_tools.py b/tests/unit_tests/test_tools.py
index af43843..91c53a0 100644
--- a/tests/unit_tests/test_tools.py
+++ b/tests/unit_tests/test_tools.py
@@ -8,6 +8,7 @@
 from langchain_core.tools import ToolException
 from pydantic import BaseModel
 
+from langchain_apify import APIFY_CORE_TOOLS
 from langchain_apify._client import ApifyToolsClient
 from langchain_apify.tools import (
     ApifyActorsTool,
@@ -189,6 +190,12 @@ def test_get_dataset_items_tool_empty_returns_message(mock_tools_client: MagicMo
     assert 'empty' in parsed['message'].lower()
 
 
+def test_get_dataset_items_tool_missing_token(monkeypatch: pytest.MonkeyPatch) -> None:
+    monkeypatch.delenv('APIFY_API_TOKEN', raising=False)
+    with pytest.raises(ValueError, match='APIFY_API_TOKEN'):
+        ApifyGetDatasetItemsTool()
+
+
 # ---------------------------------------------------------------------------
 # ApifyRunActorAndGetItemsTool
 # ---------------------------------------------------------------------------
@@ -217,6 +224,12 @@ def test_run_actor_and_get_items_tool_failure_raises_tool_exception(mock_tools_c
         tool._run(actor_id='apify/test')
 
 
+def test_run_actor_and_get_items_tool_missing_token(monkeypatch: pytest.MonkeyPatch) -> None:
+    monkeypatch.delenv('APIFY_API_TOKEN', raising=False)
+    with pytest.raises(ValueError, match='APIFY_API_TOKEN'):
+        ApifyRunActorAndGetItemsTool()
+
+
 # ---------------------------------------------------------------------------
 # ApifyScrapeUrlTool
 # ---------------------------------------------------------------------------
@@ -240,6 +253,12 @@ def test_scrape_url_tool_empty_raises_tool_exception(mock_tools_client: MagicMoc
         tool._run(url='https://example.com')
 
 
+def test_scrape_url_tool_missing_token(monkeypatch: pytest.MonkeyPatch) -> None:
+    monkeypatch.delenv('APIFY_API_TOKEN', raising=False)
+    with pytest.raises(ValueError, match='APIFY_API_TOKEN'):
+        ApifyScrapeUrlTool()
+
+
 # ---------------------------------------------------------------------------
 # Tool metadata assertions
 # ---------------------------------------------------------------------------
@@ -267,3 +286,19 @@ def test_generic_tools_have_correct_metadata() -> None:
         assert tool.description
         assert tool.args_schema is not None
         assert tool.handle_tool_error is True
+
+
+# ---------------------------------------------------------------------------
+# APIFY_CORE_TOOLS list
+# ---------------------------------------------------------------------------
+
+
+def test_apify_core_tools_contains_all_four_classes() -> None:
+    """APIFY_CORE_TOOLS must list exactly the 4 generic tool classes."""
+    assert set(APIFY_CORE_TOOLS) == {
+        ApifyRunActorTool,
+        ApifyGetDatasetItemsTool,
+        ApifyRunActorAndGetItemsTool,
+        ApifyScrapeUrlTool,
+    }
+    assert len(APIFY_CORE_TOOLS) == 4

From 3edf1265fcbc368494a734a910bfcc9015324d94 Mon Sep 17 00:00:00 2001
From: David Omrai <david.omrai@seznam.cz>
Date: Tue, 21 Apr 2026 10:58:37 +0200
Subject: [PATCH 09/63] fix: serialize datetime run timestamps as ISO strings

---
 langchain_apify/tools.py       |  7 +++-
 tests/unit_tests/test_tools.py | 73 ++++++++++++++++++++++++++++++++++
 2 files changed, 78 insertions(+), 2 deletions(-)

diff --git a/langchain_apify/tools.py b/langchain_apify/tools.py
index 9b433f3..7e1f11d 100644
--- a/langchain_apify/tools.py
+++ b/langchain_apify/tools.py
@@ -3,6 +3,7 @@
 import json
 import os
 from typing import TYPE_CHECKING, Any
+from datetime import datetime
 
 from apify_client import ApifyClient
 from langchain_core.tools import BaseTool, ToolException
@@ -239,7 +240,9 @@ class ApifyScrapeUrlInput(BaseModel):
 
 
-def _iso(value: str | None) -> str | None:
-    """Pass through an ISO timestamp or *None*."""
+def _iso(value: datetime | str | None) -> str | None:
+    """Convert a ``datetime`` to its ISO 8601 string; pass strings and *None* through unchanged."""
+    if isinstance(value, datetime):
+        return value.isoformat()
     return value
 
 
diff --git a/tests/unit_tests/test_tools.py b/tests/unit_tests/test_tools.py
index 91c53a0..5afb962 100644
--- a/tests/unit_tests/test_tools.py
+++ b/tests/unit_tests/test_tools.py
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 import json
+from datetime import datetime, timezone
 from typing import TYPE_CHECKING, Any
 from unittest.mock import MagicMock, patch
 
@@ -16,6 +17,8 @@
     ApifyRunActorAndGetItemsTool,
     ApifyRunActorTool,
     ApifyScrapeUrlTool,
+    _iso,
+    _run_meta,
 )
 from langchain_apify.utils import actor_id_to_tool_name
 
@@ -128,6 +131,76 @@ def _make_tool(tool_cls: type, mock_client: MagicMock) -> Any:  # noqa: ANN401
     return tool
 
 
+# ---------------------------------------------------------------------------
+# _iso / _run_meta helpers
+# ---------------------------------------------------------------------------
+
+
+def test_iso_converts_datetime_to_string() -> None:
+    dt = datetime(2025, 6, 15, 12, 30, 45, tzinfo=timezone.utc)
+    assert _iso(dt) == '2025-06-15T12:30:45+00:00'
+
+
+def test_iso_passes_through_string() -> None:
+    assert _iso('2025-01-01T00:00:00.000Z') == '2025-01-01T00:00:00.000Z'
+
+
+def test_iso_passes_through_none() -> None:
+    assert _iso(None) is None
+
+
+def test_run_meta_with_datetime_values_is_json_serializable() -> None:
+    run = {
+        'id': 'run-dt',
+        'status': 'SUCCEEDED',
+        'defaultDatasetId': 'ds-dt',
+        'startedAt': datetime(2025, 3, 1, 10, 0, 0, tzinfo=timezone.utc),
+        'finishedAt': datetime(2025, 3, 1, 10, 1, 0, tzinfo=timezone.utc),
+    }
+    meta = _run_meta(run)
+    serialized = json.dumps(meta)
+    parsed = json.loads(serialized)
+    assert parsed['run_id'] == 'run-dt'
+    assert parsed['started_at'] == '2025-03-01T10:00:00+00:00'
+    assert parsed['finished_at'] == '2025-03-01T10:01:00+00:00'
+
+
+def test_run_meta_with_string_values_is_json_serializable() -> None:
+    meta = _run_meta(_SUCCEEDED_RUN)
+    serialized = json.dumps(meta)
+    parsed = json.loads(serialized)
+    assert parsed['started_at'] == '2025-01-01T00:00:00.000Z'
+    assert parsed['finished_at'] == '2025-01-01T00:01:00.000Z'
+
+
+def test_run_meta_with_missing_timestamps() -> None:
+    run = {'id': 'run-none', 'status': 'RUNNING', 'defaultDatasetId': 'ds-none'}
+    meta = _run_meta(run)
+    serialized = json.dumps(meta)
+    parsed = json.loads(serialized)
+    assert parsed['started_at'] is None
+    assert parsed['finished_at'] is None
+
+
+def test_run_actor_tool_with_datetime_run(mock_tools_client: MagicMock) -> None:
+    """End-to-end: ApifyRunActorTool returns valid JSON when the client returns datetime objects."""
+    mock_tools_client.run_actor.return_value = {
+        'id': 'run-real',
+        'status': 'SUCCEEDED',
+        'defaultDatasetId': 'ds-real',
+        'startedAt': datetime(2025, 6, 1, 8, 0, 0, tzinfo=timezone.utc),
+        'finishedAt': datetime(2025, 6, 1, 8, 5, 0, tzinfo=timezone.utc),
+    }
+    tool = _make_tool(ApifyRunActorTool, mock_tools_client)
+
+    result = tool._run(actor_id='apify/test')
+
+    parsed = json.loads(result)
+    assert parsed['run_id'] == 'run-real'
+    assert parsed['started_at'] == '2025-06-01T08:00:00+00:00'
+    assert parsed['finished_at'] == '2025-06-01T08:05:00+00:00'
+
+
 # ---------------------------------------------------------------------------
 # ApifyRunActorTool
 # ---------------------------------------------------------------------------

From 8c36edc824f9e866d7c463d539a5faa401a4f299 Mon Sep 17 00:00:00 2001
From: David Omrai <david.omrai@seznam.cz>
Date: Tue, 21 Apr 2026 12:58:32 +0200
Subject: [PATCH 10/63] feat: add ApifyRunTaskTool and
 ApifyRunTaskAndGetItemsTool with input schemas

---
 langchain_apify/__init__.py     |   6 ++
 langchain_apify/_client.py      |   2 +-
 langchain_apify/tools.py        | 150 ++++++++++++++++++++++++++++++++
 tests/unit_tests/test_client.py |   4 +-
 tests/unit_tests/test_tools.py  |  84 +++++++++++++++++-
 5 files changed, 239 insertions(+), 7 deletions(-)

diff --git a/langchain_apify/__init__.py b/langchain_apify/__init__.py
index 1b65eef..fa1f369 100644
--- a/langchain_apify/__init__.py
+++ b/langchain_apify/__init__.py
@@ -8,6 +8,8 @@
     ApifyGetDatasetItemsTool,
     ApifyRunActorAndGetItemsTool,
     ApifyRunActorTool,
+    ApifyRunTaskAndGetItemsTool,
+    ApifyRunTaskTool,
     ApifyScrapeUrlTool,
 )
 from langchain_apify.wrappers import ApifyWrapper
@@ -27,6 +29,8 @@
     ApifyGetDatasetItemsTool,
     ApifyRunActorAndGetItemsTool,
     ApifyScrapeUrlTool,
+    ApifyRunTaskTool,
+    ApifyRunTaskAndGetItemsTool,
 ]
 
 __all__ = [
@@ -38,6 +42,8 @@
     'ApifyGetDatasetItemsTool',
     'ApifyRunActorAndGetItemsTool',
     'ApifyRunActorTool',
+    'ApifyRunTaskAndGetItemsTool',
+    'ApifyRunTaskTool',
     'ApifyScrapeUrlTool',
     # Tool group lists
     'APIFY_CORE_TOOLS',
diff --git a/langchain_apify/_client.py b/langchain_apify/_client.py
index 8434428..c3ed22e 100644
--- a/langchain_apify/_client.py
+++ b/langchain_apify/_client.py
@@ -61,7 +61,7 @@ def run_actor(
         Raises:
             RuntimeError: If the run does not finish with status ``SUCCEEDED``.
         """
-        call_kwargs: dict = {'run_input': run_input, 'timeout_secs': timeout_secs}
+        call_kwargs: dict = {'run_input': run_input, 'timeout_secs': timeout_secs, 'logger': None}
         if memory_mbytes is not None:
             call_kwargs['memory_mbytes'] = memory_mbytes
 
diff --git a/langchain_apify/tools.py b/langchain_apify/tools.py
index 7e1f11d..421aefd 100644
--- a/langchain_apify/tools.py
+++ b/langchain_apify/tools.py
@@ -234,6 +234,25 @@ class ApifyScrapeUrlInput(BaseModel):
     timeout_secs: int = Field(default=120, description='Maximum time in seconds to wait for the crawl to finish.')
 
 
+class ApifyRunTaskInput(BaseModel):
+    """Input schema for :class:`ApifyRunTaskTool`."""
+
+    task_id: str = Field(description='Task ID or name (e.g. "user/my-task").')
+    task_input: dict | None = Field(default=None, description='JSON-serialisable input that overrides the task\'s pre-saved input.')
+    timeout_secs: int = Field(default=300, description='Maximum time in seconds to wait for the run to finish.')
+    memory_mbytes: int | None = Field(default=None, description='Memory limit in MB for the run, or null for task default.')
+
+
+class ApifyRunTaskAndGetItemsInput(BaseModel):
+    """Input schema for :class:`ApifyRunTaskAndGetItemsTool`."""
+
+    task_id: str = Field(description='Task ID or name (e.g. "user/my-task").')
+    task_input: dict | None = Field(default=None, description='JSON-serialisable input that overrides the task\'s pre-saved input.')
+    timeout_secs: int = Field(default=300, description='Maximum time in seconds to wait for the run to finish.')
+    memory_mbytes: int | None = Field(default=None, description='Memory limit in MB for the run, or null for task default.')
+    dataset_items_limit: int = Field(default=100, description='Maximum number of dataset items to return.')
+
+
 # ---------------------------------------------------------------------------
 # Helpers
 # ---------------------------------------------------------------------------
@@ -499,3 +518,134 @@ def _run(
             return self._client.scrape_url(url, timeout_secs)
         except RuntimeError as exc:
             raise ToolException(str(exc)) from exc
+
+
+class ApifyRunTaskTool(BaseTool):  # type: ignore[override]
+    """Run a saved Apify Actor task by ID and return run metadata.
+
+    Actor tasks are pre-configured Actor runs saved in the Apify Console.
+    This tool starts a task with optional input overrides and returns run
+    metadata (run ID, status, dataset ID, timestamps) as a JSON string.
+    Use :class:`ApifyGetDatasetItemsTool` afterwards to retrieve results.
+
+    Args:
+        apify_api_token: Apify API token. Falls back to the ``APIFY_API_TOKEN``
+            environment variable when *None*.
+
+    Returns:
+        JSON string with keys ``run_id``, ``status``, ``dataset_id``,
+        ``started_at``, and ``finished_at``.
+
+    Example:
+        .. code-block:: python
+
+            import os
+            os.environ["APIFY_API_TOKEN"] = "your-apify-api-token"
+
+            from langchain_apify import ApifyRunTaskTool
+
+            tool = ApifyRunTaskTool()
+            result = tool.invoke({
+                "task_id": "user/my-task",
+                "task_input": {"key": "value"},
+            })
+    """
+
+    name: str = 'apify_run_task'
+    description: str = (
+        'Run a saved Apify Actor task synchronously and return run metadata as a JSON string.'
+        ' Required: task_id (str) — task ID or name (e.g. "user/my-task").'
+        ' Optional: task_input (dict), timeout_secs (int, default 300),'
+        ' memory_mbytes (int|null).'
+        ' Returns JSON with keys: run_id, status, dataset_id, started_at, finished_at.'
+        ' Use apify_get_dataset_items with the returned dataset_id to fetch results.'
+    )
+    args_schema: type[BaseModel] = ApifyRunTaskInput
+    handle_tool_error: bool = True
+
+    _client: ApifyToolsClient
+
+    def __init__(self, apify_api_token: str | None = None, **kwargs: Any) -> None:  # noqa: ANN401
+        super().__init__(**kwargs)
+        self._client = ApifyToolsClient(apify_api_token=apify_api_token)
+
+    def _run(
+        self,
+        task_id: str,
+        task_input: dict | None = None,
+        timeout_secs: int = 300,
+        memory_mbytes: int | None = None,
+        _run_manager: CallbackManagerForToolRun | None = None,
+    ) -> str:
+        try:
+            run = self._client.run_task(task_id, task_input, timeout_secs, memory_mbytes)
+        except RuntimeError as exc:
+            raise ToolException(str(exc)) from exc
+        return json.dumps(_run_meta(run))
+
+
+class ApifyRunTaskAndGetItemsTool(BaseTool):  # type: ignore[override]
+    """Run a saved Apify Actor task and return both run metadata and dataset items.
+
+    Combines :class:`ApifyRunTaskTool` and :class:`ApifyGetDatasetItemsTool`
+    into a single call.  Returns a JSON string with ``run`` (metadata) and
+    ``items`` (list of dicts) keys.
+
+    Args:
+        apify_api_token: Apify API token. Falls back to the ``APIFY_API_TOKEN``
+            environment variable when *None*.
+
+    Returns:
+        JSON string with two keys: ``run`` (dict with ``run_id``, ``status``,
+        ``dataset_id``, ``started_at``, ``finished_at``) and ``items`` (list
+        of dataset item dicts).
+
+    Example:
+        .. code-block:: python
+
+            import os
+            os.environ["APIFY_API_TOKEN"] = "your-apify-api-token"
+
+            from langchain_apify import ApifyRunTaskAndGetItemsTool
+
+            tool = ApifyRunTaskAndGetItemsTool()
+            result = tool.invoke({
+                "task_id": "user/my-task",
+                "task_input": {"key": "value"},
+            })
+    """
+
+    name: str = 'apify_run_task_and_get_items'
+    description: str = (
+        'Run a saved Apify Actor task synchronously and return both run metadata and dataset items.'
+        ' Required: task_id (str) — task ID or name (e.g. "user/my-task").'
+        ' Optional: task_input (dict), timeout_secs (int, default 300),'
+        ' memory_mbytes (int|null), dataset_items_limit (int, default 100).'
+        ' Returns JSON with keys: run (run_id, status, dataset_id, started_at, finished_at)'
+        ' and items (list of dataset item dicts).'
+    )
+    args_schema: type[BaseModel] = ApifyRunTaskAndGetItemsInput
+    handle_tool_error: bool = True
+
+    _client: ApifyToolsClient
+
+    def __init__(self, apify_api_token: str | None = None, **kwargs: Any) -> None:  # noqa: ANN401
+        super().__init__(**kwargs)
+        self._client = ApifyToolsClient(apify_api_token=apify_api_token)
+
+    def _run(
+        self,
+        task_id: str,
+        task_input: dict | None = None,
+        timeout_secs: int = 300,
+        memory_mbytes: int | None = None,
+        dataset_items_limit: int = 100,
+        _run_manager: CallbackManagerForToolRun | None = None,
+    ) -> str:
+        try:
+            run, items = self._client.run_task_and_get_items(
+                task_id, task_input, timeout_secs, memory_mbytes, dataset_items_limit
+            )
+        except RuntimeError as exc:
+            raise ToolException(str(exc)) from exc
+        return json.dumps({'run': _run_meta(run), 'items': items})
diff --git a/tests/unit_tests/test_client.py b/tests/unit_tests/test_client.py
index 89862b1..95193c4 100644
--- a/tests/unit_tests/test_client.py
+++ b/tests/unit_tests/test_client.py
@@ -73,7 +73,7 @@ def test_run_actor_success(client: ApifyToolsClient, mock_apify_client: MagicMoc
     result = client.run_actor('apify/test-actor', run_input={'key': 'val'})
 
     mock_apify_client.actor.assert_called_once_with('apify/test-actor')
-    mock_apify_client.actor.return_value.call.assert_called_once_with(run_input={'key': 'val'}, timeout_secs=300)
+    mock_apify_client.actor.return_value.call.assert_called_once_with(run_input={'key': 'val'}, timeout_secs=300, logger=None)
     assert result == _SUCCEEDED_RUN
 
 
@@ -83,7 +83,7 @@ def test_run_actor_with_memory(client: ApifyToolsClient, mock_apify_client: Magi
     client.run_actor('apify/test-actor', memory_mbytes=512)
 
     mock_apify_client.actor.return_value.call.assert_called_once_with(
-        run_input=None, timeout_secs=300, memory_mbytes=512
+        run_input=None, timeout_secs=300, logger=None, memory_mbytes=512
     )
 
 
diff --git a/tests/unit_tests/test_tools.py b/tests/unit_tests/test_tools.py
index 5afb962..1d61eb2 100644
--- a/tests/unit_tests/test_tools.py
+++ b/tests/unit_tests/test_tools.py
@@ -16,6 +16,8 @@
     ApifyGetDatasetItemsTool,
     ApifyRunActorAndGetItemsTool,
     ApifyRunActorTool,
+    ApifyRunTaskAndGetItemsTool,
+    ApifyRunTaskTool,
     ApifyScrapeUrlTool,
     _iso,
     _run_meta,
@@ -332,19 +334,89 @@ def test_scrape_url_tool_missing_token(monkeypatch: pytest.MonkeyPatch) -> None:
         ApifyScrapeUrlTool()
 
 
+# ---------------------------------------------------------------------------
+# ApifyRunTaskTool
+# ---------------------------------------------------------------------------
+
+
+def test_run_task_tool_returns_json(mock_tools_client: MagicMock) -> None:
+    mock_tools_client.run_task.return_value = _SUCCEEDED_RUN
+    tool = _make_tool(ApifyRunTaskTool, mock_tools_client)
+
+    result = tool._run(task_id='user/my-task', task_input={'key': 'val'})
+
+    parsed = json.loads(result)
+    assert parsed['run_id'] == 'run-abc'
+    assert parsed['status'] == 'SUCCEEDED'
+    assert parsed['dataset_id'] == 'dataset-xyz'
+    assert parsed['started_at'] == '2025-01-01T00:00:00.000Z'
+    assert parsed['finished_at'] == '2025-01-01T00:01:00.000Z'
+    mock_tools_client.run_task.assert_called_once_with('user/my-task', {'key': 'val'}, 300, None)
+
+
+def test_run_task_tool_failure_raises_tool_exception(mock_tools_client: MagicMock) -> None:
+    mock_tools_client.run_task.side_effect = RuntimeError('Actor run run-bad ended with status FAILED.')
+    tool = _make_tool(ApifyRunTaskTool, mock_tools_client)
+
+    with pytest.raises(ToolException, match='FAILED'):
+        tool._run(task_id='user/my-task')
+
+
+def test_run_task_tool_missing_token(monkeypatch: pytest.MonkeyPatch) -> None:
+    monkeypatch.delenv('APIFY_API_TOKEN', raising=False)
+    with pytest.raises(ValueError, match='APIFY_API_TOKEN'):
+        ApifyRunTaskTool()
+
+
+# ---------------------------------------------------------------------------
+# ApifyRunTaskAndGetItemsTool
+# ---------------------------------------------------------------------------
+
+
+def test_run_task_and_get_items_tool_returns_json(mock_tools_client: MagicMock) -> None:
+    mock_tools_client.run_task_and_get_items.return_value = (_SUCCEEDED_RUN, _SAMPLE_ITEMS)
+    tool = _make_tool(ApifyRunTaskAndGetItemsTool, mock_tools_client)
+
+    result = tool._run(task_id='user/my-task', task_input={'q': '1'}, dataset_items_limit=50)
+
+    parsed = json.loads(result)
+    assert parsed['run']['run_id'] == 'run-abc'
+    assert parsed['run']['status'] == 'SUCCEEDED'
+    assert len(parsed['items']) == 2
+    mock_tools_client.run_task_and_get_items.assert_called_once_with('user/my-task', {'q': '1'}, 300, None, 50)
+
+
+def test_run_task_and_get_items_tool_failure_raises_tool_exception(mock_tools_client: MagicMock) -> None:
+    mock_tools_client.run_task_and_get_items.side_effect = RuntimeError(
+        'Actor run run-bad ended with status TIMED-OUT.'
+    )
+    tool = _make_tool(ApifyRunTaskAndGetItemsTool, mock_tools_client)
+
+    with pytest.raises(ToolException, match='TIMED-OUT'):
+        tool._run(task_id='user/my-task')
+
+
+def test_run_task_and_get_items_tool_missing_token(monkeypatch: pytest.MonkeyPatch) -> None:
+    monkeypatch.delenv('APIFY_API_TOKEN', raising=False)
+    with pytest.raises(ValueError, match='APIFY_API_TOKEN'):
+        ApifyRunTaskAndGetItemsTool()
+
+
 # ---------------------------------------------------------------------------
 # Tool metadata assertions
 # ---------------------------------------------------------------------------
 
 
 def test_generic_tools_have_correct_metadata() -> None:
-    """Verify name, description, and args_schema are set on all 4 tools."""
+    """Verify name, description, and args_schema are set on all generic tools."""
     with patch.object(ApifyToolsClient, '__init__', return_value=None):
         tools = [
             ApifyRunActorTool(apify_api_token='dummy'),
             ApifyGetDatasetItemsTool(apify_api_token='dummy'),
             ApifyRunActorAndGetItemsTool(apify_api_token='dummy'),
             ApifyScrapeUrlTool(apify_api_token='dummy'),
+            ApifyRunTaskTool(apify_api_token='dummy'),
+            ApifyRunTaskAndGetItemsTool(apify_api_token='dummy'),
         ]
 
     expected_names = [
@@ -352,6 +424,8 @@ def test_generic_tools_have_correct_metadata() -> None:
         'apify_get_dataset_items',
         'apify_run_actor_and_get_items',
         'apify_scrape_url',
+        'apify_run_task',
+        'apify_run_task_and_get_items',
     ]
 
     for tool, expected_name in zip(tools, expected_names):
@@ -366,12 +440,14 @@ def test_generic_tools_have_correct_metadata() -> None:
 # ---------------------------------------------------------------------------
 
 
-def test_apify_core_tools_contains_all_four_classes() -> None:
-    """APIFY_CORE_TOOLS must list exactly the 4 generic tool classes."""
+def test_apify_core_tools_contains_all_generic_classes() -> None:
+    """APIFY_CORE_TOOLS must list exactly the 6 generic tool classes."""
     assert set(APIFY_CORE_TOOLS) == {
         ApifyRunActorTool,
         ApifyGetDatasetItemsTool,
         ApifyRunActorAndGetItemsTool,
         ApifyScrapeUrlTool,
+        ApifyRunTaskTool,
+        ApifyRunTaskAndGetItemsTool,
     }
-    assert len(APIFY_CORE_TOOLS) == 4
+    assert len(APIFY_CORE_TOOLS) == 6

From 026175a49471d1bfa826ad5655dc6ec31696e47b Mon Sep 17 00:00:00 2001
From: David Omrai <david.omrai@seznam.cz>
Date: Tue, 21 Apr 2026 13:50:44 +0200
Subject: [PATCH 11/63] feat: introduce _ApifyGenericTool base class for Apify
 tools to streamline client handling and error management

---
 langchain_apify/tools.py       | 76 +++++++++++++---------------------
 tests/unit_tests/test_tools.py | 24 +++++++++++
 2 files changed, 52 insertions(+), 48 deletions(-)

diff --git a/langchain_apify/tools.py b/langchain_apify/tools.py
index 421aefd..93d884a 100644
--- a/langchain_apify/tools.py
+++ b/langchain_apify/tools.py
@@ -275,12 +275,34 @@ def _run_meta(run: dict) -> dict:
     }
 
 
+# ---------------------------------------------------------------------------
+# Shared base for generic tools
+# ---------------------------------------------------------------------------
+
+
+class _ApifyGenericTool(BaseTool):  # type: ignore[override]
+    """Shared base for all generic Apify tools.
+
+    Handles ``ApifyToolsClient`` creation and sets ``handle_tool_error``.
+    Subclasses only need to declare ``name``, ``description``,
+    ``args_schema``, and ``_run()``.
+    """
+
+    handle_tool_error: bool = True
+
+    _client: ApifyToolsClient
+
+    def __init__(self, apify_api_token: str | None = None, **kwargs: Any) -> None:  # noqa: ANN401
+        super().__init__(**kwargs)
+        self._client = ApifyToolsClient(apify_api_token=apify_api_token)
+
+
 # ---------------------------------------------------------------------------
 # Generic tools
 # ---------------------------------------------------------------------------
 
 
-class ApifyRunActorTool(BaseTool):  # type: ignore[override]
+class ApifyRunActorTool(_ApifyGenericTool):
     """Run any Apify Actor by ID with an arbitrary JSON input.
 
     Returns run metadata (run ID, status, dataset ID, timestamps) as a JSON
@@ -320,13 +342,6 @@ class ApifyRunActorTool(BaseTool):  # type: ignore[override]
         ' Use apify_get_dataset_items with the returned dataset_id to fetch results.'
     )
     args_schema: type[BaseModel] = ApifyRunActorInput
-    handle_tool_error: bool = True
-
-    _client: ApifyToolsClient
-
-    def __init__(self, apify_api_token: str | None = None, **kwargs: Any) -> None:  # noqa: ANN401
-        super().__init__(**kwargs)
-        self._client = ApifyToolsClient(apify_api_token=apify_api_token)
 
     def _run(
         self,
@@ -343,7 +358,7 @@ def _run(
         return json.dumps(_run_meta(run))
 
 
-class ApifyGetDatasetItemsTool(BaseTool):  # type: ignore[override]
+class ApifyGetDatasetItemsTool(_ApifyGenericTool):
     """Fetch items from an existing Apify dataset by ID.
 
     Returns items as a JSON string.  When the dataset is empty the tool returns
@@ -377,13 +392,6 @@ class ApifyGetDatasetItemsTool(BaseTool):  # type: ignore[override]
         ' Returns an empty JSON object with a message when the dataset is empty.'
     )
     args_schema: type[BaseModel] = ApifyGetDatasetItemsInput
-    handle_tool_error: bool = True
-
-    _client: ApifyToolsClient
-
-    def __init__(self, apify_api_token: str | None = None, **kwargs: Any) -> None:  # noqa: ANN401
-        super().__init__(**kwargs)
-        self._client = ApifyToolsClient(apify_api_token=apify_api_token)
 
     def _run(
         self,
@@ -398,7 +406,7 @@ def _run(
         return json.dumps(items)
 
 
-class ApifyRunActorAndGetItemsTool(BaseTool):  # type: ignore[override]
+class ApifyRunActorAndGetItemsTool(_ApifyGenericTool):
     """Run any Apify Actor and return both run metadata and dataset items.
 
     Combines :class:`ApifyRunActorTool` and :class:`ApifyGetDatasetItemsTool`
@@ -439,13 +447,6 @@ class ApifyRunActorAndGetItemsTool(BaseTool):  # type: ignore[override]
         ' and items (list of dataset item dicts).'
     )
     args_schema: type[BaseModel] = ApifyRunActorAndGetItemsInput
-    handle_tool_error: bool = True
-
-    _client: ApifyToolsClient
-
-    def __init__(self, apify_api_token: str | None = None, **kwargs: Any) -> None:  # noqa: ANN401
-        super().__init__(**kwargs)
-        self._client = ApifyToolsClient(apify_api_token=apify_api_token)
 
     def _run(
         self,
@@ -465,7 +466,7 @@ def _run(
         return json.dumps({'run': _run_meta(run), 'items': items})
 
 
-class ApifyScrapeUrlTool(BaseTool):  # type: ignore[override]
+class ApifyScrapeUrlTool(_ApifyGenericTool):
     """Scrape a single URL and return its content as markdown.
 
     Uses the ``apify/website-content-crawler`` Actor under the hood with
@@ -500,13 +501,6 @@ class ApifyScrapeUrlTool(BaseTool):  # type: ignore[override]
         ' Returns the page content as markdown (or plain text if markdown is unavailable).'
     )
     args_schema: type[BaseModel] = ApifyScrapeUrlInput
-    handle_tool_error: bool = True
-
-    _client: ApifyToolsClient
-
-    def __init__(self, apify_api_token: str | None = None, **kwargs: Any) -> None:  # noqa: ANN401
-        super().__init__(**kwargs)
-        self._client = ApifyToolsClient(apify_api_token=apify_api_token)
 
     def _run(
         self,
@@ -520,7 +514,7 @@ def _run(
             raise ToolException(str(exc)) from exc
 
 
-class ApifyRunTaskTool(BaseTool):  # type: ignore[override]
+class ApifyRunTaskTool(_ApifyGenericTool):
     """Run a saved Apify Actor task by ID and return run metadata.
 
     Actor tasks are pre-configured Actor runs saved in the Apify Console.
@@ -561,13 +555,6 @@ class ApifyRunTaskTool(BaseTool):  # type: ignore[override]
         ' Use apify_get_dataset_items with the returned dataset_id to fetch results.'
     )
     args_schema: type[BaseModel] = ApifyRunTaskInput
-    handle_tool_error: bool = True
-
-    _client: ApifyToolsClient
-
-    def __init__(self, apify_api_token: str | None = None, **kwargs: Any) -> None:  # noqa: ANN401
-        super().__init__(**kwargs)
-        self._client = ApifyToolsClient(apify_api_token=apify_api_token)
 
     def _run(
         self,
@@ -584,7 +571,7 @@ def _run(
         return json.dumps(_run_meta(run))
 
 
-class ApifyRunTaskAndGetItemsTool(BaseTool):  # type: ignore[override]
+class ApifyRunTaskAndGetItemsTool(_ApifyGenericTool):
     """Run a saved Apify Actor task and return both run metadata and dataset items.
 
     Combines :class:`ApifyRunTaskTool` and :class:`ApifyGetDatasetItemsTool`
@@ -625,13 +612,6 @@ class ApifyRunTaskAndGetItemsTool(BaseTool):  # type: ignore[override]
         ' and items (list of dataset item dicts).'
     )
     args_schema: type[BaseModel] = ApifyRunTaskAndGetItemsInput
-    handle_tool_error: bool = True
-
-    _client: ApifyToolsClient
-
-    def __init__(self, apify_api_token: str | None = None, **kwargs: Any) -> None:  # noqa: ANN401
-        super().__init__(**kwargs)
-        self._client = ApifyToolsClient(apify_api_token=apify_api_token)
 
     def _run(
         self,
diff --git a/tests/unit_tests/test_tools.py b/tests/unit_tests/test_tools.py
index 1d61eb2..025b486 100644
--- a/tests/unit_tests/test_tools.py
+++ b/tests/unit_tests/test_tools.py
@@ -19,6 +19,7 @@
     ApifyRunTaskAndGetItemsTool,
     ApifyRunTaskTool,
     ApifyScrapeUrlTool,
+    _ApifyGenericTool,
     _iso,
     _run_meta,
 )
@@ -435,6 +436,29 @@ def test_generic_tools_have_correct_metadata() -> None:
         assert tool.handle_tool_error is True
 
 
+# ---------------------------------------------------------------------------
+# _ApifyGenericTool inheritance
+# ---------------------------------------------------------------------------
+
+
+def test_all_generic_tools_inherit_from_base() -> None:
+    """Every generic tool must be a subclass of _ApifyGenericTool."""
+    for tool_cls in (
+        ApifyRunActorTool,
+        ApifyGetDatasetItemsTool,
+        ApifyRunActorAndGetItemsTool,
+        ApifyScrapeUrlTool,
+        ApifyRunTaskTool,
+        ApifyRunTaskAndGetItemsTool,
+    ):
+        assert issubclass(tool_cls, _ApifyGenericTool), f'{tool_cls.__name__} must extend _ApifyGenericTool'
+
+
+def test_legacy_tool_does_not_inherit_from_generic_base() -> None:
+    """ApifyActorsTool is legacy and must NOT inherit from _ApifyGenericTool."""
+    assert not issubclass(ApifyActorsTool, _ApifyGenericTool)
+
+
 # ---------------------------------------------------------------------------
 # APIFY_CORE_TOOLS list
 # ---------------------------------------------------------------------------

From 110c971c539509827b426e5bcb60c43b72d0d935 Mon Sep 17 00:00:00 2001
From: David Omrai <david.omrai@seznam.cz>
Date: Tue, 21 Apr 2026 14:19:14 +0200
Subject: [PATCH 12/63] feat: add _actor_tools.py file to define upcoming
 search and social media tools for apify integration

---
 langchain_apify/_actor_tools.py | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)
 create mode 100644 langchain_apify/_actor_tools.py

diff --git a/langchain_apify/_actor_tools.py b/langchain_apify/_actor_tools.py
new file mode 100644
index 0000000..d7bd850
--- /dev/null
+++ b/langchain_apify/_actor_tools.py
@@ -0,0 +1,23 @@
+from __future__ import annotations
+
+import json
+from typing import TYPE_CHECKING, Any
+
+from langchain_core.tools import ToolException
+from pydantic import BaseModel, Field
+
+from langchain_apify._client import ApifyToolsClient
+from langchain_apify.tools import _ApifyGenericTool, _run_meta
+
+if TYPE_CHECKING:
+    from langchain_core.callbacks import CallbackManagerForToolRun
+
+
+# ---------------------------------------------------------------------------
+# Search & Crawling tools
+# ---------------------------------------------------------------------------
+
+
+# ---------------------------------------------------------------------------
+# Social-media tools
+# ---------------------------------------------------------------------------
\ No newline at end of file

From a08f63ec458179798ca337d1a380cea332b629ef Mon Sep 17 00:00:00 2001
From: David Omrai <david.omrai@seznam.cz>
Date: Tue, 21 Apr 2026 14:46:35 +0200
Subject: [PATCH 13/63] fix: add try/except to match others

---
 langchain_apify/tools.py       | 5 ++++-
 tests/unit_tests/test_tools.py | 8 ++++++++
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/langchain_apify/tools.py b/langchain_apify/tools.py
index 93d884a..8315bdc 100644
--- a/langchain_apify/tools.py
+++ b/langchain_apify/tools.py
@@ -400,7 +400,10 @@ def _run(
         offset: int = 0,
         _run_manager: CallbackManagerForToolRun | None = None,
     ) -> str:
-        items = self._client.get_dataset_items(dataset_id, limit, offset)
+        try:
+            items = self._client.get_dataset_items(dataset_id, limit, offset)
+        except RuntimeError as exc:
+            raise ToolException(str(exc)) from exc
         if not items:
             return json.dumps({'items': [], 'message': 'Dataset is empty or not found.'})
         return json.dumps(items)
diff --git a/tests/unit_tests/test_tools.py b/tests/unit_tests/test_tools.py
index 025b486..331054d 100644
--- a/tests/unit_tests/test_tools.py
+++ b/tests/unit_tests/test_tools.py
@@ -266,6 +266,14 @@ def test_get_dataset_items_tool_empty_returns_message(mock_tools_client: MagicMo
     assert 'empty' in parsed['message'].lower()
 
 
+def test_get_dataset_items_tool_network_error_raises_tool_exception(mock_tools_client: MagicMock) -> None:
+    mock_tools_client.get_dataset_items.side_effect = RuntimeError('Network error fetching dataset ds-bad: connection reset')
+    tool = _make_tool(ApifyGetDatasetItemsTool, mock_tools_client)
+
+    with pytest.raises(ToolException, match='Network error fetching dataset'):
+        tool._run(dataset_id='ds-bad')
+
+
 def test_get_dataset_items_tool_missing_token(monkeypatch: pytest.MonkeyPatch) -> None:
     monkeypatch.delenv('APIFY_API_TOKEN', raising=False)
     with pytest.raises(ValueError, match='APIFY_API_TOKEN'):

From d028531588602a1cf1249803b18b41c8f13b3b6a Mon Sep 17 00:00:00 2001
From: David Omrai <david.omrai@seznam.cz>
Date: Tue, 21 Apr 2026 14:57:32 +0200
Subject: [PATCH 14/63] fix: update timeout constants and improve input schema
 description in Apify tools

---
 langchain_apify/_client.py |  7 ++++---
 langchain_apify/tools.py   | 21 +++++++++++++++------
 2 files changed, 19 insertions(+), 9 deletions(-)

diff --git a/langchain_apify/_client.py b/langchain_apify/_client.py
index c3ed22e..0409fe0 100644
--- a/langchain_apify/_client.py
+++ b/langchain_apify/_client.py
@@ -14,7 +14,6 @@
 _SCRAPE_ACTOR_ID = 'apify/website-content-crawler'
 _DEFAULT_RUN_TIMEOUT_SECS = 300
 _DEFAULT_SCRAPE_TIMEOUT_SECS = 120
-_DEFAULT_TASK_TIMEOUT_SECS = 300
 _DEFAULT_DATASET_ITEMS_LIMIT = 100
 _RUN_STATUS_SUCCEEDED = 'SUCCEEDED'
 
@@ -23,7 +22,7 @@ class ApifyToolsClient:
     """Internal helper that wraps ``ApifyClient`` for the tools layer.
 
     One convenience method per tool operation. All methods are synchronous and
-    block until the Actor run finishes.,
+    block until the Actor run finishes.
 
     Args:
         apify_api_token: Apify API token. Falls back to the ``APIFY_API_TOKEN``
@@ -118,6 +117,7 @@ def run_actor_and_get_items(
         Raises:
             RuntimeError: If the run does not finish with status ``SUCCEEDED``.
         """
+        # run_actor() raises RuntimeError on Actor failure; the except below only covers the dataset fetch.
         run = self.run_actor(actor_id, run_input, timeout_secs, memory_mbytes)
         dataset_id = run.get('defaultDatasetId', '')
         try:
@@ -168,7 +168,7 @@ def run_task_and_get_items(
         self,
         task_id: str,
         task_input: dict | None = None,
-        timeout_secs: int = _DEFAULT_TASK_TIMEOUT_SECS,
+        timeout_secs: int = _DEFAULT_RUN_TIMEOUT_SECS,
         memory_mbytes: int | None = None,
         dataset_items_limit: int = _DEFAULT_DATASET_ITEMS_LIMIT,
     ) -> tuple[dict, list[dict]]:
@@ -188,6 +188,7 @@ def run_task_and_get_items(
         Raises:
             RuntimeError: If the run does not finish with status ``SUCCEEDED``.
         """
+        # run_task() raises RuntimeError on task failure; the except below only covers the dataset fetch.
         run = self.run_task(task_id, task_input, timeout_secs, memory_mbytes)
         dataset_id = run.get('defaultDatasetId', '')
         try:
diff --git a/langchain_apify/tools.py b/langchain_apify/tools.py
index 8315bdc..b870f0f 100644
--- a/langchain_apify/tools.py
+++ b/langchain_apify/tools.py
@@ -2,8 +2,8 @@
 
 import json
 import os
-from typing import TYPE_CHECKING, Any
 from datetime import datetime
+from typing import TYPE_CHECKING, Any
 
 from apify_client import ApifyClient
 from langchain_core.tools import BaseTool, ToolException
@@ -238,18 +238,26 @@ class ApifyRunTaskInput(BaseModel):
     """Input schema for :class:`ApifyRunTaskTool`."""
 
     task_id: str = Field(description='Task ID or name (e.g. "user/my-task").')
-    task_input: dict | None = Field(default=None, description='JSON-serialisable input that overrides the task\'s pre-saved input.')
+    task_input: dict | None = Field(
+        default=None, description="JSON-serialisable input that overrides the task's pre-saved input."
+    )
     timeout_secs: int = Field(default=300, description='Maximum time in seconds to wait for the run to finish.')
-    memory_mbytes: int | None = Field(default=None, description='Memory limit in MB for the run, or null for task default.')
+    memory_mbytes: int | None = Field(
+        default=None, description='Memory limit in MB for the run, or null for task default.'
+    )
 
 
 class ApifyRunTaskAndGetItemsInput(BaseModel):
     """Input schema for :class:`ApifyRunTaskAndGetItemsTool`."""
 
     task_id: str = Field(description='Task ID or name (e.g. "user/my-task").')
-    task_input: dict | None = Field(default=None, description='JSON-serialisable input that overrides the task\'s pre-saved input.')
+    task_input: dict | None = Field(
+        default=None, description="JSON-serialisable input that overrides the task's pre-saved input."
+    )
     timeout_secs: int = Field(default=300, description='Maximum time in seconds to wait for the run to finish.')
-    memory_mbytes: int | None = Field(default=None, description='Memory limit in MB for the run, or null for task default.')
+    memory_mbytes: int | None = Field(
+        default=None, description='Memory limit in MB for the run, or null for task default.'
+    )
     dataset_items_limit: int = Field(default=100, description='Maximum number of dataset items to return.')
 
 
@@ -258,7 +266,7 @@ class ApifyRunTaskAndGetItemsInput(BaseModel):
 # ---------------------------------------------------------------------------
 
 
-def _iso(value: str | None) -> str | None:
+def _iso(value: str | datetime | None) -> str | None:
     if isinstance(value, datetime):
         return value.isoformat()
     return value
@@ -294,6 +302,7 @@ class _ApifyGenericTool(BaseTool):  # type: ignore[override]
 
     def __init__(self, apify_api_token: str | None = None, **kwargs: Any) -> None:  # noqa: ANN401
         super().__init__(**kwargs)
+        # Token validation (missing env var, empty string) is handled inside ApifyToolsClient.__init__.
         self._client = ApifyToolsClient(apify_api_token=apify_api_token)
 
 

From 429a3ed6027b2e79d5b123d43dbdbf5ec3a621d6 Mon Sep 17 00:00:00 2001
From: David Omrai <david.omrai@seznam.cz>
Date: Tue, 21 Apr 2026 15:00:47 +0200
Subject: [PATCH 15/63] fix: enhance error handling for missing dataset id in
 run_actor and run_task methods

---
 langchain_apify/_client.py      | 10 ++++++++--
 tests/unit_tests/test_client.py | 16 ++++++++++++++++
 2 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/langchain_apify/_client.py b/langchain_apify/_client.py
index 0409fe0..b131484 100644
--- a/langchain_apify/_client.py
+++ b/langchain_apify/_client.py
@@ -119,7 +119,10 @@ def run_actor_and_get_items(
         """
         # run_actor() raises RuntimeError on Actor failure; the except below only covers the dataset fetch.
         run = self.run_actor(actor_id, run_input, timeout_secs, memory_mbytes)
-        dataset_id = run.get('defaultDatasetId', '')
+        dataset_id = run.get('defaultDatasetId')
+        if not dataset_id:
+            msg = f'Actor {actor_id} run succeeded but returned no default dataset ID.'
+            raise RuntimeError(msg)
         try:
             items = self._client.dataset(dataset_id).list_items(limit=dataset_items_limit, clean=True).items
         except Exception as exc:
@@ -190,7 +193,10 @@ def run_task_and_get_items(
         """
         # run_task() raises RuntimeError on task failure; the except below only covers the dataset fetch.
         run = self.run_task(task_id, task_input, timeout_secs, memory_mbytes)
-        dataset_id = run.get('defaultDatasetId', '')
+        dataset_id = run.get('defaultDatasetId')
+        if not dataset_id:
+            msg = f'Task {task_id} run succeeded but returned no default dataset ID.'
+            raise RuntimeError(msg)
         try:
             items = self._client.dataset(dataset_id).list_items(limit=dataset_items_limit, clean=True).items
         except Exception as exc:
diff --git a/tests/unit_tests/test_client.py b/tests/unit_tests/test_client.py
index 95193c4..5485d8c 100644
--- a/tests/unit_tests/test_client.py
+++ b/tests/unit_tests/test_client.py
@@ -132,6 +132,14 @@ def test_run_actor_and_get_items_success(client: ApifyToolsClient, mock_apify_cl
     mock_apify_client.dataset.assert_called_once_with('dataset-xyz')
 
 
+def test_run_actor_and_get_items_missing_dataset_id_raises(client: ApifyToolsClient, mock_apify_client: MagicMock) -> None:
+    run_no_dataset = {**_SUCCEEDED_RUN, 'defaultDatasetId': None}
+    mock_apify_client.actor.return_value.call.return_value = run_no_dataset
+
+    with pytest.raises(RuntimeError, match='no default dataset ID'):
+        client.run_actor_and_get_items('apify/test-actor')
+
+
 # ---------------------------------------------------------------------------
 # run_task
 # ---------------------------------------------------------------------------
@@ -169,6 +177,14 @@ def test_run_task_and_get_items_success(client: ApifyToolsClient, mock_apify_cli
     assert items == _SAMPLE_ITEMS
 
 
+def test_run_task_and_get_items_missing_dataset_id_raises(client: ApifyToolsClient, mock_apify_client: MagicMock) -> None:
+    run_no_dataset = {**_SUCCEEDED_RUN, 'defaultDatasetId': None}
+    mock_apify_client.task.return_value.call.return_value = run_no_dataset
+
+    with pytest.raises(RuntimeError, match='no default dataset ID'):
+        client.run_task_and_get_items('user/my-task')
+
+
 # ---------------------------------------------------------------------------
 # scrape_url
 # ---------------------------------------------------------------------------

From b914e47dfbefb8f13eeddf1ce6512efaa3d31b64 Mon Sep 17 00:00:00 2001
From: David Omrai <david.omrai@seznam.cz>
Date: Tue, 21 Apr 2026 15:15:50 +0200
Subject: [PATCH 16/63] fix: update apifygetdatasetitemstool to return a json
 object with items and message for empty dataset

---
 langchain_apify/tools.py | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/langchain_apify/tools.py b/langchain_apify/tools.py
index b870f0f..af0d84b 100644
--- a/langchain_apify/tools.py
+++ b/langchain_apify/tools.py
@@ -370,16 +370,16 @@ def _run(
 class ApifyGetDatasetItemsTool(_ApifyGenericTool):
     """Fetch items from an existing Apify dataset by ID.
 
-    Returns items as a JSON string.  When the dataset is empty the tool returns
-    an informative JSON message instead of raising an error.
+    Returns a JSON object with an ``"items"`` key containing the list of item
+    dicts.  When the dataset is empty an additional ``"message"`` key is
+    included.
 
     Args:
         apify_api_token: Apify API token. Falls back to the ``APIFY_API_TOKEN``
             environment variable when *None*.
 
     Returns:
-        JSON array of item dicts, or ``{"items": [], "message": "..."}`` when
-        the dataset is empty.
+        JSON object ``{"items": [...]}``; includes ``"message"`` when empty.
 
     Example:
         .. code-block:: python
@@ -395,10 +395,9 @@ class ApifyGetDatasetItemsTool(_ApifyGenericTool):
 
     name: str = 'apify_get_dataset_items'
     description: str = (
-        'Fetch items from an Apify dataset by ID. Returns a JSON array of item dicts.'
+        'Fetch items from an Apify dataset by ID. Returns a JSON object with an "items" array.'
         ' Required: dataset_id (str) — Apify dataset ID.'
         ' Optional: limit (int, default 100), offset (int, default 0).'
-        ' Returns an empty JSON object with a message when the dataset is empty.'
     )
     args_schema: type[BaseModel] = ApifyGetDatasetItemsInput
 
@@ -415,7 +414,7 @@ def _run(
             raise ToolException(str(exc)) from exc
         if not items:
             return json.dumps({'items': [], 'message': 'Dataset is empty or not found.'})
-        return json.dumps(items)
+        return json.dumps({'items': items})
 
 
 class ApifyRunActorAndGetItemsTool(_ApifyGenericTool):

From 0f7118180ddd8aa583b7ac636d2aa33aefee5e68 Mon Sep 17 00:00:00 2001
From: David Omrai <david.omrai@seznam.cz>
Date: Tue, 21 Apr 2026 15:16:28 +0200
Subject: [PATCH 17/63] feat: add integration smoke tests for generic Apify
 tools to validate api interaction

---
 tests/integration_tests/test_generic_tools.py | 68 +++++++++++++++++++
 tests/unit_tests/test_tools.py                |  6 +-
 2 files changed, 71 insertions(+), 3 deletions(-)
 create mode 100644 tests/integration_tests/test_generic_tools.py

diff --git a/tests/integration_tests/test_generic_tools.py b/tests/integration_tests/test_generic_tools.py
new file mode 100644
index 0000000..58c5cbf
--- /dev/null
+++ b/tests/integration_tests/test_generic_tools.py
@@ -0,0 +1,68 @@
+"""Integration smoke tests for the generic Apify tools.
+
+These tests hit the real Apify API and require the ``APIFY_API_TOKEN``
+environment variable to be set.  They use ``apify/python-example`` (a
+trivial Actor that adds two numbers) to keep execution fast and cheap.
+"""
+
+from __future__ import annotations
+
+import json
+import os
+
+import pytest
+
+from langchain_apify import (
+    ApifyGetDatasetItemsTool,
+    ApifyRunActorAndGetItemsTool,
+    ApifyRunActorTool,
+    ApifyScrapeUrlTool,
+)
+
+_ACTOR_ID = 'apify/python-example'
+_RUN_INPUT = {'first_number': 2, 'second_number': 3}
+
+pytestmark = pytest.mark.skipif(
+    not os.getenv('APIFY_API_TOKEN'),
+    reason='APIFY_API_TOKEN not set',
+)
+
+
+def test_run_actor_tool_smoke() -> None:
+    tool = ApifyRunActorTool()
+    result = tool.invoke({'actor_id': _ACTOR_ID, 'run_input': _RUN_INPUT})
+
+    parsed = json.loads(result)
+    assert parsed['status'] == 'SUCCEEDED'
+    assert parsed['run_id']
+    assert parsed['dataset_id']
+
+
+def test_get_dataset_items_tool_smoke() -> None:
+    run_tool = ApifyRunActorTool()
+    run_result = json.loads(run_tool.invoke({'actor_id': _ACTOR_ID, 'run_input': _RUN_INPUT}))
+    dataset_id = run_result['dataset_id']
+
+    items_tool = ApifyGetDatasetItemsTool()
+    result = items_tool.invoke({'dataset_id': dataset_id, 'limit': 10})
+
+    parsed = json.loads(result)
+    assert 'items' in parsed
+    assert isinstance(parsed['items'], list)
+
+
+def test_run_actor_and_get_items_tool_smoke() -> None:
+    tool = ApifyRunActorAndGetItemsTool()
+    result = tool.invoke({'actor_id': _ACTOR_ID, 'run_input': _RUN_INPUT})
+
+    parsed = json.loads(result)
+    assert parsed['run']['status'] == 'SUCCEEDED'
+    assert isinstance(parsed['items'], list)
+
+
+def test_scrape_url_tool_smoke() -> None:
+    tool = ApifyScrapeUrlTool()
+    result = tool.invoke({'url': 'https://crawlee.dev'})
+
+    assert isinstance(result, str)
+    assert len(result) > 0
diff --git a/tests/unit_tests/test_tools.py b/tests/unit_tests/test_tools.py
index 331054d..21e1009 100644
--- a/tests/unit_tests/test_tools.py
+++ b/tests/unit_tests/test_tools.py
@@ -243,15 +243,15 @@ def test_run_actor_tool_missing_token(monkeypatch: pytest.MonkeyPatch) -> None:
 # ---------------------------------------------------------------------------
 
 
-def test_get_dataset_items_tool_returns_json_array(mock_tools_client: MagicMock) -> None:
+def test_get_dataset_items_tool_returns_json_object(mock_tools_client: MagicMock) -> None:
     mock_tools_client.get_dataset_items.return_value = _SAMPLE_ITEMS
     tool = _make_tool(ApifyGetDatasetItemsTool, mock_tools_client)
 
     result = tool._run(dataset_id='dataset-xyz', limit=50, offset=5)
 
     parsed = json.loads(result)
-    assert len(parsed) == 2
-    assert parsed[0]['text'] == 'item-1'
+    assert len(parsed['items']) == 2
+    assert parsed['items'][0]['text'] == 'item-1'
     mock_tools_client.get_dataset_items.assert_called_once_with('dataset-xyz', 50, 5)
 
 

From 50c52f2cda5b3007c63a85dd52c5f7e82b8321e9 Mon Sep 17 00:00:00 2001
From: David Omrai <david.omrai@seznam.cz>
Date: Tue, 21 Apr 2026 16:01:30 +0200
Subject: [PATCH 18/63] feat: implement clamping for timeout, memory, and item
 limits in apify tools to enforce safety constraints

---
 langchain_apify/tools.py                      |  46 ++++-
 tests/integration_tests/test_generic_tools.py |  26 +++
 tests/unit_tests/conftest.py                  |  51 ++++++
 tests/unit_tests/test_client.py               |  77 +++-----
 tests/unit_tests/test_tools.py                | 173 ++++++++++++------
 5 files changed, 260 insertions(+), 113 deletions(-)
 create mode 100644 tests/unit_tests/conftest.py

diff --git a/langchain_apify/tools.py b/langchain_apify/tools.py
index af0d84b..31f55d5 100644
--- a/langchain_apify/tools.py
+++ b/langchain_apify/tools.py
@@ -267,6 +267,7 @@ class ApifyRunTaskAndGetItemsInput(BaseModel):
 
 
 def _iso(value: str | datetime | None) -> str | None:
+    """Coerce a possible ``datetime`` to an ISO-8601 string."""
     if isinstance(value, datetime):
         return value.isoformat()
     return value
@@ -291,20 +292,37 @@ def _run_meta(run: dict) -> dict:
 class _ApifyGenericTool(BaseTool):  # type: ignore[override]
     """Shared base for all generic Apify tools.
 
-    Handles ``ApifyToolsClient`` creation and sets ``handle_tool_error``.
+    Handles ``ApifyToolsClient`` creation, sets ``handle_tool_error``,
+    and defines developer-controlled safety limits that clamp values the
+    LLM may provide at invocation time.
+
     Subclasses only need to declare ``name``, ``description``,
     ``args_schema``, and ``_run()``.
     """
 
     handle_tool_error: bool = True
 
+    max_timeout_secs: int = Field(default=600, description='Upper bound for timeout_secs the LLM may request.')
+    max_memory_mbytes: int = Field(default=32768, description='Upper bound for memory_mbytes the LLM may request.')
+    max_items: int = Field(default=1000, description='Upper bound for limit / dataset_items_limit the LLM may request.')
+
     _client: ApifyToolsClient
 
     def __init__(self, apify_api_token: str | None = None, **kwargs: Any) -> None:  # noqa: ANN401
         super().__init__(**kwargs)
-        # Token validation (missing env var, empty string) is handled inside ApifyToolsClient.__init__.
         self._client = ApifyToolsClient(apify_api_token=apify_api_token)
 
+    def _clamp_timeout(self, value: int) -> int:
+        return min(value, self.max_timeout_secs)
+
+    def _clamp_memory(self, value: int | None) -> int | None:
+        if value is None:
+            return None
+        return min(value, self.max_memory_mbytes)
+
+    def _clamp_items(self, value: int) -> int:
+        return min(value, self.max_items)
+
 
 # ---------------------------------------------------------------------------
 # Generic tools
@@ -361,7 +379,9 @@ def _run(
         _run_manager: CallbackManagerForToolRun | None = None,
     ) -> str:
         try:
-            run = self._client.run_actor(actor_id, run_input, timeout_secs, memory_mbytes)
+            run = self._client.run_actor(
+                actor_id, run_input, self._clamp_timeout(timeout_secs), self._clamp_memory(memory_mbytes)
+            )
         except RuntimeError as exc:
             raise ToolException(str(exc)) from exc
         return json.dumps(_run_meta(run))
@@ -409,7 +429,7 @@ def _run(
         _run_manager: CallbackManagerForToolRun | None = None,
     ) -> str:
         try:
-            items = self._client.get_dataset_items(dataset_id, limit, offset)
+            items = self._client.get_dataset_items(dataset_id, self._clamp_items(limit), offset)
         except RuntimeError as exc:
             raise ToolException(str(exc)) from exc
         if not items:
@@ -470,7 +490,11 @@ def _run(
     ) -> str:
         try:
             run, items = self._client.run_actor_and_get_items(
-                actor_id, run_input, timeout_secs, memory_mbytes, dataset_items_limit
+                actor_id,
+                run_input,
+                self._clamp_timeout(timeout_secs),
+                self._clamp_memory(memory_mbytes),
+                self._clamp_items(dataset_items_limit),
             )
         except RuntimeError as exc:
             raise ToolException(str(exc)) from exc
@@ -520,7 +544,7 @@ def _run(
         _run_manager: CallbackManagerForToolRun | None = None,
     ) -> str:
         try:
-            return self._client.scrape_url(url, timeout_secs)
+            return self._client.scrape_url(url, self._clamp_timeout(timeout_secs))
         except RuntimeError as exc:
             raise ToolException(str(exc)) from exc
 
@@ -576,7 +600,9 @@ def _run(
         _run_manager: CallbackManagerForToolRun | None = None,
     ) -> str:
         try:
-            run = self._client.run_task(task_id, task_input, timeout_secs, memory_mbytes)
+            run = self._client.run_task(
+                task_id, task_input, self._clamp_timeout(timeout_secs), self._clamp_memory(memory_mbytes)
+            )
         except RuntimeError as exc:
             raise ToolException(str(exc)) from exc
         return json.dumps(_run_meta(run))
@@ -635,7 +661,11 @@ def _run(
     ) -> str:
         try:
             run, items = self._client.run_task_and_get_items(
-                task_id, task_input, timeout_secs, memory_mbytes, dataset_items_limit
+                task_id,
+                task_input,
+                self._clamp_timeout(timeout_secs),
+                self._clamp_memory(memory_mbytes),
+                self._clamp_items(dataset_items_limit),
             )
         except RuntimeError as exc:
             raise ToolException(str(exc)) from exc
diff --git a/tests/integration_tests/test_generic_tools.py b/tests/integration_tests/test_generic_tools.py
index 58c5cbf..863efb1 100644
--- a/tests/integration_tests/test_generic_tools.py
+++ b/tests/integration_tests/test_generic_tools.py
@@ -16,6 +16,8 @@
     ApifyGetDatasetItemsTool,
     ApifyRunActorAndGetItemsTool,
     ApifyRunActorTool,
+    ApifyRunTaskAndGetItemsTool,
+    ApifyRunTaskTool,
     ApifyScrapeUrlTool,
 )
 
@@ -66,3 +68,27 @@ def test_scrape_url_tool_smoke() -> None:
 
     assert isinstance(result, str)
     assert len(result) > 0
+
+
+_TASK_ID = os.getenv('APIFY_TASK_ID', '')
+
+
+@pytest.mark.skipif(not _TASK_ID, reason='APIFY_TASK_ID not set')
+def test_run_task_tool_smoke() -> None:
+    tool = ApifyRunTaskTool()
+    result = tool.invoke({'task_id': _TASK_ID})
+
+    parsed = json.loads(result)
+    assert parsed['status'] == 'SUCCEEDED'
+    assert parsed['run_id']
+    assert parsed['dataset_id']
+
+
+@pytest.mark.skipif(not _TASK_ID, reason='APIFY_TASK_ID not set')
+def test_run_task_and_get_items_tool_smoke() -> None:
+    tool = ApifyRunTaskAndGetItemsTool()
+    result = tool.invoke({'task_id': _TASK_ID})
+
+    parsed = json.loads(result)
+    assert parsed['run']['status'] == 'SUCCEEDED'
+    assert isinstance(parsed['items'], list)
diff --git a/tests/unit_tests/conftest.py b/tests/unit_tests/conftest.py
new file mode 100644
index 0000000..eedadb9
--- /dev/null
+++ b/tests/unit_tests/conftest.py
@@ -0,0 +1,51 @@
+from __future__ import annotations
+
+from typing import Any
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from langchain_apify._client import ApifyToolsClient
+
+SUCCEEDED_RUN: dict = {
+    'id': 'run-abc',
+    'status': 'SUCCEEDED',
+    'defaultDatasetId': 'dataset-xyz',
+    'startedAt': '2025-01-01T00:00:00.000Z',
+    'finishedAt': '2025-01-01T00:01:00.000Z',
+}
+
+FAILED_RUN: dict = {
+    'id': 'run-fail',
+    'status': 'FAILED',
+    'defaultDatasetId': 'dataset-xyz',
+}
+
+SAMPLE_ITEMS: list[dict] = [
+    {'text': 'item-1', 'url': 'https://example.com/1'},
+    {'text': 'item-2', 'url': 'https://example.com/2'},
+]
+
+
+@pytest.fixture
+def mock_tools_client() -> MagicMock:
+    return MagicMock(spec=ApifyToolsClient)
+
+
+@pytest.fixture
+def mock_apify_client() -> MagicMock:
+    return MagicMock()
+
+
+@pytest.fixture
+def client(mock_apify_client: MagicMock) -> ApifyToolsClient:
+    with patch('langchain_apify._client.create_apify_client', return_value=mock_apify_client):
+        return ApifyToolsClient(apify_api_token='dummy-token')
+
+
+def make_tool(tool_cls: type, mock_client: MagicMock, **kwargs: Any) -> Any:  # noqa: ANN401
+    """Instantiate a generic tool with a mocked ApifyToolsClient."""
+    with patch.object(ApifyToolsClient, '__init__', return_value=None):
+        tool = tool_cls(apify_api_token='dummy-token', **kwargs)
+    tool._client = mock_client
+    return tool
diff --git a/tests/unit_tests/test_client.py b/tests/unit_tests/test_client.py
index 5485d8c..c35f495 100644
--- a/tests/unit_tests/test_client.py
+++ b/tests/unit_tests/test_client.py
@@ -5,36 +5,7 @@
 import pytest
 
 from langchain_apify._client import ApifyToolsClient
-
-_SUCCEEDED_RUN: dict = {
-    'id': 'run-abc',
-    'status': 'SUCCEEDED',
-    'defaultDatasetId': 'dataset-xyz',
-    'startedAt': '2025-01-01T00:00:00.000Z',
-    'finishedAt': '2025-01-01T00:01:00.000Z',
-}
-
-_FAILED_RUN: dict = {
-    'id': 'run-fail',
-    'status': 'FAILED',
-    'defaultDatasetId': 'dataset-xyz',
-}
-
-_SAMPLE_ITEMS: list[dict] = [
-    {'text': 'item-1', 'url': 'https://example.com/1'},
-    {'text': 'item-2', 'url': 'https://example.com/2'},
-]
-
-
-@pytest.fixture
-def mock_apify_client() -> MagicMock:
-    return MagicMock()
-
-
-@pytest.fixture
-def client(mock_apify_client: MagicMock) -> ApifyToolsClient:
-    with patch('langchain_apify._client.create_apify_client', return_value=mock_apify_client):
-        return ApifyToolsClient(apify_api_token='dummy-token')
+from tests.unit_tests.conftest import FAILED_RUN, SAMPLE_ITEMS, SUCCEEDED_RUN
 
 
 # ---------------------------------------------------------------------------
@@ -68,17 +39,17 @@ def test_init_missing_token_raises(monkeypatch: pytest.MonkeyPatch) -> None:
 
 
 def test_run_actor_success(client: ApifyToolsClient, mock_apify_client: MagicMock) -> None:
-    mock_apify_client.actor.return_value.call.return_value = _SUCCEEDED_RUN
+    mock_apify_client.actor.return_value.call.return_value = SUCCEEDED_RUN
 
     result = client.run_actor('apify/test-actor', run_input={'key': 'val'})
 
     mock_apify_client.actor.assert_called_once_with('apify/test-actor')
     mock_apify_client.actor.return_value.call.assert_called_once_with(run_input={'key': 'val'}, timeout_secs=300, logger=None)
-    assert result == _SUCCEEDED_RUN
+    assert result == SUCCEEDED_RUN
 
 
 def test_run_actor_with_memory(client: ApifyToolsClient, mock_apify_client: MagicMock) -> None:
-    mock_apify_client.actor.return_value.call.return_value = _SUCCEEDED_RUN
+    mock_apify_client.actor.return_value.call.return_value = SUCCEEDED_RUN
 
     client.run_actor('apify/test-actor', memory_mbytes=512)
 
@@ -88,7 +59,7 @@ def test_run_actor_with_memory(client: ApifyToolsClient, mock_apify_client: Magi
 
 
 def test_run_actor_failed_status_raises(client: ApifyToolsClient, mock_apify_client: MagicMock) -> None:
-    mock_apify_client.actor.return_value.call.return_value = _FAILED_RUN
+    mock_apify_client.actor.return_value.call.return_value = FAILED_RUN
 
     with pytest.raises(RuntimeError, match='run-fail'):
         client.run_actor('apify/test-actor')
@@ -100,13 +71,13 @@ def test_run_actor_failed_status_raises(client: ApifyToolsClient, mock_apify_cli
 
 
 def test_get_dataset_items_success(client: ApifyToolsClient, mock_apify_client: MagicMock) -> None:
-    mock_apify_client.dataset.return_value.list_items.return_value.items = _SAMPLE_ITEMS
+    mock_apify_client.dataset.return_value.list_items.return_value.items = SAMPLE_ITEMS
 
     items = client.get_dataset_items('dataset-xyz', limit=50, offset=10)
 
     mock_apify_client.dataset.assert_called_once_with('dataset-xyz')
     mock_apify_client.dataset.return_value.list_items.assert_called_once_with(limit=50, offset=10, clean=True)
-    assert items == _SAMPLE_ITEMS
+    assert items == SAMPLE_ITEMS
 
 
 def test_get_dataset_items_empty(client: ApifyToolsClient, mock_apify_client: MagicMock) -> None:
@@ -122,18 +93,18 @@ def test_get_dataset_items_empty(client: ApifyToolsClient, mock_apify_client: Ma
 
 
 def test_run_actor_and_get_items_success(client: ApifyToolsClient, mock_apify_client: MagicMock) -> None:
-    mock_apify_client.actor.return_value.call.return_value = _SUCCEEDED_RUN
-    mock_apify_client.dataset.return_value.list_items.return_value.items = _SAMPLE_ITEMS
+    mock_apify_client.actor.return_value.call.return_value = SUCCEEDED_RUN
+    mock_apify_client.dataset.return_value.list_items.return_value.items = SAMPLE_ITEMS
 
     run, items = client.run_actor_and_get_items('apify/test-actor', run_input={'q': '1'})
 
-    assert run == _SUCCEEDED_RUN
-    assert items == _SAMPLE_ITEMS
+    assert run == SUCCEEDED_RUN
+    assert items == SAMPLE_ITEMS
     mock_apify_client.dataset.assert_called_once_with('dataset-xyz')
 
 
 def test_run_actor_and_get_items_missing_dataset_id_raises(client: ApifyToolsClient, mock_apify_client: MagicMock) -> None:
-    run_no_dataset = {**_SUCCEEDED_RUN, 'defaultDatasetId': None}
+    run_no_dataset = {**SUCCEEDED_RUN, 'defaultDatasetId': None}
     mock_apify_client.actor.return_value.call.return_value = run_no_dataset
 
     with pytest.raises(RuntimeError, match='no default dataset ID'):
@@ -146,17 +117,17 @@ def test_run_actor_and_get_items_missing_dataset_id_raises(client: ApifyToolsCli
 
 
 def test_run_task_success(client: ApifyToolsClient, mock_apify_client: MagicMock) -> None:
-    mock_apify_client.task.return_value.call.return_value = _SUCCEEDED_RUN
+    mock_apify_client.task.return_value.call.return_value = SUCCEEDED_RUN
 
     result = client.run_task('user/my-task', task_input={'key': 'val'})
 
     mock_apify_client.task.assert_called_once_with('user/my-task')
     mock_apify_client.task.return_value.call.assert_called_once_with(task_input={'key': 'val'}, timeout_secs=300)
-    assert result == _SUCCEEDED_RUN
+    assert result == SUCCEEDED_RUN
 
 
 def test_run_task_failed_status_raises(client: ApifyToolsClient, mock_apify_client: MagicMock) -> None:
-    mock_apify_client.task.return_value.call.return_value = _FAILED_RUN
+    mock_apify_client.task.return_value.call.return_value = FAILED_RUN
 
     with pytest.raises(RuntimeError, match='run-fail'):
         client.run_task('user/my-task')
@@ -168,17 +139,17 @@ def test_run_task_failed_status_raises(client: ApifyToolsClient, mock_apify_clie
 
 
 def test_run_task_and_get_items_success(client: ApifyToolsClient, mock_apify_client: MagicMock) -> None:
-    mock_apify_client.task.return_value.call.return_value = _SUCCEEDED_RUN
-    mock_apify_client.dataset.return_value.list_items.return_value.items = _SAMPLE_ITEMS
+    mock_apify_client.task.return_value.call.return_value = SUCCEEDED_RUN
+    mock_apify_client.dataset.return_value.list_items.return_value.items = SAMPLE_ITEMS
 
     run, items = client.run_task_and_get_items('user/my-task')
 
-    assert run == _SUCCEEDED_RUN
-    assert items == _SAMPLE_ITEMS
+    assert run == SUCCEEDED_RUN
+    assert items == SAMPLE_ITEMS
 
 
 def test_run_task_and_get_items_missing_dataset_id_raises(client: ApifyToolsClient, mock_apify_client: MagicMock) -> None:
-    run_no_dataset = {**_SUCCEEDED_RUN, 'defaultDatasetId': None}
+    run_no_dataset = {**SUCCEEDED_RUN, 'defaultDatasetId': None}
     mock_apify_client.task.return_value.call.return_value = run_no_dataset
 
     with pytest.raises(RuntimeError, match='no default dataset ID'):
@@ -191,7 +162,7 @@ def test_run_task_and_get_items_missing_dataset_id_raises(client: ApifyToolsClie
 
 
 def test_scrape_url_returns_markdown(client: ApifyToolsClient, mock_apify_client: MagicMock) -> None:
-    mock_apify_client.actor.return_value.call.return_value = _SUCCEEDED_RUN
+    mock_apify_client.actor.return_value.call.return_value = SUCCEEDED_RUN
     mock_apify_client.dataset.return_value.list_items.return_value.items = [
         {'markdown': '# Hello', 'text': 'Hello', 'url': 'https://example.com'},
     ]
@@ -201,7 +172,7 @@ def test_scrape_url_returns_markdown(client: ApifyToolsClient, mock_apify_client
 
 
 def test_scrape_url_falls_back_to_text(client: ApifyToolsClient, mock_apify_client: MagicMock) -> None:
-    mock_apify_client.actor.return_value.call.return_value = _SUCCEEDED_RUN
+    mock_apify_client.actor.return_value.call.return_value = SUCCEEDED_RUN
     mock_apify_client.dataset.return_value.list_items.return_value.items = [
         {'text': 'Plain text content', 'url': 'https://example.com'},
     ]
@@ -211,7 +182,7 @@ def test_scrape_url_falls_back_to_text(client: ApifyToolsClient, mock_apify_clie
 
 
 def test_scrape_url_empty_items_raises(client: ApifyToolsClient, mock_apify_client: MagicMock) -> None:
-    mock_apify_client.actor.return_value.call.return_value = _SUCCEEDED_RUN
+    mock_apify_client.actor.return_value.call.return_value = SUCCEEDED_RUN
     mock_apify_client.dataset.return_value.list_items.return_value.items = []
 
     with pytest.raises(RuntimeError, match='No content extracted'):
@@ -219,7 +190,7 @@ def test_scrape_url_empty_items_raises(client: ApifyToolsClient, mock_apify_clie
 
 
 def test_scrape_url_empty_content_raises(client: ApifyToolsClient, mock_apify_client: MagicMock) -> None:
-    mock_apify_client.actor.return_value.call.return_value = _SUCCEEDED_RUN
+    mock_apify_client.actor.return_value.call.return_value = SUCCEEDED_RUN
     mock_apify_client.dataset.return_value.list_items.return_value.items = [
         {'markdown': '', 'text': '', 'url': 'https://example.com'},
     ]
diff --git a/tests/unit_tests/test_tools.py b/tests/unit_tests/test_tools.py
index 21e1009..61e4c8b 100644
--- a/tests/unit_tests/test_tools.py
+++ b/tests/unit_tests/test_tools.py
@@ -2,7 +2,7 @@
 
 import json
 from datetime import datetime, timezone
-from typing import TYPE_CHECKING, Any
+from typing import TYPE_CHECKING
 from unittest.mock import MagicMock, patch
 
 import pytest
@@ -24,6 +24,7 @@
     _run_meta,
 )
 from langchain_apify.utils import actor_id_to_tool_name
+from tests.unit_tests.conftest import SAMPLE_ITEMS, SUCCEEDED_RUN, make_tool
 
 if TYPE_CHECKING:
     from collections.abc import Generator
@@ -103,37 +104,6 @@ class DummyModel(BaseModel):
         yield tool
 
 
-# ---------------------------------------------------------------------------
-# Shared test data for generic tools
-# ---------------------------------------------------------------------------
-
-_SUCCEEDED_RUN: dict = {
-    'id': 'run-abc',
-    'status': 'SUCCEEDED',
-    'defaultDatasetId': 'dataset-xyz',
-    'startedAt': '2025-01-01T00:00:00.000Z',
-    'finishedAt': '2025-01-01T00:01:00.000Z',
-}
-
-_SAMPLE_ITEMS: list[dict] = [
-    {'text': 'item-1', 'url': 'https://example.com/1'},
-    {'text': 'item-2', 'url': 'https://example.com/2'},
-]
-
-
-@pytest.fixture
-def mock_tools_client() -> MagicMock:
-    return MagicMock(spec=ApifyToolsClient)
-
-
-def _make_tool(tool_cls: type, mock_client: MagicMock) -> Any:  # noqa: ANN401
-    """Instantiate a generic tool with a mocked ApifyToolsClient."""
-    with patch.object(ApifyToolsClient, '__init__', return_value=None):
-        tool = tool_cls(apify_api_token='dummy-token')
-    tool._client = mock_client
-    return tool
-
-
 # ---------------------------------------------------------------------------
 # _iso / _run_meta helpers
 # ---------------------------------------------------------------------------
@@ -169,7 +139,7 @@ def test_run_meta_with_datetime_values_is_json_serializable() -> None:
 
 
 def test_run_meta_with_string_values_is_json_serializable() -> None:
-    meta = _run_meta(_SUCCEEDED_RUN)
+    meta = _run_meta(SUCCEEDED_RUN)
     serialized = json.dumps(meta)
     parsed = json.loads(serialized)
     assert parsed['started_at'] == '2025-01-01T00:00:00.000Z'
@@ -194,7 +164,7 @@ def test_run_actor_tool_with_datetime_run(mock_tools_client: MagicMock) -> None:
         'startedAt': datetime(2025, 6, 1, 8, 0, 0, tzinfo=timezone.utc),
         'finishedAt': datetime(2025, 6, 1, 8, 5, 0, tzinfo=timezone.utc),
     }
-    tool = _make_tool(ApifyRunActorTool, mock_tools_client)
+    tool = make_tool(ApifyRunActorTool, mock_tools_client)
 
     result = tool._run(actor_id='apify/test')
 
@@ -210,8 +180,8 @@ def test_run_actor_tool_with_datetime_run(mock_tools_client: MagicMock) -> None:
 
 
 def test_run_actor_tool_returns_json(mock_tools_client: MagicMock) -> None:
-    mock_tools_client.run_actor.return_value = _SUCCEEDED_RUN
-    tool = _make_tool(ApifyRunActorTool, mock_tools_client)
+    mock_tools_client.run_actor.return_value = SUCCEEDED_RUN
+    tool = make_tool(ApifyRunActorTool, mock_tools_client)
 
     result = tool._run(actor_id='apify/test', run_input={'key': 'val'})
 
@@ -226,7 +196,7 @@ def test_run_actor_tool_returns_json(mock_tools_client: MagicMock) -> None:
 
 def test_run_actor_tool_failure_raises_tool_exception(mock_tools_client: MagicMock) -> None:
     mock_tools_client.run_actor.side_effect = RuntimeError('Actor run run-bad ended with status FAILED.')
-    tool = _make_tool(ApifyRunActorTool, mock_tools_client)
+    tool = make_tool(ApifyRunActorTool, mock_tools_client)
 
     with pytest.raises(ToolException, match='FAILED'):
         tool._run(actor_id='apify/test')
@@ -244,8 +214,8 @@ def test_run_actor_tool_missing_token(monkeypatch: pytest.MonkeyPatch) -> None:
 
 
 def test_get_dataset_items_tool_returns_json_object(mock_tools_client: MagicMock) -> None:
-    mock_tools_client.get_dataset_items.return_value = _SAMPLE_ITEMS
-    tool = _make_tool(ApifyGetDatasetItemsTool, mock_tools_client)
+    mock_tools_client.get_dataset_items.return_value = SAMPLE_ITEMS
+    tool = make_tool(ApifyGetDatasetItemsTool, mock_tools_client)
 
     result = tool._run(dataset_id='dataset-xyz', limit=50, offset=5)
 
@@ -257,7 +227,7 @@ def test_get_dataset_items_tool_returns_json_object(mock_tools_client: MagicMock
 
 def test_get_dataset_items_tool_empty_returns_message(mock_tools_client: MagicMock) -> None:
     mock_tools_client.get_dataset_items.return_value = []
-    tool = _make_tool(ApifyGetDatasetItemsTool, mock_tools_client)
+    tool = make_tool(ApifyGetDatasetItemsTool, mock_tools_client)
 
     result = tool._run(dataset_id='dataset-empty')
 
@@ -268,7 +238,7 @@ def test_get_dataset_items_tool_empty_returns_message(mock_tools_client: MagicMo
 
 def test_get_dataset_items_tool_network_error_raises_tool_exception(mock_tools_client: MagicMock) -> None:
     mock_tools_client.get_dataset_items.side_effect = RuntimeError('Network error fetching dataset ds-bad: connection reset')
-    tool = _make_tool(ApifyGetDatasetItemsTool, mock_tools_client)
+    tool = make_tool(ApifyGetDatasetItemsTool, mock_tools_client)
 
     with pytest.raises(ToolException, match='Network error fetching dataset'):
         tool._run(dataset_id='ds-bad')
@@ -286,8 +256,8 @@ def test_get_dataset_items_tool_missing_token(monkeypatch: pytest.MonkeyPatch) -
 
 
 def test_run_actor_and_get_items_tool_returns_json(mock_tools_client: MagicMock) -> None:
-    mock_tools_client.run_actor_and_get_items.return_value = (_SUCCEEDED_RUN, _SAMPLE_ITEMS)
-    tool = _make_tool(ApifyRunActorAndGetItemsTool, mock_tools_client)
+    mock_tools_client.run_actor_and_get_items.return_value = (SUCCEEDED_RUN, SAMPLE_ITEMS)
+    tool = make_tool(ApifyRunActorAndGetItemsTool, mock_tools_client)
 
     result = tool._run(actor_id='apify/test', run_input={'q': '1'}, dataset_items_limit=50)
 
@@ -302,7 +272,7 @@ def test_run_actor_and_get_items_tool_failure_raises_tool_exception(mock_tools_c
     mock_tools_client.run_actor_and_get_items.side_effect = RuntimeError(
         'Actor run run-bad ended with status TIMED-OUT.'
     )
-    tool = _make_tool(ApifyRunActorAndGetItemsTool, mock_tools_client)
+    tool = make_tool(ApifyRunActorAndGetItemsTool, mock_tools_client)
 
     with pytest.raises(ToolException, match='TIMED-OUT'):
         tool._run(actor_id='apify/test')
@@ -321,7 +291,7 @@ def test_run_actor_and_get_items_tool_missing_token(monkeypatch: pytest.MonkeyPa
 
 def test_scrape_url_tool_returns_markdown(mock_tools_client: MagicMock) -> None:
     mock_tools_client.scrape_url.return_value = '# Hello World'
-    tool = _make_tool(ApifyScrapeUrlTool, mock_tools_client)
+    tool = make_tool(ApifyScrapeUrlTool, mock_tools_client)
 
     result = tool._run(url='https://example.com')
 
@@ -331,7 +301,7 @@ def test_scrape_url_tool_returns_markdown(mock_tools_client: MagicMock) -> None:
 
 def test_scrape_url_tool_empty_raises_tool_exception(mock_tools_client: MagicMock) -> None:
     mock_tools_client.scrape_url.side_effect = RuntimeError('No content extracted from https://example.com.')
-    tool = _make_tool(ApifyScrapeUrlTool, mock_tools_client)
+    tool = make_tool(ApifyScrapeUrlTool, mock_tools_client)
 
     with pytest.raises(ToolException, match='No content extracted'):
         tool._run(url='https://example.com')
@@ -349,8 +319,8 @@ def test_scrape_url_tool_missing_token(monkeypatch: pytest.MonkeyPatch) -> None:
 
 
 def test_run_task_tool_returns_json(mock_tools_client: MagicMock) -> None:
-    mock_tools_client.run_task.return_value = _SUCCEEDED_RUN
-    tool = _make_tool(ApifyRunTaskTool, mock_tools_client)
+    mock_tools_client.run_task.return_value = SUCCEEDED_RUN
+    tool = make_tool(ApifyRunTaskTool, mock_tools_client)
 
     result = tool._run(task_id='user/my-task', task_input={'key': 'val'})
 
@@ -365,7 +335,7 @@ def test_run_task_tool_returns_json(mock_tools_client: MagicMock) -> None:
 
 def test_run_task_tool_failure_raises_tool_exception(mock_tools_client: MagicMock) -> None:
     mock_tools_client.run_task.side_effect = RuntimeError('Actor run run-bad ended with status FAILED.')
-    tool = _make_tool(ApifyRunTaskTool, mock_tools_client)
+    tool = make_tool(ApifyRunTaskTool, mock_tools_client)
 
     with pytest.raises(ToolException, match='FAILED'):
         tool._run(task_id='user/my-task')
@@ -383,8 +353,8 @@ def test_run_task_tool_missing_token(monkeypatch: pytest.MonkeyPatch) -> None:
 
 
 def test_run_task_and_get_items_tool_returns_json(mock_tools_client: MagicMock) -> None:
-    mock_tools_client.run_task_and_get_items.return_value = (_SUCCEEDED_RUN, _SAMPLE_ITEMS)
-    tool = _make_tool(ApifyRunTaskAndGetItemsTool, mock_tools_client)
+    mock_tools_client.run_task_and_get_items.return_value = (SUCCEEDED_RUN, SAMPLE_ITEMS)
+    tool = make_tool(ApifyRunTaskAndGetItemsTool, mock_tools_client)
 
     result = tool._run(task_id='user/my-task', task_input={'q': '1'}, dataset_items_limit=50)
 
@@ -399,7 +369,7 @@ def test_run_task_and_get_items_tool_failure_raises_tool_exception(mock_tools_cl
     mock_tools_client.run_task_and_get_items.side_effect = RuntimeError(
         'Actor run run-bad ended with status TIMED-OUT.'
     )
-    tool = _make_tool(ApifyRunTaskAndGetItemsTool, mock_tools_client)
+    tool = make_tool(ApifyRunTaskAndGetItemsTool, mock_tools_client)
 
     with pytest.raises(ToolException, match='TIMED-OUT'):
         tool._run(task_id='user/my-task')
@@ -411,6 +381,105 @@ def test_run_task_and_get_items_tool_missing_token(monkeypatch: pytest.MonkeyPat
         ApifyRunTaskAndGetItemsTool()
 
 
+# ---------------------------------------------------------------------------
+# Value clamping (developer safety limits)
+# ---------------------------------------------------------------------------
+
+
+def test_run_actor_tool_clamps_timeout(mock_tools_client: MagicMock) -> None:
+    mock_tools_client.run_actor.return_value = SUCCEEDED_RUN
+    tool = make_tool(ApifyRunActorTool, mock_tools_client, max_timeout_secs=60)
+
+    tool._run(actor_id='apify/test', timeout_secs=9999)
+
+    mock_tools_client.run_actor.assert_called_once_with('apify/test', None, 60, None)
+
+
+def test_run_actor_tool_clamps_memory(mock_tools_client: MagicMock) -> None:
+    mock_tools_client.run_actor.return_value = SUCCEEDED_RUN
+    tool = make_tool(ApifyRunActorTool, mock_tools_client, max_memory_mbytes=512)
+
+    tool._run(actor_id='apify/test', memory_mbytes=8192)
+
+    mock_tools_client.run_actor.assert_called_once_with('apify/test', None, 300, 512)
+
+
+def test_run_actor_tool_passes_none_memory_through(mock_tools_client: MagicMock) -> None:
+    mock_tools_client.run_actor.return_value = SUCCEEDED_RUN
+    tool = make_tool(ApifyRunActorTool, mock_tools_client, max_memory_mbytes=512)
+
+    tool._run(actor_id='apify/test', memory_mbytes=None)
+
+    mock_tools_client.run_actor.assert_called_once_with('apify/test', None, 300, None)
+
+
+def test_get_dataset_items_tool_clamps_limit(mock_tools_client: MagicMock) -> None:
+    mock_tools_client.get_dataset_items.return_value = SAMPLE_ITEMS
+    tool = make_tool(ApifyGetDatasetItemsTool, mock_tools_client, max_items=10)
+
+    tool._run(dataset_id='ds-1', limit=50000)
+
+    mock_tools_client.get_dataset_items.assert_called_once_with('ds-1', 10, 0)
+
+
+def test_run_actor_and_get_items_tool_clamps_all(mock_tools_client: MagicMock) -> None:
+    mock_tools_client.run_actor_and_get_items.return_value = (SUCCEEDED_RUN, SAMPLE_ITEMS)
+    tool = make_tool(
+        ApifyRunActorAndGetItemsTool,
+        mock_tools_client,
+        max_timeout_secs=30,
+        max_memory_mbytes=256,
+        max_items=5,
+    )
+
+    tool._run(actor_id='a', timeout_secs=9999, memory_mbytes=9999, dataset_items_limit=9999)
+
+    mock_tools_client.run_actor_and_get_items.assert_called_once_with('a', None, 30, 256, 5)
+
+
+def test_scrape_url_tool_clamps_timeout(mock_tools_client: MagicMock) -> None:
+    mock_tools_client.scrape_url.return_value = '# content'
+    tool = make_tool(ApifyScrapeUrlTool, mock_tools_client, max_timeout_secs=30)
+
+    tool._run(url='https://example.com', timeout_secs=9999)
+
+    mock_tools_client.scrape_url.assert_called_once_with('https://example.com', 30)
+
+
+def test_run_task_tool_clamps_timeout_and_memory(mock_tools_client: MagicMock) -> None:
+    mock_tools_client.run_task.return_value = SUCCEEDED_RUN
+    tool = make_tool(ApifyRunTaskTool, mock_tools_client, max_timeout_secs=60, max_memory_mbytes=512)
+
+    tool._run(task_id='t/1', timeout_secs=9999, memory_mbytes=9999)
+
+    mock_tools_client.run_task.assert_called_once_with('t/1', None, 60, 512)
+
+
+def test_run_task_and_get_items_tool_clamps_all(mock_tools_client: MagicMock) -> None:
+    mock_tools_client.run_task_and_get_items.return_value = (SUCCEEDED_RUN, SAMPLE_ITEMS)
+    tool = make_tool(
+        ApifyRunTaskAndGetItemsTool,
+        mock_tools_client,
+        max_timeout_secs=30,
+        max_memory_mbytes=256,
+        max_items=5,
+    )
+
+    tool._run(task_id='t/1', timeout_secs=9999, memory_mbytes=9999, dataset_items_limit=9999)
+
+    mock_tools_client.run_task_and_get_items.assert_called_once_with('t/1', None, 30, 256, 5)
+
+
+def test_values_below_max_pass_through(mock_tools_client: MagicMock) -> None:
+    """When LLM values are within limits they should pass through unchanged."""
+    mock_tools_client.run_actor.return_value = SUCCEEDED_RUN
+    tool = make_tool(ApifyRunActorTool, mock_tools_client, max_timeout_secs=600, max_memory_mbytes=4096)
+
+    tool._run(actor_id='apify/test', timeout_secs=120, memory_mbytes=1024)
+
+    mock_tools_client.run_actor.assert_called_once_with('apify/test', None, 120, 1024)
+
+
 # ---------------------------------------------------------------------------
 # Tool metadata assertions
 # ---------------------------------------------------------------------------

From ba179a6c043ee12cd4e387d48ea80f9112e0999b Mon Sep 17 00:00:00 2001
From: David Omrai <david.omrai@seznam.cz>
Date: Wed, 22 Apr 2026 07:37:35 +0200
Subject: [PATCH 19/63] feat: clean up _actor_tools.py and tools.py for
 improved readability and maintainability; update test cases for better
 formatting and error handling

---
 langchain_apify/_actor_tools.py | 20 +++------
 langchain_apify/tools.py        | 16 +++----
 tests/unit_tests/test_client.py | 78 +++++++++++++++++++++++++++++++--
 tests/unit_tests/test_tools.py  | 16 ++++---
 4 files changed, 98 insertions(+), 32 deletions(-)

diff --git a/langchain_apify/_actor_tools.py b/langchain_apify/_actor_tools.py
index d7bd850..a989b11 100644
--- a/langchain_apify/_actor_tools.py
+++ b/langchain_apify/_actor_tools.py
@@ -1,17 +1,11 @@
-from __future__ import annotations
-
-import json
-from typing import TYPE_CHECKING, Any
-
-from langchain_core.tools import ToolException
-from pydantic import BaseModel, Field
+"""Actor-specific tool subclasses (search, social-media, etc.).
 
-from langchain_apify._client import ApifyToolsClient
-from langchain_apify.tools import _ApifyGenericTool, _run_meta
-
-if TYPE_CHECKING:
-    from langchain_core.callbacks import CallbackManagerForToolRun
+Downstream feature branches add concrete tools here.  They inherit from
+:class:`~langchain_apify.tools._ApifyGenericTool` and use
+:func:`~langchain_apify.tools._run_meta` to format run metadata.
+"""
 
+from __future__ import annotations
 
 # ---------------------------------------------------------------------------
 # Search & Crawling tools
@@ -20,4 +14,4 @@
 
 # ---------------------------------------------------------------------------
 # Social-media tools
-# ---------------------------------------------------------------------------
\ No newline at end of file
+# ---------------------------------------------------------------------------
diff --git a/langchain_apify/tools.py b/langchain_apify/tools.py
index 31f55d5..f771d35 100644
--- a/langchain_apify/tools.py
+++ b/langchain_apify/tools.py
@@ -7,7 +7,7 @@
 
 from apify_client import ApifyClient
 from langchain_core.tools import BaseTool, ToolException
-from pydantic import BaseModel, Field, create_model
+from pydantic import BaseModel, Field, PrivateAttr, create_model
 
 from langchain_apify._client import ApifyToolsClient
 from langchain_apify._error_messages import ERROR_APIFY_TOKEN_ENV_VAR_NOT_SET
@@ -306,7 +306,7 @@ class _ApifyGenericTool(BaseTool):  # type: ignore[override]
     max_memory_mbytes: int = Field(default=32768, description='Upper bound for memory_mbytes the LLM may request.')
     max_items: int = Field(default=1000, description='Upper bound for limit / dataset_items_limit the LLM may request.')
 
-    _client: ApifyToolsClient
+    _client: ApifyToolsClient = PrivateAttr()
 
     def __init__(self, apify_api_token: str | None = None, **kwargs: Any) -> None:  # noqa: ANN401
         super().__init__(**kwargs)
@@ -329,7 +329,7 @@ def _clamp_items(self, value: int) -> int:
 # ---------------------------------------------------------------------------
 
 
-class ApifyRunActorTool(_ApifyGenericTool):
+class ApifyRunActorTool(_ApifyGenericTool):  # type: ignore[override]
     """Run any Apify Actor by ID with an arbitrary JSON input.
 
     Returns run metadata (run ID, status, dataset ID, timestamps) as a JSON
@@ -387,7 +387,7 @@ def _run(
         return json.dumps(_run_meta(run))
 
 
-class ApifyGetDatasetItemsTool(_ApifyGenericTool):
+class ApifyGetDatasetItemsTool(_ApifyGenericTool):  # type: ignore[override]
     """Fetch items from an existing Apify dataset by ID.
 
     Returns a JSON object with an ``"items"`` key containing the list of item
@@ -437,7 +437,7 @@ def _run(
         return json.dumps({'items': items})
 
 
-class ApifyRunActorAndGetItemsTool(_ApifyGenericTool):
+class ApifyRunActorAndGetItemsTool(_ApifyGenericTool):  # type: ignore[override]
     """Run any Apify Actor and return both run metadata and dataset items.
 
     Combines :class:`ApifyRunActorTool` and :class:`ApifyGetDatasetItemsTool`
@@ -501,7 +501,7 @@ def _run(
         return json.dumps({'run': _run_meta(run), 'items': items})
 
 
-class ApifyScrapeUrlTool(_ApifyGenericTool):
+class ApifyScrapeUrlTool(_ApifyGenericTool):  # type: ignore[override]
     """Scrape a single URL and return its content as markdown.
 
     Uses the ``apify/website-content-crawler`` Actor under the hood with
@@ -549,7 +549,7 @@ def _run(
             raise ToolException(str(exc)) from exc
 
 
-class ApifyRunTaskTool(_ApifyGenericTool):
+class ApifyRunTaskTool(_ApifyGenericTool):  # type: ignore[override]
     """Run a saved Apify Actor task by ID and return run metadata.
 
     Actor tasks are pre-configured Actor runs saved in the Apify Console.
@@ -608,7 +608,7 @@ def _run(
         return json.dumps(_run_meta(run))
 
 
-class ApifyRunTaskAndGetItemsTool(_ApifyGenericTool):
+class ApifyRunTaskAndGetItemsTool(_ApifyGenericTool):  # type: ignore[override]
     """Run a saved Apify Actor task and return both run metadata and dataset items.
 
     Combines :class:`ApifyRunTaskTool` and :class:`ApifyGetDatasetItemsTool`
diff --git a/tests/unit_tests/test_client.py b/tests/unit_tests/test_client.py
index c35f495..1c93f84 100644
--- a/tests/unit_tests/test_client.py
+++ b/tests/unit_tests/test_client.py
@@ -7,7 +7,6 @@
 from langchain_apify._client import ApifyToolsClient
 from tests.unit_tests.conftest import FAILED_RUN, SAMPLE_ITEMS, SUCCEEDED_RUN
 
-
 # ---------------------------------------------------------------------------
 # __init__
 # ---------------------------------------------------------------------------
@@ -44,7 +43,9 @@ def test_run_actor_success(client: ApifyToolsClient, mock_apify_client: MagicMoc
     result = client.run_actor('apify/test-actor', run_input={'key': 'val'})
 
     mock_apify_client.actor.assert_called_once_with('apify/test-actor')
-    mock_apify_client.actor.return_value.call.assert_called_once_with(run_input={'key': 'val'}, timeout_secs=300, logger=None)
+    mock_apify_client.actor.return_value.call.assert_called_once_with(
+        run_input={'key': 'val'}, timeout_secs=300, logger=None
+    )
     assert result == SUCCEEDED_RUN
 
 
@@ -103,7 +104,9 @@ def test_run_actor_and_get_items_success(client: ApifyToolsClient, mock_apify_cl
     mock_apify_client.dataset.assert_called_once_with('dataset-xyz')
 
 
-def test_run_actor_and_get_items_missing_dataset_id_raises(client: ApifyToolsClient, mock_apify_client: MagicMock) -> None:
+def test_run_actor_and_get_items_missing_dataset_id_raises(
+    client: ApifyToolsClient, mock_apify_client: MagicMock
+) -> None:
     run_no_dataset = {**SUCCEEDED_RUN, 'defaultDatasetId': None}
     mock_apify_client.actor.return_value.call.return_value = run_no_dataset
 
@@ -148,7 +151,9 @@ def test_run_task_and_get_items_success(client: ApifyToolsClient, mock_apify_cli
     assert items == SAMPLE_ITEMS
 
 
-def test_run_task_and_get_items_missing_dataset_id_raises(client: ApifyToolsClient, mock_apify_client: MagicMock) -> None:
+def test_run_task_and_get_items_missing_dataset_id_raises(
+    client: ApifyToolsClient, mock_apify_client: MagicMock
+) -> None:
     run_no_dataset = {**SUCCEEDED_RUN, 'defaultDatasetId': None}
     mock_apify_client.task.return_value.call.return_value = run_no_dataset
 
@@ -211,3 +216,68 @@ def test_check_run_status_succeeded() -> None:
 def test_check_run_status_failed() -> None:
     with pytest.raises(RuntimeError, match='run-bad'):
         ApifyToolsClient._check_run_status({'id': 'run-bad', 'status': 'FAILED'})
+
+
+# ---------------------------------------------------------------------------
+# None returns from actor/task .call()
+# ---------------------------------------------------------------------------
+
+
+def test_run_actor_none_return_raises(client: ApifyToolsClient, mock_apify_client: MagicMock) -> None:
+    mock_apify_client.actor.return_value.call.return_value = None
+
+    with pytest.raises(RuntimeError, match='returned no run details'):
+        client.run_actor('apify/broken-actor')
+
+
+def test_run_task_none_return_raises(client: ApifyToolsClient, mock_apify_client: MagicMock) -> None:
+    mock_apify_client.task.return_value.call.return_value = None
+
+    with pytest.raises(RuntimeError, match='returned no run details'):
+        client.run_task('user/broken-task')
+
+
+# ---------------------------------------------------------------------------
+# Network error wrapping (transport exception -> RuntimeError)
+# ---------------------------------------------------------------------------
+
+
+def test_run_actor_network_error_wraps(client: ApifyToolsClient, mock_apify_client: MagicMock) -> None:
+    mock_apify_client.actor.return_value.call.side_effect = ConnectionError('conn refused')
+
+    with pytest.raises(RuntimeError, match='Network error calling Actor'):
+        client.run_actor('apify/test-actor')
+
+
+def test_get_dataset_items_network_error_wraps(client: ApifyToolsClient, mock_apify_client: MagicMock) -> None:
+    mock_apify_client.dataset.return_value.list_items.side_effect = ConnectionError('timeout')
+
+    with pytest.raises(RuntimeError, match='Network error fetching dataset'):
+        client.get_dataset_items('dataset-xyz')
+
+
+def test_run_actor_and_get_items_dataset_fetch_network_error(
+    client: ApifyToolsClient, mock_apify_client: MagicMock
+) -> None:
+    mock_apify_client.actor.return_value.call.return_value = SUCCEEDED_RUN
+    mock_apify_client.dataset.return_value.list_items.side_effect = ConnectionError('reset')
+
+    with pytest.raises(RuntimeError, match='Network error fetching dataset'):
+        client.run_actor_and_get_items('apify/test-actor')
+
+
+def test_run_task_network_error_wraps(client: ApifyToolsClient, mock_apify_client: MagicMock) -> None:
+    mock_apify_client.task.return_value.call.side_effect = ConnectionError('conn refused')
+
+    with pytest.raises(RuntimeError, match='Network error calling task'):
+        client.run_task('user/my-task')
+
+
+def test_run_task_and_get_items_dataset_fetch_network_error(
+    client: ApifyToolsClient, mock_apify_client: MagicMock
+) -> None:
+    mock_apify_client.task.return_value.call.return_value = SUCCEEDED_RUN
+    mock_apify_client.dataset.return_value.list_items.side_effect = ConnectionError('reset')
+
+    with pytest.raises(RuntimeError, match='Network error fetching dataset'):
+        client.run_task_and_get_items('user/my-task')
diff --git a/tests/unit_tests/test_tools.py b/tests/unit_tests/test_tools.py
index 61e4c8b..6698589 100644
--- a/tests/unit_tests/test_tools.py
+++ b/tests/unit_tests/test_tools.py
@@ -237,7 +237,9 @@ def test_get_dataset_items_tool_empty_returns_message(mock_tools_client: MagicMo
 
 
 def test_get_dataset_items_tool_network_error_raises_tool_exception(mock_tools_client: MagicMock) -> None:
-    mock_tools_client.get_dataset_items.side_effect = RuntimeError('Network error fetching dataset ds-bad: connection reset')
+    mock_tools_client.get_dataset_items.side_effect = RuntimeError(
+        'Network error fetching dataset ds-bad: connection reset'
+    )
     tool = make_tool(ApifyGetDatasetItemsTool, mock_tools_client)
 
     with pytest.raises(ToolException, match='Network error fetching dataset'):
@@ -489,12 +491,12 @@ def test_generic_tools_have_correct_metadata() -> None:
     """Verify name, description, and args_schema are set on all generic tools."""
     with patch.object(ApifyToolsClient, '__init__', return_value=None):
         tools = [
-            ApifyRunActorTool(apify_api_token='dummy'),
-            ApifyGetDatasetItemsTool(apify_api_token='dummy'),
-            ApifyRunActorAndGetItemsTool(apify_api_token='dummy'),
-            ApifyScrapeUrlTool(apify_api_token='dummy'),
-            ApifyRunTaskTool(apify_api_token='dummy'),
-            ApifyRunTaskAndGetItemsTool(apify_api_token='dummy'),
+            ApifyRunActorTool(apify_api_token='dummy'),  # type: ignore[call-arg]
+            ApifyGetDatasetItemsTool(apify_api_token='dummy'),  # type: ignore[call-arg]
+            ApifyRunActorAndGetItemsTool(apify_api_token='dummy'),  # type: ignore[call-arg]
+            ApifyScrapeUrlTool(apify_api_token='dummy'),  # type: ignore[call-arg]
+            ApifyRunTaskTool(apify_api_token='dummy'),  # type: ignore[call-arg]
+            ApifyRunTaskAndGetItemsTool(apify_api_token='dummy'),  # type: ignore[call-arg]
         ]
 
     expected_names = [

From da900ce0cdcdad33853b2bea33774fa042addd8f Mon Sep 17 00:00:00 2001
From: David Omrai <david.omrai@seznam.cz>
Date: Wed, 22 Apr 2026 09:52:52 +0200
Subject: [PATCH 20/63] feat: add three new tools to _client.py

---
 langchain_apify/_client.py | 130 +++++++++++++++++++++++++++++++++++++
 1 file changed, 130 insertions(+)

diff --git a/langchain_apify/_client.py b/langchain_apify/_client.py
index b131484..e6b4468 100644
--- a/langchain_apify/_client.py
+++ b/langchain_apify/_client.py
@@ -12,8 +12,12 @@
 from langchain_apify.utils import create_apify_client
 
 _SCRAPE_ACTOR_ID = 'apify/website-content-crawler'
+_CRAWL_ACTOR_ID = 'apify/website-content-crawler'
+_GOOGLE_SEARCH_ACTOR_ID = 'apify/google-search-scraper'
+_RAG_WEB_BROWSER_ACTOR_ID = 'apify/rag-web-browser'
 _DEFAULT_RUN_TIMEOUT_SECS = 300
 _DEFAULT_SCRAPE_TIMEOUT_SECS = 120
+_DEFAULT_CRAWL_TIMEOUT_SECS = 300
 _DEFAULT_DATASET_ITEMS_LIMIT = 100
 _RUN_STATUS_SUCCEEDED = 'SUCCEEDED'
 
@@ -239,6 +243,132 @@ def scrape_url(self, url: str, timeout_secs: int = _DEFAULT_SCRAPE_TIMEOUT_SECS)
             raise RuntimeError(msg)
         return content
 
+    def google_search(
+        self,
+        query: str,
+        max_results: int = 10,
+        country_code: str | None = None,
+        language_code: str | None = None,
+        timeout_secs: int = _DEFAULT_RUN_TIMEOUT_SECS,
+    ) -> list[dict]:
+        """Run a Google search and return structured results.
+
+        Uses ``apify/google-search-scraper`` with a single query.
+
+        Args:
+            query: Search query string.
+            max_results: Maximum number of results to return.
+            country_code: Two-letter country code for localised results.
+            language_code: Two-letter language code.
+            timeout_secs: Maximum time to wait for the run to finish.
+
+        Returns:
+            List of result dicts, each with ``title``, ``url``, and
+            ``description`` keys.
+
+        Raises:
+            RuntimeError: If the Actor run fails.
+        """
+        run_input: dict = {
+            'queries': query,
+            'maxPagesPerQuery': 1,
+            'resultsPerPage': max_results,
+        }
+        if country_code is not None:
+            run_input['countryCode'] = country_code
+        if language_code is not None:
+            run_input['languageCode'] = language_code
+
+        _, items = self.run_actor_and_get_items(
+            _GOOGLE_SEARCH_ACTOR_ID,
+            run_input=run_input,
+            timeout_secs=timeout_secs,
+            dataset_items_limit=max_results,
+        )
+        results: list[dict] = []
+        for item in items:
+            for organic in item.get('organicResults', []):
+                results.append({
+                    'title': organic.get('title', ''),
+                    'url': organic.get('url', ''),
+                    'description': organic.get('description', ''),
+                })
+        return results[:max_results]
+
+    def rag_web_search(
+        self,
+        query: str,
+        max_results: int = 5,
+        timeout_secs: int = _DEFAULT_RUN_TIMEOUT_SECS,
+    ) -> list[dict]:
+        """Search the web and return crawled page content for RAG.
+
+        Uses ``apify/rag-web-browser``.
+
+        Args:
+            query: Search query string.
+            max_results: Maximum number of results to return.
+            timeout_secs: Maximum time to wait for the run to finish.
+
+        Returns:
+            List of result dicts with ``crawledUrl``, ``title``, and
+            ``text`` keys (among others from the Actor).
+
+        Raises:
+            RuntimeError: If the Actor run fails.
+        """
+        run_input: dict = {
+            'query': query,
+            'maxResults': max_results,
+        }
+        _, items = self.run_actor_and_get_items(
+            _RAG_WEB_BROWSER_ACTOR_ID,
+            run_input=run_input,
+            timeout_secs=timeout_secs,
+            dataset_items_limit=max_results,
+        )
+        return items
+
+    def crawl_website(
+        self,
+        url: str,
+        max_crawl_pages: int = 10,
+        max_crawl_depth: int = 1,
+        crawler_type: str = 'cheerio',
+        timeout_secs: int = _DEFAULT_CRAWL_TIMEOUT_SECS,
+    ) -> list[dict]:
+        """Crawl a website and return page content.
+
+        Uses ``apify/website-content-crawler``.
+
+        Args:
+            url: Seed URL to start crawling from.
+            max_crawl_pages: Maximum number of pages to crawl.
+            max_crawl_depth: Maximum link-follow depth from the seed URL.
+            crawler_type: Crawler engine (e.g. ``"cheerio"``, ``"playwright"``).
+            timeout_secs: Maximum time to wait for the run to finish.
+
+        Returns:
+            List of page dicts, each with at least ``url``, ``title``, and
+            ``markdown`` (or ``text``) keys.
+
+        Raises:
+            RuntimeError: If the Actor run fails.
+        """
+        run_input: dict = {
+            'startUrls': [{'url': url}],
+            'maxCrawlPages': max_crawl_pages,
+            'maxCrawlDepth': max_crawl_depth,
+            'crawlerType': crawler_type,
+        }
+        _, items = self.run_actor_and_get_items(
+            _CRAWL_ACTOR_ID,
+            run_input=run_input,
+            timeout_secs=timeout_secs,
+            dataset_items_limit=max_crawl_pages,
+        )
+        return items
+
     @staticmethod
     def _check_run_status(run: dict) -> None:
         """Raise if the run did not succeed."""

From ff6ffebad79eac660a42eb7fcf0828932fffeedf Mon Sep 17 00:00:00 2001
From: David Omrai <david.omrai@seznam.cz>
Date: Wed, 22 Apr 2026 09:57:48 +0200
Subject: [PATCH 21/63] feat: implement apifygooglesearchtool and
 apifywebcrawlertool

---
 langchain_apify/_actor_tools.py | 143 ++++++++++++++++++++++++++++++++
 langchain_apify/tools.py        |  19 +++++
 2 files changed, 162 insertions(+)

diff --git a/langchain_apify/_actor_tools.py b/langchain_apify/_actor_tools.py
index a989b11..c62d912 100644
--- a/langchain_apify/_actor_tools.py
+++ b/langchain_apify/_actor_tools.py
@@ -7,11 +7,154 @@
 
 from __future__ import annotations
 
+import json
+from typing import TYPE_CHECKING
+
+from langchain_core.tools import ToolException
+from pydantic import BaseModel
+
+from langchain_apify.tools import (
+    ApifyGoogleSearchInput,
+    ApifyWebCrawlerInput,
+    _ApifyGenericTool,
+)
+
+if TYPE_CHECKING:
+    from langchain_core.callbacks import CallbackManagerForToolRun
+
 # ---------------------------------------------------------------------------
 # Search & Crawling tools
 # ---------------------------------------------------------------------------
 
 
+class ApifyGoogleSearchTool(_ApifyGenericTool):  # type: ignore[override]
+    """Search Google and return structured results via Apify.
+
+    Wraps the ``apify/google-search-scraper`` Actor behind a simplified,
+    LLM-friendly interface.  Returns a JSON string containing an array of
+    result objects, each with ``title``, ``url``, and ``description`` keys.
+
+    Args:
+        apify_api_token: Apify API token. Falls back to the ``APIFY_API_TOKEN``
+            environment variable when *None*.
+
+    Returns:
+        JSON string — an array of ``{"title", "url", "description"}`` objects.
+
+    Example:
+        .. code-block:: python
+
+            import os
+            os.environ["APIFY_API_TOKEN"] = "your-apify-api-token"
+
+            from langchain_apify import ApifyGoogleSearchTool
+
+            tool = ApifyGoogleSearchTool()
+            results = tool.invoke({"query": "LangChain framework"})
+    """
+
+    name: str = 'apify_google_search'
+    description: str = (
+        'Search Google using Apify and return structured results as a JSON array.'
+        ' Each result has keys: title, url, description.'
+        ' Required: query (str) — the search query.'
+        ' Optional: max_results (int, default 10),'
+        ' country_code (str|null), language_code (str|null).'
+    )
+    args_schema: type[BaseModel] = ApifyGoogleSearchInput
+
+    def _run(
+        self,
+        query: str,
+        max_results: int = 10,
+        country_code: str | None = None,
+        language_code: str | None = None,
+        _run_manager: CallbackManagerForToolRun | None = None,
+    ) -> str:
+        try:
+            results = self._client.google_search(
+                query,
+                max_results=self._clamp_items(max_results),
+                country_code=country_code,
+                language_code=language_code,
+                timeout_secs=self.max_timeout_secs,
+            )
+        except RuntimeError as exc:
+            raise ToolException(str(exc)) from exc
+        return json.dumps(results)
+
+
+class ApifyWebCrawlerTool(_ApifyGenericTool):  # type: ignore[override]
+    """Crawl a website and return page content as JSON via Apify.
+
+    Wraps the ``apify/website-content-crawler`` Actor.  Returns a JSON string
+    containing an array of page objects, each with ``url``, ``title``, and
+    ``content`` (markdown, else plain text, else an empty string) keys.
+
+    Args:
+        apify_api_token: Apify API token. Falls back to the ``APIFY_API_TOKEN``
+            environment variable when *None*.
+
+    Returns:
+        JSON string — an array of ``{"url", "title", "content"}`` objects.
+
+    Example:
+        .. code-block:: python
+
+            import os
+            os.environ["APIFY_API_TOKEN"] = "your-apify-api-token"
+
+            from langchain_apify import ApifyWebCrawlerTool
+
+            tool = ApifyWebCrawlerTool()
+            pages = tool.invoke({
+                "url": "https://docs.apify.com",
+                "max_crawl_pages": 5,
+            })
+    """
+
+    name: str = 'apify_web_crawler'
+    description: str = (
+        'Crawl a website using Apify and return page content as a JSON array.'
+        ' Each page object has keys: url, title, content (markdown).'
+        ' Required: url (str) — seed URL to crawl.'
+        ' Optional: max_crawl_pages (int, default 10),'
+        ' max_crawl_depth (int, default 1),'
+        ' crawler_type (str, default "cheerio"),'
+        ' timeout_secs (int, default 300).'
+    )
+    args_schema: type[BaseModel] = ApifyWebCrawlerInput
+
+    def _run(
+        self,
+        url: str,
+        max_crawl_pages: int = 10,
+        max_crawl_depth: int = 1,
+        crawler_type: str = 'cheerio',
+        timeout_secs: int = 300,
+        _run_manager: CallbackManagerForToolRun | None = None,
+    ) -> str:
+        try:
+            items = self._client.crawl_website(
+                url,
+                max_crawl_pages=self._clamp_items(max_crawl_pages),
+                max_crawl_depth=max_crawl_depth,
+                crawler_type=crawler_type,
+                timeout_secs=self._clamp_timeout(timeout_secs),
+            )
+        except RuntimeError as exc:
+            raise ToolException(str(exc)) from exc
+        pages = [
+            {
+                'url': item.get('url', ''),
+                'title': item.get('metadata', {}).get('title', '') if isinstance(item.get('metadata'), dict) else '',
+                'content': item.get('markdown') or item.get('text') or '',
+            }
+            for item in items
+        ]
+        return json.dumps(pages)
+
+
 # ---------------------------------------------------------------------------
 # Social-media tools
 # ---------------------------------------------------------------------------
diff --git a/langchain_apify/tools.py b/langchain_apify/tools.py
index f771d35..1db4c55 100644
--- a/langchain_apify/tools.py
+++ b/langchain_apify/tools.py
@@ -234,6 +234,25 @@ class ApifyScrapeUrlInput(BaseModel):
     timeout_secs: int = Field(default=120, description='Maximum time in seconds to wait for the crawl to finish.')
 
 
+class ApifyGoogleSearchInput(BaseModel):
+    """Input schema for :class:`ApifyGoogleSearchTool`."""
+
+    query: str = Field(description='Search query string.')
+    max_results: int = Field(default=10, description='Maximum number of search results to return.')
+    country_code: str | None = Field(default=None, description='Two-letter country code for localised results.')
+    language_code: str | None = Field(default=None, description='Two-letter language code.')
+
+
+class ApifyWebCrawlerInput(BaseModel):
+    """Input schema for :class:`ApifyWebCrawlerTool`."""
+
+    url: str = Field(description='Seed URL to start crawling from.')
+    max_crawl_pages: int = Field(default=10, description='Maximum number of pages to crawl.')
+    max_crawl_depth: int = Field(default=1, description='Maximum link-follow depth from the seed URL.')
+    crawler_type: str = Field(default='cheerio', description='Crawler engine (e.g. "cheerio", "playwright").')
+    timeout_secs: int = Field(default=300, description='Maximum time in seconds to wait for the crawl to finish.')
+
+
 class ApifyRunTaskInput(BaseModel):
     """Input schema for :class:`ApifyRunTaskTool`."""
 

From 6e8888cc1663fa18214b09c4807f59ac7970fa69 Mon Sep 17 00:00:00 2001
From: David Omrai <david.omrai@seznam.cz>
Date: Wed, 22 Apr 2026 10:02:45 +0200
Subject: [PATCH 22/63] feat: implement an apify search retriever

---
 langchain_apify/retrievers.py | 135 ++++++++++++++++++++++++++++++++++
 1 file changed, 135 insertions(+)
 create mode 100644 langchain_apify/retrievers.py

diff --git a/langchain_apify/retrievers.py b/langchain_apify/retrievers.py
new file mode 100644
index 0000000..0c990d3
--- /dev/null
+++ b/langchain_apify/retrievers.py
@@ -0,0 +1,135 @@
+"""LangChain retrievers backed by Apify Actors."""
+
+from __future__ import annotations
+
+import os
+from typing import TYPE_CHECKING, Any
+
+from apify_client import ApifyClient, ApifyClientAsync
+from langchain_core.documents import Document
+from langchain_core.retrievers import BaseRetriever
+from pydantic import Field, PrivateAttr
+
+from langchain_apify._error_messages import ERROR_APIFY_TOKEN_ENV_VAR_NOT_SET
+from langchain_apify.utils import create_apify_client
+
+if TYPE_CHECKING:
+    from langchain_core.callbacks import (
+        AsyncCallbackManagerForRetrieverRun,
+        CallbackManagerForRetrieverRun,
+    )
+
+_RAG_WEB_BROWSER_ACTOR_ID = 'apify/rag-web-browser'
+_DEFAULT_TIMEOUT_SECS = 300
+
+
+class ApifySearchRetriever(BaseRetriever):
+    """Retrieve documents from the web for RAG using Apify.
+
+    Wraps the ``apify/rag-web-browser`` Actor.  Each invocation runs a web
+    search, crawls the top results, and returns their content as LangChain
+    ``Document`` objects ready for a RAG pipeline.
+
+    Args:
+        apify_api_token: Apify API token. Falls back to the ``APIFY_API_TOKEN``
+            environment variable when *None*.
+        max_results: Maximum number of ``Document`` objects to return per query.
+        timeout_secs: Maximum time in seconds to wait for the Actor run.
+
+    Returns:
+        List of ``Document`` objects.  ``page_content`` contains the crawled
+        text; ``metadata`` includes ``source`` (URL) and ``title``.
+
+    Example:
+        .. code-block:: python
+
+            import os
+            os.environ["APIFY_API_TOKEN"] = "your-apify-api-token"
+
+            from langchain_apify import ApifySearchRetriever
+
+            retriever = ApifySearchRetriever(max_results=3)
+            docs = retriever.invoke("What is LangChain?")
+    """
+
+    max_results: int = Field(default=5, description='Maximum number of documents to return.')
+    timeout_secs: int = Field(default=_DEFAULT_TIMEOUT_SECS, description='Maximum Actor run time in seconds.')
+
+    _sync_client: ApifyClient = PrivateAttr()
+    _async_client: ApifyClientAsync = PrivateAttr()
+
+    def __init__(self, apify_api_token: str | None = None, **kwargs: Any) -> None:  # noqa: ANN401
+        super().__init__(**kwargs)
+        token = apify_api_token or os.getenv('APIFY_API_TOKEN')
+        if not token:
+            msg = ERROR_APIFY_TOKEN_ENV_VAR_NOT_SET
+            raise ValueError(msg)
+        self._sync_client = create_apify_client(ApifyClient, token)
+        self._async_client = create_apify_client(ApifyClientAsync, token)
+
+    def _get_relevant_documents(
+        self,
+        query: str,
+        *,
+        run_manager: CallbackManagerForRetrieverRun | None = None,
+    ) -> list[Document]:
+        run_input = {
+            'query': query,
+            'maxResults': self.max_results,
+        }
+        run = self._sync_client.actor(_RAG_WEB_BROWSER_ACTOR_ID).call(
+            run_input=run_input,
+            timeout_secs=self.timeout_secs,
+        )
+        if run is None:
+            return []
+
+        dataset_id = run.get('defaultDatasetId')
+        if not dataset_id:
+            return []
+
+        items = self._sync_client.dataset(dataset_id).list_items(
+            limit=self.max_results, clean=True,
+        ).items
+        return self._items_to_documents(items)
+
+    async def _aget_relevant_documents(
+        self,
+        query: str,
+        *,
+        run_manager: AsyncCallbackManagerForRetrieverRun | None = None,
+    ) -> list[Document]:
+        run_input = {
+            'query': query,
+            'maxResults': self.max_results,
+        }
+        run = await self._async_client.actor(_RAG_WEB_BROWSER_ACTOR_ID).call(
+            run_input=run_input,
+            timeout_secs=self.timeout_secs,
+        )
+        if run is None:
+            return []
+
+        dataset_id = run.get('defaultDatasetId')
+        if not dataset_id:
+            return []
+
+        items = (
+            await self._async_client.dataset(dataset_id).list_items(
+                limit=self.max_results, clean=True,
+            )
+        ).items
+        return self._items_to_documents(items)
+
+    @staticmethod
+    def _items_to_documents(items: list[dict]) -> list[Document]:
+        """Convert Actor dataset items to LangChain Documents."""
+        docs: list[Document] = []
+        for item in items:
+            page_content = item.get('text') or item.get('markdown') or ''
+            metadata: dict[str, Any] = {
+                'source': item.get('crawledUrl') or item.get('url', ''),
+                'title': item.get('metadata', {}).get('title', '') if isinstance(item.get('metadata'), dict) else '',
+            }
+            docs.append(Document(page_content=page_content, metadata=metadata))
+        return docs

From b124ce154ca678356ea4901143f4f8825864cfcf Mon Sep 17 00:00:00 2001
From: David Omrai <david.omrai@seznam.cz>
Date: Wed, 22 Apr 2026 10:05:57 +0200
Subject: [PATCH 23/63] feat: add apify crawl loader to document_loaders.py

---
 langchain_apify/document_loaders.py | 89 ++++++++++++++++++++++++++++-
 1 file changed, 88 insertions(+), 1 deletion(-)

diff --git a/langchain_apify/document_loaders.py b/langchain_apify/document_loaders.py
index 49befb6..d8064a8 100644
--- a/langchain_apify/document_loaders.py
+++ b/langchain_apify/document_loaders.py
@@ -8,8 +8,9 @@
 from langchain_core.document_loaders.base import BaseLoader
 from langchain_core.documents import Document  # noqa: TCH002
 from langchain_core.utils import get_from_dict_or_env
-from pydantic import BaseModel, ConfigDict, model_validator
+from pydantic import BaseModel, ConfigDict, Field, PrivateAttr, model_validator
 
+from langchain_apify._client import ApifyToolsClient
 from langchain_apify.utils import create_apify_client
 
 if TYPE_CHECKING:
@@ -112,3 +113,89 @@ def lazy_load(self) -> Iterator[Document]:
         )
         for item in dataset_items:
             yield self.dataset_mapping_function(item)
+
+
+class ApifyCrawlLoader(BaseLoader, BaseModel):  # BaseModel supplies the kwargs __init__ and Field/PrivateAttr support
+    """Crawl a website and load pages as LangChain Documents.
+
+    Wraps the ``apify/website-content-crawler`` Actor.  Runs a crawl starting
+    from the seed URL and converts each crawled page into a ``Document`` with
+    markdown content and metadata (source URL, title, crawl depth).
+
+    Args:
+        url: Seed URL to start crawling from.
+        apify_api_token: Apify API token. Falls back to the ``APIFY_API_TOKEN``
+            environment variable when *None*.
+        max_crawl_pages: Maximum number of pages to crawl.
+        max_crawl_depth: Maximum link-follow depth from the seed URL.
+        crawler_type: Crawler engine (e.g. ``"cheerio"``, ``"playwright"``).
+        timeout_secs: Maximum time in seconds to wait for the crawl.
+
+    Returns:
+        Iterator (or list) of ``Document`` objects.  ``page_content`` contains
+        the page markdown; ``metadata`` includes ``source``, ``title``, and
+        ``crawl_depth`` (``crawlDepth`` if present, else ``crawl.depth``, else 0).
+
+    Example:
+        .. code-block:: python
+
+            import os
+            os.environ["APIFY_API_TOKEN"] = "your-apify-api-token"
+
+            from langchain_apify import ApifyCrawlLoader
+
+            loader = ApifyCrawlLoader(
+                url="https://docs.apify.com",
+                max_crawl_pages=5,
+            )
+            documents = loader.load()
+    """
+
+    url: str
+    max_crawl_pages: int = Field(default=10)
+    max_crawl_depth: int = Field(default=1)
+    crawler_type: str = Field(default='cheerio')
+    timeout_secs: int = Field(default=300)
+
+    _client: ApifyToolsClient = PrivateAttr()
+
+    def __init__(
+        self,
+        url: str,
+        apify_api_token: str | None = None,
+        *,
+        max_crawl_pages: int = 10,
+        max_crawl_depth: int = 1,
+        crawler_type: str = 'cheerio',
+        timeout_secs: int = 300,
+    ) -> None:
+        super().__init__(
+            url=url,
+            max_crawl_pages=max_crawl_pages,
+            max_crawl_depth=max_crawl_depth,
+            crawler_type=crawler_type,
+            timeout_secs=timeout_secs,
+        )
+        self._client = ApifyToolsClient(apify_api_token=apify_api_token)
+
+    def lazy_load(self) -> Iterator[Document]:
+        """Crawl the website and yield Documents lazily.
+
+        Yields:
+            Document: One document per crawled page.
+        """
+        items = self._client.crawl_website(
+            self.url,
+            max_crawl_pages=self.max_crawl_pages,
+            max_crawl_depth=self.max_crawl_depth,
+            crawler_type=self.crawler_type,
+            timeout_secs=self.timeout_secs,
+        )
+        for item in items:
+            page_content = item.get('markdown') or item.get('text') or ''
+            metadata: dict[str, Any] = {
+                'source': item.get('url', ''),
+                'title': item.get('metadata', {}).get('title', '') if isinstance(item.get('metadata'), dict) else '',
+                'crawl_depth': item.get('crawlDepth', (item.get('crawl') or {}).get('depth', 0)),
+            }
+            yield Document(page_content=page_content, metadata=metadata)

From 029b9e125fbc5ea8aa79988148477c0bad89e8bd Mon Sep 17 00:00:00 2001
From: David Omrai <david.omrai@seznam.cz>
Date: Wed, 22 Apr 2026 10:08:08 +0200
Subject: [PATCH 24/63] feat: update __init__

---
 langchain_apify/__init__.py | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/langchain_apify/__init__.py b/langchain_apify/__init__.py
index fa1f369..a57c52a 100644
--- a/langchain_apify/__init__.py
+++ b/langchain_apify/__init__.py
@@ -2,7 +2,9 @@
 
 from importlib import metadata
 
-from langchain_apify.document_loaders import ApifyDatasetLoader
+from langchain_apify._actor_tools import ApifyGoogleSearchTool, ApifyWebCrawlerTool
+from langchain_apify.document_loaders import ApifyCrawlLoader, ApifyDatasetLoader
+from langchain_apify.retrievers import ApifySearchRetriever
 from langchain_apify.tools import (
     ApifyActorsTool,
     ApifyGetDatasetItemsTool,
@@ -33,6 +35,11 @@
     ApifyRunTaskAndGetItemsTool,
 ]
 
+APIFY_ACTOR_TOOLS: list[type] = [
+    ApifyGoogleSearchTool,
+    ApifyWebCrawlerTool,
+]
+
 __all__ = [
     # Existing components (backward-compatible)
     'ApifyActorsTool',
@@ -45,7 +52,15 @@
     'ApifyRunTaskAndGetItemsTool',
     'ApifyRunTaskTool',
     'ApifyScrapeUrlTool',
+    # Actor-specific tools
+    'ApifyGoogleSearchTool',
+    'ApifyWebCrawlerTool',
+    # Retriever
+    'ApifySearchRetriever',
+    # Loaders
+    'ApifyCrawlLoader',
     # Tool group lists
+    'APIFY_ACTOR_TOOLS',
     'APIFY_CORE_TOOLS',
     # Meta
     '__version__',

From c7ee287c2f992f343840cbf84ee2b3f48999f26b Mon Sep 17 00:00:00 2001
From: David Omrai <david.omrai@seznam.cz>
Date: Wed, 22 Apr 2026 10:19:11 +0200
Subject: [PATCH 25/63] feat: add unit tests

---
 tests/unit_tests/test_client.py | 151 ++++++++++++++++++++++++++++++++
 1 file changed, 151 insertions(+)

diff --git a/tests/unit_tests/test_client.py b/tests/unit_tests/test_client.py
index 1c93f84..ca85138 100644
--- a/tests/unit_tests/test_client.py
+++ b/tests/unit_tests/test_client.py
@@ -281,3 +281,154 @@ def test_run_task_and_get_items_dataset_fetch_network_error(
 
     with pytest.raises(RuntimeError, match='Network error fetching dataset'):
         client.run_task_and_get_items('user/my-task')
+
+
+# ---------------------------------------------------------------------------
+# google_search
+# ---------------------------------------------------------------------------
+
+GOOGLE_SEARCH_ITEMS: list[dict] = [
+    {
+        'organicResults': [
+            {'title': 'Result 1', 'url': 'https://example.com/1', 'description': 'Desc 1'},
+            {'title': 'Result 2', 'url': 'https://example.com/2', 'description': 'Desc 2'},
+        ],
+    },
+]
+
+
+def test_google_search_success(client: ApifyToolsClient, mock_apify_client: MagicMock) -> None:
+    mock_apify_client.actor.return_value.call.return_value = SUCCEEDED_RUN
+    mock_apify_client.dataset.return_value.list_items.return_value.items = GOOGLE_SEARCH_ITEMS
+
+    results = client.google_search('test query', max_results=5)
+
+    assert len(results) == 2
+    assert results[0] == {'title': 'Result 1', 'url': 'https://example.com/1', 'description': 'Desc 1'}
+    assert results[1] == {'title': 'Result 2', 'url': 'https://example.com/2', 'description': 'Desc 2'}
+
+
+def test_google_search_with_locale(client: ApifyToolsClient, mock_apify_client: MagicMock) -> None:
+    mock_apify_client.actor.return_value.call.return_value = SUCCEEDED_RUN
+    mock_apify_client.dataset.return_value.list_items.return_value.items = GOOGLE_SEARCH_ITEMS
+
+    client.google_search('test', country_code='us', language_code='en')
+
+    call_args = mock_apify_client.actor.return_value.call.call_args
+    run_input = call_args.kwargs['run_input']
+    assert run_input['countryCode'] == 'us'
+    assert run_input['languageCode'] == 'en'
+
+
+def test_google_search_caps_results(client: ApifyToolsClient, mock_apify_client: MagicMock) -> None:
+    many_results = [{'title': f'R{i}', 'url': f'https://example.com/{i}', 'description': f'D{i}'} for i in range(20)]
+    mock_apify_client.actor.return_value.call.return_value = SUCCEEDED_RUN
+    mock_apify_client.dataset.return_value.list_items.return_value.items = [{'organicResults': many_results}]
+
+    results = client.google_search('test', max_results=3)
+
+    assert len(results) == 3
+
+
+def test_google_search_empty_results(client: ApifyToolsClient, mock_apify_client: MagicMock) -> None:
+    mock_apify_client.actor.return_value.call.return_value = SUCCEEDED_RUN
+    mock_apify_client.dataset.return_value.list_items.return_value.items = [{'organicResults': []}]
+
+    results = client.google_search('test')
+
+    assert results == []
+
+
+def test_google_search_failed_run_raises(client: ApifyToolsClient, mock_apify_client: MagicMock) -> None:
+    mock_apify_client.actor.return_value.call.return_value = FAILED_RUN
+
+    with pytest.raises(RuntimeError, match='run-fail'):
+        client.google_search('test')
+
+
+# ---------------------------------------------------------------------------
+# rag_web_search
+# ---------------------------------------------------------------------------
+
+RAG_SEARCH_ITEMS: list[dict] = [
+    {'crawledUrl': 'https://example.com/1', 'text': 'Page 1 content', 'metadata': {'title': 'Page 1'}},
+    {'crawledUrl': 'https://example.com/2', 'text': 'Page 2 content', 'metadata': {'title': 'Page 2'}},
+]
+
+
+def test_rag_web_search_success(client: ApifyToolsClient, mock_apify_client: MagicMock) -> None:
+    mock_apify_client.actor.return_value.call.return_value = SUCCEEDED_RUN
+    mock_apify_client.dataset.return_value.list_items.return_value.items = RAG_SEARCH_ITEMS
+
+    items = client.rag_web_search('test query', max_results=5)
+
+    assert len(items) == 2
+    assert items[0]['crawledUrl'] == 'https://example.com/1'
+    assert items[1]['text'] == 'Page 2 content'
+
+
+def test_rag_web_search_empty(client: ApifyToolsClient, mock_apify_client: MagicMock) -> None:
+    mock_apify_client.actor.return_value.call.return_value = SUCCEEDED_RUN
+    mock_apify_client.dataset.return_value.list_items.return_value.items = []
+
+    items = client.rag_web_search('test')
+
+    assert items == []
+
+
+def test_rag_web_search_failed_run_raises(client: ApifyToolsClient, mock_apify_client: MagicMock) -> None:
+    mock_apify_client.actor.return_value.call.return_value = FAILED_RUN
+
+    with pytest.raises(RuntimeError, match='run-fail'):
+        client.rag_web_search('test')
+
+
+# ---------------------------------------------------------------------------
+# crawl_website
+# ---------------------------------------------------------------------------
+
+CRAWL_ITEMS: list[dict] = [
+    {'url': 'https://example.com/', 'markdown': '# Home', 'text': 'Home', 'metadata': {'title': 'Home'}},
+    {'url': 'https://example.com/about', 'markdown': '# About', 'text': 'About', 'metadata': {'title': 'About'}},
+]
+
+
+def test_crawl_website_success(client: ApifyToolsClient, mock_apify_client: MagicMock) -> None:
+    mock_apify_client.actor.return_value.call.return_value = SUCCEEDED_RUN
+    mock_apify_client.dataset.return_value.list_items.return_value.items = CRAWL_ITEMS
+
+    items = client.crawl_website('https://example.com')
+
+    assert len(items) == 2
+    assert items[0]['url'] == 'https://example.com/'
+    assert items[1]['markdown'] == '# About'
+
+
+def test_crawl_website_passes_params(client: ApifyToolsClient, mock_apify_client: MagicMock) -> None:
+    mock_apify_client.actor.return_value.call.return_value = SUCCEEDED_RUN
+    mock_apify_client.dataset.return_value.list_items.return_value.items = []
+
+    client.crawl_website('https://example.com', max_crawl_pages=5, max_crawl_depth=2, crawler_type='playwright')
+
+    call_args = mock_apify_client.actor.return_value.call.call_args
+    run_input = call_args.kwargs['run_input']
+    assert run_input['startUrls'] == [{'url': 'https://example.com'}]
+    assert run_input['maxCrawlPages'] == 5
+    assert run_input['maxCrawlDepth'] == 2
+    assert run_input['crawlerType'] == 'playwright'
+
+
+def test_crawl_website_empty(client: ApifyToolsClient, mock_apify_client: MagicMock) -> None:
+    mock_apify_client.actor.return_value.call.return_value = SUCCEEDED_RUN
+    mock_apify_client.dataset.return_value.list_items.return_value.items = []
+
+    items = client.crawl_website('https://example.com')
+
+    assert items == []
+
+
+def test_crawl_website_failed_run_raises(client: ApifyToolsClient, mock_apify_client: MagicMock) -> None:
+    mock_apify_client.actor.return_value.call.return_value = FAILED_RUN
+
+    with pytest.raises(RuntimeError, match='run-fail'):
+        client.crawl_website('https://example.com')

From ec60765064eed63d6b8ac81f88a410531e5c0949 Mon Sep 17 00:00:00 2001
From: David Omrai <david.omrai@seznam.cz>
Date: Wed, 22 Apr 2026 10:22:29 +0200
Subject: [PATCH 26/63] feat: add actor tools unit tests

---
 tests/unit_tests/test_actor_tools.py | 184 +++++++++++++++++++++++++++
 1 file changed, 184 insertions(+)
 create mode 100644 tests/unit_tests/test_actor_tools.py

diff --git a/tests/unit_tests/test_actor_tools.py b/tests/unit_tests/test_actor_tools.py
new file mode 100644
index 0000000..2e5fbe4
--- /dev/null
+++ b/tests/unit_tests/test_actor_tools.py
@@ -0,0 +1,184 @@
+from __future__ import annotations
+
+import json
+from unittest.mock import MagicMock, patch
+
+import pytest
+from langchain_core.tools import ToolException
+
+from langchain_apify import APIFY_ACTOR_TOOLS, ApifyGoogleSearchTool, ApifyWebCrawlerTool
+from langchain_apify._client import ApifyToolsClient
+from langchain_apify.tools import _ApifyGenericTool
+from tests.unit_tests.conftest import make_tool
+
+# ---------------------------------------------------------------------------
+# ApifyGoogleSearchTool
+# ---------------------------------------------------------------------------
+
+
+def test_google_search_tool_returns_json(mock_tools_client: MagicMock) -> None:
+    mock_tools_client.google_search.return_value = [
+        {'title': 'Result 1', 'url': 'https://example.com/1', 'description': 'Desc 1'},
+        {'title': 'Result 2', 'url': 'https://example.com/2', 'description': 'Desc 2'},
+    ]
+    tool = make_tool(ApifyGoogleSearchTool, mock_tools_client)
+
+    result = tool._run(query='test query')
+
+    parsed = json.loads(result)
+    assert len(parsed) == 2
+    assert parsed[0]['title'] == 'Result 1'
+    assert parsed[1]['url'] == 'https://example.com/2'
+
+
+def test_google_search_tool_passes_params(mock_tools_client: MagicMock) -> None:
+    mock_tools_client.google_search.return_value = []
+    tool = make_tool(ApifyGoogleSearchTool, mock_tools_client)
+
+    tool._run(query='test', max_results=5, country_code='us', language_code='en')
+
+    mock_tools_client.google_search.assert_called_once_with(
+        'test',
+        max_results=5,
+        country_code='us',
+        language_code='en',
+        timeout_secs=600,
+    )
+
+
+def test_google_search_tool_clamps_max_results(mock_tools_client: MagicMock) -> None:
+    mock_tools_client.google_search.return_value = []
+    tool = make_tool(ApifyGoogleSearchTool, mock_tools_client, max_items=3)
+
+    tool._run(query='test', max_results=100)
+
+    call_kwargs = mock_tools_client.google_search.call_args
+    assert call_kwargs.kwargs['max_results'] == 3
+
+
+def test_google_search_tool_empty_results(mock_tools_client: MagicMock) -> None:
+    mock_tools_client.google_search.return_value = []
+    tool = make_tool(ApifyGoogleSearchTool, mock_tools_client)
+
+    result = tool._run(query='nothing')
+
+    assert json.loads(result) == []
+
+
+def test_google_search_tool_failure_raises_tool_exception(mock_tools_client: MagicMock) -> None:
+    mock_tools_client.google_search.side_effect = RuntimeError('Actor run run-bad ended with status FAILED.')
+    tool = make_tool(ApifyGoogleSearchTool, mock_tools_client)
+
+    with pytest.raises(ToolException, match='FAILED'):
+        tool._run(query='test')
+
+
+def test_google_search_tool_missing_token(monkeypatch: pytest.MonkeyPatch) -> None:
+    monkeypatch.delenv('APIFY_API_TOKEN', raising=False)
+    with pytest.raises(ValueError, match='APIFY_API_TOKEN'):
+        ApifyGoogleSearchTool()
+
+
+# ---------------------------------------------------------------------------
+# ApifyWebCrawlerTool
+# ---------------------------------------------------------------------------
+
+
+def test_web_crawler_tool_returns_json(mock_tools_client: MagicMock) -> None:
+    mock_tools_client.crawl_website.return_value = [
+        {'url': 'https://example.com/', 'markdown': '# Home', 'text': 'Home', 'metadata': {'title': 'Home'}},
+        {'url': 'https://example.com/about', 'markdown': '', 'text': 'About us', 'metadata': {'title': 'About'}},
+    ]
+    tool = make_tool(ApifyWebCrawlerTool, mock_tools_client)
+
+    result = tool._run(url='https://example.com')
+
+    parsed = json.loads(result)
+    assert len(parsed) == 2
+    assert parsed[0] == {'url': 'https://example.com/', 'title': 'Home', 'content': '# Home'}
+    assert parsed[1] == {'url': 'https://example.com/about', 'title': 'About', 'content': 'About us'}
+
+
+def test_web_crawler_tool_passes_params(mock_tools_client: MagicMock) -> None:
+    mock_tools_client.crawl_website.return_value = []
+    tool = make_tool(ApifyWebCrawlerTool, mock_tools_client)
+
+    tool._run(
+        url='https://example.com',
+        max_crawl_pages=5,
+        max_crawl_depth=2,
+        crawler_type='playwright',
+        timeout_secs=120,
+    )
+
+    mock_tools_client.crawl_website.assert_called_once_with(
+        'https://example.com',
+        max_crawl_pages=5,
+        max_crawl_depth=2,
+        crawler_type='playwright',
+        timeout_secs=120,
+    )
+
+
+def test_web_crawler_tool_clamps_pages_and_timeout(mock_tools_client: MagicMock) -> None:
+    mock_tools_client.crawl_website.return_value = []
+    tool = make_tool(ApifyWebCrawlerTool, mock_tools_client, max_items=3, max_timeout_secs=60)
+
+    tool._run(url='https://example.com', max_crawl_pages=100, timeout_secs=9999)
+
+    call_kwargs = mock_tools_client.crawl_website.call_args
+    assert call_kwargs.kwargs['max_crawl_pages'] == 3
+    assert call_kwargs.kwargs['timeout_secs'] == 60
+
+
+def test_web_crawler_tool_empty_results(mock_tools_client: MagicMock) -> None:
+    mock_tools_client.crawl_website.return_value = []
+    tool = make_tool(ApifyWebCrawlerTool, mock_tools_client)
+
+    result = tool._run(url='https://example.com')
+
+    assert json.loads(result) == []
+
+
+def test_web_crawler_tool_failure_raises_tool_exception(mock_tools_client: MagicMock) -> None:
+    mock_tools_client.crawl_website.side_effect = RuntimeError('Actor run run-bad ended with status TIMED-OUT.')
+    tool = make_tool(ApifyWebCrawlerTool, mock_tools_client)
+
+    with pytest.raises(ToolException, match='TIMED-OUT'):
+        tool._run(url='https://example.com')
+
+
+def test_web_crawler_tool_missing_token(monkeypatch: pytest.MonkeyPatch) -> None:
+    monkeypatch.delenv('APIFY_API_TOKEN', raising=False)
+    with pytest.raises(ValueError, match='APIFY_API_TOKEN'):
+        ApifyWebCrawlerTool()
+
+
+# ---------------------------------------------------------------------------
+# Metadata & inheritance
+# ---------------------------------------------------------------------------
+
+
+def test_actor_tools_inherit_from_generic_base() -> None:
+    for tool_cls in (ApifyGoogleSearchTool, ApifyWebCrawlerTool):
+        assert issubclass(tool_cls, _ApifyGenericTool), f'{tool_cls.__name__} must extend _ApifyGenericTool'
+
+
+def test_actor_tools_have_correct_metadata() -> None:
+    with patch.object(ApifyToolsClient, '__init__', return_value=None):
+        tools = [
+            ApifyGoogleSearchTool(apify_api_token='dummy'),
+            ApifyWebCrawlerTool(apify_api_token='dummy'),
+        ]
+
+    expected_names = ['apify_google_search', 'apify_web_crawler']
+    for tool, expected_name in zip(tools, expected_names):
+        assert tool.name == expected_name
+        assert tool.description
+        assert tool.args_schema is not None
+        assert tool.handle_tool_error is True
+
+
+def test_apify_actor_tools_list() -> None:
+    assert set(APIFY_ACTOR_TOOLS) == {ApifyGoogleSearchTool, ApifyWebCrawlerTool}
+    assert len(APIFY_ACTOR_TOOLS) == 2

From c07718663969ec1fdc9726a8ab767b0a6978ca51 Mon Sep 17 00:00:00 2001
From: David Omrai <david.omrai@seznam.cz>
Date: Wed, 22 Apr 2026 10:22:50 +0200
Subject: [PATCH 27/63] feat: add retrievers unit tests

---
 tests/unit_tests/test_retrievers.py | 224 ++++++++++++++++++++++++++++
 1 file changed, 224 insertions(+)
 create mode 100644 tests/unit_tests/test_retrievers.py

diff --git a/tests/unit_tests/test_retrievers.py b/tests/unit_tests/test_retrievers.py
new file mode 100644
index 0000000..779a9c8
--- /dev/null
+++ b/tests/unit_tests/test_retrievers.py
@@ -0,0 +1,224 @@
+from __future__ import annotations
+
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+from langchain_core.documents import Document
+
+from langchain_apify.retrievers import ApifySearchRetriever
+
+RAG_ITEMS: list[dict] = [
+    {
+        'crawledUrl': 'https://example.com/1',
+        'text': 'Page 1 content',
+        'metadata': {'title': 'Page 1'},
+    },
+    {
+        'crawledUrl': 'https://example.com/2',
+        'text': 'Page 2 content',
+        'metadata': {'title': 'Page 2'},
+    },
+]
+
+
+def _make_retriever(
+    mock_sync_client: MagicMock,
+    mock_async_client: MagicMock | None = None,
+    **kwargs: object,
+) -> ApifySearchRetriever:
+    """Create a retriever with mocked Apify clients."""
+    with (
+        patch('langchain_apify.retrievers.create_apify_client') as mock_create,
+    ):
+        mock_create.side_effect = [mock_sync_client, mock_async_client or MagicMock()]
+        return ApifySearchRetriever(apify_api_token='dummy-token', **kwargs)
+
+
+# ---------------------------------------------------------------------------
+# __init__
+# ---------------------------------------------------------------------------
+
+
+def test_missing_token_raises(monkeypatch: pytest.MonkeyPatch) -> None:
+    monkeypatch.delenv('APIFY_API_TOKEN', raising=False)
+    with pytest.raises(ValueError, match='APIFY_API_TOKEN'):
+        ApifySearchRetriever()
+
+
+def test_init_with_explicit_token() -> None:
+    with patch('langchain_apify.retrievers.create_apify_client'):
+        retriever = ApifySearchRetriever(apify_api_token='my-token')
+        assert retriever.max_results == 5
+        assert retriever.timeout_secs == 300
+
+
+def test_init_custom_params() -> None:
+    with patch('langchain_apify.retrievers.create_apify_client'):
+        retriever = ApifySearchRetriever(apify_api_token='t', max_results=3, timeout_secs=60)
+        assert retriever.max_results == 3
+        assert retriever.timeout_secs == 60
+
+
+# ---------------------------------------------------------------------------
+# _get_relevant_documents (sync)
+# ---------------------------------------------------------------------------
+
+
+def test_sync_returns_documents() -> None:
+    mock_client = MagicMock()
+    mock_client.actor.return_value.call.return_value = {
+        'id': 'run-1',
+        'status': 'SUCCEEDED',
+        'defaultDatasetId': 'ds-1',
+    }
+    mock_client.dataset.return_value.list_items.return_value.items = RAG_ITEMS
+    retriever = _make_retriever(mock_client, max_results=5)
+
+    docs = retriever._get_relevant_documents('test query')
+
+    assert len(docs) == 2
+    assert all(isinstance(d, Document) for d in docs)
+    assert docs[0].page_content == 'Page 1 content'
+    assert docs[0].metadata['source'] == 'https://example.com/1'
+    assert docs[0].metadata['title'] == 'Page 1'
+    assert docs[1].page_content == 'Page 2 content'
+    assert docs[1].metadata['source'] == 'https://example.com/2'
+
+
+def test_sync_passes_correct_input() -> None:
+    mock_client = MagicMock()
+    mock_client.actor.return_value.call.return_value = {
+        'defaultDatasetId': 'ds-1',
+    }
+    mock_client.dataset.return_value.list_items.return_value.items = []
+    retriever = _make_retriever(mock_client, max_results=3, timeout_secs=60)
+
+    retriever._get_relevant_documents('my search')
+
+    mock_client.actor.return_value.call.assert_called_once_with(
+        run_input={'query': 'my search', 'maxResults': 3},
+        timeout_secs=60,
+    )
+    mock_client.dataset.return_value.list_items.assert_called_once_with(
+        limit=3, clean=True,
+    )
+
+
+def test_sync_empty_results() -> None:
+    mock_client = MagicMock()
+    mock_client.actor.return_value.call.return_value = {
+        'defaultDatasetId': 'ds-1',
+    }
+    mock_client.dataset.return_value.list_items.return_value.items = []
+    retriever = _make_retriever(mock_client)
+
+    docs = retriever._get_relevant_documents('test')
+
+    assert docs == []
+
+
+def test_sync_none_run_returns_empty() -> None:
+    mock_client = MagicMock()
+    mock_client.actor.return_value.call.return_value = None
+    retriever = _make_retriever(mock_client)
+
+    docs = retriever._get_relevant_documents('test')
+
+    assert docs == []
+
+
+def test_sync_no_dataset_id_returns_empty() -> None:
+    mock_client = MagicMock()
+    mock_client.actor.return_value.call.return_value = {'id': 'run-1', 'defaultDatasetId': None}
+    retriever = _make_retriever(mock_client)
+
+    docs = retriever._get_relevant_documents('test')
+
+    assert docs == []
+
+
+# ---------------------------------------------------------------------------
+# _aget_relevant_documents (async)
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_async_returns_documents() -> None:
+    mock_async = MagicMock()
+    mock_async.actor.return_value.call = AsyncMock(return_value={
+        'id': 'run-1',
+        'status': 'SUCCEEDED',
+        'defaultDatasetId': 'ds-1',
+    })
+    mock_list_items = AsyncMock()
+    mock_list_items.return_value.items = RAG_ITEMS
+    mock_async.dataset.return_value.list_items = mock_list_items
+
+    retriever = _make_retriever(MagicMock(), mock_async, max_results=5)
+
+    docs = await retriever._aget_relevant_documents('test query')
+
+    assert len(docs) == 2
+    assert all(isinstance(d, Document) for d in docs)
+    assert docs[0].page_content == 'Page 1 content'
+    assert docs[0].metadata['source'] == 'https://example.com/1'
+
+
+@pytest.mark.asyncio
+async def test_async_none_run_returns_empty() -> None:
+    mock_async = MagicMock()
+    mock_async.actor.return_value.call = AsyncMock(return_value=None)
+    retriever = _make_retriever(MagicMock(), mock_async)
+
+    docs = await retriever._aget_relevant_documents('test')
+
+    assert docs == []
+
+
+@pytest.mark.asyncio
+async def test_async_no_dataset_id_returns_empty() -> None:
+    mock_async = MagicMock()
+    mock_async.actor.return_value.call = AsyncMock(return_value={'defaultDatasetId': None})
+    retriever = _make_retriever(MagicMock(), mock_async)
+
+    docs = await retriever._aget_relevant_documents('test')
+
+    assert docs == []
+
+
+# ---------------------------------------------------------------------------
+# _items_to_documents edge cases
+# ---------------------------------------------------------------------------
+
+
+def test_items_to_documents_uses_url_fallback() -> None:
+    items = [{'url': 'https://fallback.com', 'text': 'content', 'metadata': {'title': 'T'}}]
+
+    docs = ApifySearchRetriever._items_to_documents(items)
+
+    assert docs[0].metadata['source'] == 'https://fallback.com'
+
+
+def test_items_to_documents_uses_markdown_fallback() -> None:
+    items = [{'crawledUrl': 'https://example.com', 'markdown': '# MD content', 'metadata': {'title': 'T'}}]
+
+    docs = ApifySearchRetriever._items_to_documents(items)
+
+    assert docs[0].page_content == '# MD content'
+
+
+def test_items_to_documents_missing_metadata() -> None:
+    items = [{'crawledUrl': 'https://example.com', 'text': 'content'}]
+
+    docs = ApifySearchRetriever._items_to_documents(items)
+
+    assert docs[0].metadata['title'] == ''
+    assert docs[0].metadata['source'] == 'https://example.com'
+
+
+def test_items_to_documents_non_dict_metadata() -> None:
+    items = [{'crawledUrl': 'https://example.com', 'text': 'content', 'metadata': 'not-a-dict'}]
+
+    docs = ApifySearchRetriever._items_to_documents(items)
+
+    assert docs[0].metadata['title'] == ''

From 0b4ecbb3c3323230465994a3dd2925834b524bff Mon Sep 17 00:00:00 2001
From: David Omrai <david.omrai@seznam.cz>
Date: Wed, 22 Apr 2026 10:50:17 +0200
Subject: [PATCH 28/63] feat: simplify apify crawl loader init and enhance unit
 tests

---
 langchain_apify/document_loaders.py       |  22 +---
 tests/unit_tests/test_document_loaders.py | 140 +++++++++++++++++++++-
 2 files changed, 144 insertions(+), 18 deletions(-)

diff --git a/langchain_apify/document_loaders.py b/langchain_apify/document_loaders.py
index d8064a8..3a48329 100644
--- a/langchain_apify/document_loaders.py
+++ b/langchain_apify/document_loaders.py
@@ -8,7 +8,7 @@
 from langchain_core.document_loaders.base import BaseLoader
 from langchain_core.documents import Document  # noqa: TCH002
 from langchain_core.utils import get_from_dict_or_env
-from pydantic import BaseModel, ConfigDict, Field, PrivateAttr, model_validator
+from pydantic import BaseModel, ConfigDict, model_validator
 
 from langchain_apify._client import ApifyToolsClient
 from langchain_apify.utils import create_apify_client
@@ -151,14 +151,6 @@ class ApifyCrawlLoader(BaseLoader):
             documents = loader.load()
     """
 
-    url: str
-    max_crawl_pages: int = Field(default=10)
-    max_crawl_depth: int = Field(default=1)
-    crawler_type: str = Field(default='cheerio')
-    timeout_secs: int = Field(default=300)
-
-    _client: ApifyToolsClient = PrivateAttr()
-
     def __init__(
         self,
         url: str,
@@ -169,13 +161,11 @@ def __init__(
         crawler_type: str = 'cheerio',
         timeout_secs: int = 300,
     ) -> None:
-        super().__init__(
-            url=url,
-            max_crawl_pages=max_crawl_pages,
-            max_crawl_depth=max_crawl_depth,
-            crawler_type=crawler_type,
-            timeout_secs=timeout_secs,
-        )
+        self.url = url
+        self.max_crawl_pages = max_crawl_pages
+        self.max_crawl_depth = max_crawl_depth
+        self.crawler_type = crawler_type
+        self.timeout_secs = timeout_secs
         self._client = ApifyToolsClient(apify_api_token=apify_api_token)
 
     def lazy_load(self) -> Iterator[Document]:
diff --git a/tests/unit_tests/test_document_loaders.py b/tests/unit_tests/test_document_loaders.py
index a6c7a61..4b3d493 100644
--- a/tests/unit_tests/test_document_loaders.py
+++ b/tests/unit_tests/test_document_loaders.py
@@ -1,10 +1,14 @@
-from unittest.mock import patch
+from __future__ import annotations
 
+from unittest.mock import MagicMock, patch
+
+import pytest
 from apify_client._types import ListPage
 from apify_client.clients import DatasetClient
 from langchain_core.documents import Document
 
-from langchain_apify import ApifyDatasetLoader
+from langchain_apify import ApifyCrawlLoader, ApifyDatasetLoader
+from langchain_apify._client import ApifyToolsClient
 
 
 def test_apify_dataset_loader_load() -> None:
@@ -55,3 +59,135 @@ def test_apify_dataset_loader_lazy_load() -> None:
         mock_list_items.assert_called_once()
         assert documents[0].page_content == 'Apify is great!'
         assert documents[0].metadata['source'] == 'https://apify.com'
+
+
+# ---------------------------------------------------------------------------
+# ApifyCrawlLoader
+# ---------------------------------------------------------------------------
+
+CRAWL_ITEMS: list[dict] = [
+    {
+        'url': 'https://example.com/',
+        'markdown': '# Home',
+        'text': 'Home',
+        'metadata': {'title': 'Home Page'},
+        'crawlDepth': 0,
+    },
+    {
+        'url': 'https://example.com/about',
+        'markdown': '# About',
+        'text': 'About',
+        'metadata': {'title': 'About Page'},
+        'crawlDepth': 1,
+    },
+]
+
+
+def _make_crawl_loader(
+    mock_client: MagicMock,
+    **kwargs: object,
+) -> ApifyCrawlLoader:
+    with patch.object(ApifyToolsClient, '__init__', return_value=None):
+        loader = ApifyCrawlLoader(url='https://example.com', apify_api_token='dummy', **kwargs)
+    loader._client = mock_client
+    return loader
+
+
+def test_crawl_loader_lazy_load() -> None:
+    mock_client = MagicMock(spec=ApifyToolsClient)
+    mock_client.crawl_website.return_value = CRAWL_ITEMS
+    loader = _make_crawl_loader(mock_client)
+
+    docs = list(loader.lazy_load())
+
+    assert len(docs) == 2
+    assert all(isinstance(d, Document) for d in docs)
+    assert docs[0].page_content == '# Home'
+    assert docs[0].metadata['source'] == 'https://example.com/'
+    assert docs[0].metadata['title'] == 'Home Page'
+    assert docs[0].metadata['crawl_depth'] == 0
+    assert docs[1].page_content == '# About'
+    assert docs[1].metadata['crawl_depth'] == 1
+
+
+def test_crawl_loader_load_delegates_to_lazy_load() -> None:
+    mock_client = MagicMock(spec=ApifyToolsClient)
+    mock_client.crawl_website.return_value = CRAWL_ITEMS
+    loader = _make_crawl_loader(mock_client)
+
+    docs = loader.load()
+
+    assert len(docs) == 2
+    assert docs[0].page_content == '# Home'
+
+
+def test_crawl_loader_passes_params() -> None:
+    mock_client = MagicMock(spec=ApifyToolsClient)
+    mock_client.crawl_website.return_value = []
+    loader = _make_crawl_loader(
+        mock_client,
+        max_crawl_pages=5,
+        max_crawl_depth=2,
+        crawler_type='playwright',
+        timeout_secs=120,
+    )
+
+    list(loader.lazy_load())
+
+    mock_client.crawl_website.assert_called_once_with(
+        'https://example.com',
+        max_crawl_pages=5,
+        max_crawl_depth=2,
+        crawler_type='playwright',
+        timeout_secs=120,
+    )
+
+
+def test_crawl_loader_empty_results() -> None:
+    mock_client = MagicMock(spec=ApifyToolsClient)
+    mock_client.crawl_website.return_value = []
+    loader = _make_crawl_loader(mock_client)
+
+    docs = loader.load()
+
+    assert docs == []
+
+
+def test_crawl_loader_text_fallback() -> None:
+    mock_client = MagicMock(spec=ApifyToolsClient)
+    mock_client.crawl_website.return_value = [
+        {'url': 'https://example.com/', 'text': 'Plain text', 'metadata': {'title': 'T'}},
+    ]
+    loader = _make_crawl_loader(mock_client)
+
+    docs = list(loader.lazy_load())
+
+    assert docs[0].page_content == 'Plain text'
+
+
+def test_crawl_loader_missing_metadata() -> None:
+    mock_client = MagicMock(spec=ApifyToolsClient)
+    mock_client.crawl_website.return_value = [
+        {'url': 'https://example.com/', 'markdown': '# Content'},
+    ]
+    loader = _make_crawl_loader(mock_client)
+
+    docs = list(loader.lazy_load())
+
+    assert docs[0].metadata['title'] == ''
+    assert docs[0].metadata['crawl_depth'] == 0
+
+
+def test_crawl_loader_missing_token(monkeypatch: pytest.MonkeyPatch) -> None:
+    monkeypatch.delenv('APIFY_API_TOKEN', raising=False)
+    with pytest.raises(ValueError, match='APIFY_API_TOKEN'):
+        ApifyCrawlLoader(url='https://example.com')
+
+
+def test_crawl_loader_failure_raises() -> None:
+    mock_client = MagicMock(spec=ApifyToolsClient)
+    mock_client.crawl_website.side_effect = RuntimeError('Actor run run-bad ended with status FAILED.')
+    loader = _make_crawl_loader(mock_client)
+
+    with pytest.raises(RuntimeError, match='FAILED'):
+        loader.load()

From 005294b8cb0c48b4e6a95d926a7a7401c88343b4 Mon Sep 17 00:00:00 2001
From: David Omrai <david.omrai@seznam.cz>
Date: Wed, 22 Apr 2026 14:08:24 +0200
Subject: [PATCH 29/63] ref: align private scope conventions with langchain
 partner package standards

---
 langchain_apify/_client.py              | 18 ++++++++---------
 langchain_apify/_error_messages.py      |  6 +++---
 langchain_apify/{utils.py => _utils.py} |  8 ++++----
 langchain_apify/document_loaders.py     |  9 ++++-----
 langchain_apify/tools.py                | 27 ++++++++++++++-----------
 langchain_apify/wrappers.py             | 12 +++++------
 tests/integration_tests/test_utils.py   | 10 ++++-----
 tests/unit_tests/conftest.py            |  2 +-
 tests/unit_tests/test_client.py         |  4 ++--
 tests/unit_tests/test_tools.py          |  4 ++--
 10 files changed, 51 insertions(+), 49 deletions(-)
 rename langchain_apify/{utils.py => _utils.py} (94%)

diff --git a/langchain_apify/_client.py b/langchain_apify/_client.py
index b131484..84e840a 100644
--- a/langchain_apify/_client.py
+++ b/langchain_apify/_client.py
@@ -5,11 +5,11 @@
 from apify_client import ApifyClient
 
 from langchain_apify._error_messages import (
-    ERROR_ACTOR_RUN_FAILED,
-    ERROR_APIFY_TOKEN_ENV_VAR_NOT_SET,
-    ERROR_SCRAPE_EMPTY,
+    _ERROR_ACTOR_RUN_FAILED,
+    _ERROR_APIFY_TOKEN_ENV_VAR_NOT_SET,
+    _ERROR_SCRAPE_EMPTY,
 )
-from langchain_apify.utils import create_apify_client
+from langchain_apify._utils import _create_apify_client
 
 _SCRAPE_ACTOR_ID = 'apify/website-content-crawler'
 _DEFAULT_RUN_TIMEOUT_SECS = 300
@@ -35,9 +35,9 @@ class ApifyToolsClient:
     def __init__(self, apify_api_token: str | None = None) -> None:
         token = apify_api_token or os.getenv('APIFY_API_TOKEN')
         if not token:
-            msg = ERROR_APIFY_TOKEN_ENV_VAR_NOT_SET
+            msg = _ERROR_APIFY_TOKEN_ENV_VAR_NOT_SET
             raise ValueError(msg)
-        self._client = create_apify_client(ApifyClient, token)
+        self._client = _create_apify_client(ApifyClient, token)
 
     def run_actor(
         self,
@@ -230,12 +230,12 @@ def scrape_url(self, url: str, timeout_secs: int = _DEFAULT_SCRAPE_TIMEOUT_SECS)
             dataset_items_limit=1,
         )
         if not items:
-            msg = ERROR_SCRAPE_EMPTY.format(url=url)
+            msg = _ERROR_SCRAPE_EMPTY.format(url=url)
             raise RuntimeError(msg)
 
         content = items[0].get('markdown') or items[0].get('text') or ''
         if not content:
-            msg = ERROR_SCRAPE_EMPTY.format(url=url)
+            msg = _ERROR_SCRAPE_EMPTY.format(url=url)
             raise RuntimeError(msg)
         return content
 
@@ -245,5 +245,5 @@ def _check_run_status(run: dict) -> None:
         status = run.get('status')
         if status != _RUN_STATUS_SUCCEEDED:
             run_id = run.get('id', 'unknown')
-            msg = ERROR_ACTOR_RUN_FAILED.format(run_id=run_id, status=status)
+            msg = _ERROR_ACTOR_RUN_FAILED.format(run_id=run_id, status=status)
             raise RuntimeError(msg)
diff --git a/langchain_apify/_error_messages.py b/langchain_apify/_error_messages.py
index a87c9cb..0a8c612 100644
--- a/langchain_apify/_error_messages.py
+++ b/langchain_apify/_error_messages.py
@@ -1,4 +1,4 @@
-ERROR_APIFY_TOKEN_ENV_VAR_NOT_SET = (
+_ERROR_APIFY_TOKEN_ENV_VAR_NOT_SET = (
     'APIFY_API_TOKEN environment variable is not set.'
     ' Please set it to your Apify API token by using `os.environ["APIFY_API_TOKEN"] = "YOUR_APIFY_API_TOKEN"'
     ' in your code or pass it as environment variable.'
@@ -6,6 +6,6 @@
     ' `APIFY_API_TOKEN="YOUR_APIFY_API_TOKEN" python your_script.py`'
 )
 
-ERROR_ACTOR_RUN_FAILED = 'Actor run {run_id} ended with status {status}.'
+_ERROR_ACTOR_RUN_FAILED = 'Actor run {run_id} ended with status {status}.'
 
-ERROR_SCRAPE_EMPTY = 'No content extracted from {url}.'
+_ERROR_SCRAPE_EMPTY = 'No content extracted from {url}.'
diff --git a/langchain_apify/utils.py b/langchain_apify/_utils.py
similarity index 94%
rename from langchain_apify/utils.py
rename to langchain_apify/_utils.py
index d3a627f..b19bcbf 100644
--- a/langchain_apify/utils.py
+++ b/langchain_apify/_utils.py
@@ -12,7 +12,7 @@
 _APIFY_API_ENDPOINT_GET_DEFAULT_BUILD: str = 'https://api.apify.com/v2/acts/{actor_id}/builds/default'
 
 
-def prune_actor_input_schema(
+def _prune_actor_input_schema(
     input_schema: dict,
     max_description_len: int = _MAX_DESCRIPTION_LEN,
 ) -> tuple[dict, list[str]]:
@@ -48,7 +48,7 @@ def prune_actor_input_schema(
 T = TypeVar('T', ApifyClient, ApifyClientAsync)
 
 
-def create_apify_client(client_cls: type[T], token: str) -> T:
+def _create_apify_client(client_cls: type[T], token: str) -> T:
     """Create an Apify client instance with a custom user-agent.
 
     Args:
@@ -79,7 +79,7 @@ def create_apify_client(client_cls: type[T], token: str) -> T:
     return client
 
 
-def actor_id_to_tool_name(actor_id: str) -> str:
+def _actor_id_to_tool_name(actor_id: str) -> str:
     """Turn actor_id into a valid tool name.
 
     Tool name must only contain letters, numbers, underscores, dashes,
@@ -95,7 +95,7 @@ def actor_id_to_tool_name(actor_id: str) -> str:
     return 'apify_actor_' + ''.join(char if char in valid_chars else '_' for char in actor_id)
 
 
-def get_actor_latest_build(apify_client: ApifyClient, actor_id: str) -> dict:
+def _get_actor_latest_build(apify_client: ApifyClient, actor_id: str) -> dict:
     """Get the latest build of an Actor from the default build tag.
 
     Args:
diff --git a/langchain_apify/document_loaders.py b/langchain_apify/document_loaders.py
index 49befb6..8554872 100644
--- a/langchain_apify/document_loaders.py
+++ b/langchain_apify/document_loaders.py
@@ -8,9 +8,9 @@
 from langchain_core.document_loaders.base import BaseLoader
 from langchain_core.documents import Document  # noqa: TCH002
 from langchain_core.utils import get_from_dict_or_env
-from pydantic import BaseModel, ConfigDict, model_validator
+from pydantic import BaseModel, ConfigDict, Field, model_validator
 
-from langchain_apify.utils import create_apify_client
+from langchain_apify._utils import _create_apify_client
 
 if TYPE_CHECKING:
     from collections.abc import Iterator
@@ -42,8 +42,7 @@ class ApifyDatasetLoader(BaseLoader, BaseModel):
 
     model_config = ConfigDict(arbitrary_types_allowed=True)
 
-    apify_client: ApifyClient
-    """An instance of the ApifyClient class from the apify-client Python package."""
+    apify_client: ApifyClient = Field(default=None, exclude=True)
     dataset_id: str
     """The ID of the dataset on the Apify platform."""
     dataset_mapping_function: Callable[[dict], Document]
@@ -86,7 +85,7 @@ def validate_environment(cls, values: dict) -> Any:  # noqa: ANN401
         # when running at Apify platform, use APIFY_TOKEN environment variable
         apify_api_token = apify_api_token or os.getenv('APIFY_TOKEN', '')
 
-        client = create_apify_client(ApifyClient, apify_api_token)
+        client = _create_apify_client(ApifyClient, apify_api_token)
 
         values['apify_client'] = client
 
diff --git a/langchain_apify/tools.py b/langchain_apify/tools.py
index f771d35..81d9166 100644
--- a/langchain_apify/tools.py
+++ b/langchain_apify/tools.py
@@ -10,13 +10,13 @@
 from pydantic import BaseModel, Field, PrivateAttr, create_model
 
 from langchain_apify._client import ApifyToolsClient
-from langchain_apify._error_messages import ERROR_APIFY_TOKEN_ENV_VAR_NOT_SET
-from langchain_apify.utils import (
+from langchain_apify._error_messages import _ERROR_APIFY_TOKEN_ENV_VAR_NOT_SET
+from langchain_apify._utils import (
     _MAX_DESCRIPTION_LEN,
-    actor_id_to_tool_name,
-    create_apify_client,
-    get_actor_latest_build,
-    prune_actor_input_schema,
+    _actor_id_to_tool_name,
+    _create_apify_client,
+    _get_actor_latest_build,
+    _prune_actor_input_schema,
 )
 
 if TYPE_CHECKING:
@@ -57,6 +57,9 @@ class ApifyActorsTool(BaseTool):  # type: ignore[override, override]
                 chunk["messages"][-1].pretty_print()
     """
 
+    _apify_client: ApifyClient = PrivateAttr()
+    _actor_id: str = PrivateAttr()
+
     def __init__(
         self,
         actor_id: str,
@@ -77,14 +80,14 @@ def __init__(
         """
         apify_api_token = apify_api_token or os.getenv('APIFY_API_TOKEN')
         if not apify_api_token:
-            msg = ERROR_APIFY_TOKEN_ENV_VAR_NOT_SET
+            msg = _ERROR_APIFY_TOKEN_ENV_VAR_NOT_SET
             raise ValueError(msg)
 
-        apify_client = create_apify_client(ApifyClient, apify_api_token)
+        apify_client = _create_apify_client(ApifyClient, apify_api_token)
 
         kwargs.update(
             {
-                'name': actor_id_to_tool_name(actor_id),
+                'name': _actor_id_to_tool_name(actor_id),
                 'description': self._create_description(apify_client, actor_id),
                 'args_schema': self._build_tool_args_schema_model(
                     apify_client,
@@ -127,7 +130,7 @@ def _create_description(apify_client: ApifyClient, actor_id: str) -> str:
         Returns:
             str: The description.
         """
-        build = get_actor_latest_build(apify_client, actor_id)
+        build = _get_actor_latest_build(apify_client, actor_id)
         actor_description = build.get('actorDefinition', {}).get('description', '')
         if len(actor_description) > _MAX_DESCRIPTION_LEN:
             actor_description = actor_description[:_MAX_DESCRIPTION_LEN] + '...(TRUNCATED, TOO LONG)'
@@ -150,12 +153,12 @@ def _build_tool_args_schema_model(
         Raises:
             ValueError: If the input schema is not found in the Actor build.
         """
-        build = get_actor_latest_build(apify_client, actor_id)
+        build = _get_actor_latest_build(apify_client, actor_id)
         if not (actor_input := build.get('actorDefinition', {}).get('input')):
             msg = f'Input schema not found in the Actor build for Actor: {actor_id}'
             raise ValueError(msg)
 
-        properties, required = prune_actor_input_schema(actor_input)
+        properties, required = _prune_actor_input_schema(actor_input)
         properties = {'run_input': properties}
 
         description = (
diff --git a/langchain_apify/wrappers.py b/langchain_apify/wrappers.py
index ef17873..34370fe 100644
--- a/langchain_apify/wrappers.py
+++ b/langchain_apify/wrappers.py
@@ -5,10 +5,10 @@
 
 from apify_client import ApifyClient, ApifyClientAsync
 from langchain_core.utils import get_from_dict_or_env
-from pydantic import BaseModel, ConfigDict, model_validator
+from pydantic import BaseModel, ConfigDict, Field, model_validator
 
+from langchain_apify._utils import _create_apify_client
 from langchain_apify.document_loaders import ApifyDatasetLoader
-from langchain_apify.utils import create_apify_client
 
 if TYPE_CHECKING:
     from collections.abc import Callable
@@ -53,8 +53,8 @@ class ApifyWrapper(BaseModel):
     # allow arbitrary types in the model config for the apify client fields
     model_config = ConfigDict(arbitrary_types_allowed=True)
 
-    apify_client: ApifyClient
-    apify_client_async: ApifyClientAsync
+    apify_client: ApifyClient = Field(default=None, exclude=True)
+    apify_client_async: ApifyClientAsync = Field(default=None, exclude=True)
     apify_api_token: str | None = None
 
     def __init__(
@@ -90,8 +90,8 @@ def validate_environment(cls, values: dict) -> Any:  # noqa: ANN401
         """
         apify_api_token = get_from_dict_or_env(values, 'apify_api_token', 'APIFY_API_TOKEN')
 
-        values['apify_client'] = create_apify_client(ApifyClient, apify_api_token)
-        values['apify_client_async'] = create_apify_client(ApifyClientAsync, apify_api_token)
+        values['apify_client'] = _create_apify_client(ApifyClient, apify_api_token)
+        values['apify_client_async'] = _create_apify_client(ApifyClientAsync, apify_api_token)
 
         return values
 
diff --git a/tests/integration_tests/test_utils.py b/tests/integration_tests/test_utils.py
index 554cc2d..c92c038 100644
--- a/tests/integration_tests/test_utils.py
+++ b/tests/integration_tests/test_utils.py
@@ -2,8 +2,8 @@
 
 from apify_client.client import ApifyClient
 
-from langchain_apify._error_messages import ERROR_APIFY_TOKEN_ENV_VAR_NOT_SET
-from langchain_apify.utils import create_apify_client, get_actor_latest_build
+from langchain_apify._error_messages import _ERROR_APIFY_TOKEN_ENV_VAR_NOT_SET
+from langchain_apify._utils import _create_apify_client, _get_actor_latest_build
 
 
 def test_get_actor_latest_build() -> None:
@@ -13,12 +13,12 @@ def test_get_actor_latest_build() -> None:
         ValueError: If the APIFY_API_TOKEN environment variable is not set.
     """
     if (token := os.getenv('APIFY_API_TOKEN')) is None:
-        msg = ERROR_APIFY_TOKEN_ENV_VAR_NOT_SET
+        msg = _ERROR_APIFY_TOKEN_ENV_VAR_NOT_SET
         raise ValueError(msg)
 
-    apify_client = create_apify_client(ApifyClient, token)
+    apify_client = _create_apify_client(ApifyClient, token)
 
-    build = get_actor_latest_build(apify_client, 'apify/rag-web-browser')
+    build = _get_actor_latest_build(apify_client, 'apify/rag-web-browser')
 
     assert isinstance(build, dict)
     assert 'id' in build
diff --git a/tests/unit_tests/conftest.py b/tests/unit_tests/conftest.py
index eedadb9..3384e79 100644
--- a/tests/unit_tests/conftest.py
+++ b/tests/unit_tests/conftest.py
@@ -39,7 +39,7 @@ def mock_apify_client() -> MagicMock:
 
 @pytest.fixture
 def client(mock_apify_client: MagicMock) -> ApifyToolsClient:
-    with patch('langchain_apify._client.create_apify_client', return_value=mock_apify_client):
+    with patch('langchain_apify._client._create_apify_client', return_value=mock_apify_client):
         return ApifyToolsClient(apify_api_token='dummy-token')
 
 
diff --git a/tests/unit_tests/test_client.py b/tests/unit_tests/test_client.py
index 1c93f84..40c73dc 100644
--- a/tests/unit_tests/test_client.py
+++ b/tests/unit_tests/test_client.py
@@ -13,7 +13,7 @@
 
 
 def test_init_with_explicit_token(mock_apify_client: MagicMock) -> None:
-    with patch('langchain_apify._client.create_apify_client', return_value=mock_apify_client) as mock_create:
+    with patch('langchain_apify._client._create_apify_client', return_value=mock_apify_client) as mock_create:
         c = ApifyToolsClient(apify_api_token='my-token')
         mock_create.assert_called_once()
         assert c._client is mock_apify_client
@@ -21,7 +21,7 @@ def test_init_with_explicit_token(mock_apify_client: MagicMock) -> None:
 
 def test_init_with_env_token(monkeypatch: pytest.MonkeyPatch, mock_apify_client: MagicMock) -> None:
     monkeypatch.setenv('APIFY_API_TOKEN', 'env-token')
-    with patch('langchain_apify._client.create_apify_client', return_value=mock_apify_client):
+    with patch('langchain_apify._client._create_apify_client', return_value=mock_apify_client):
         c = ApifyToolsClient()
         assert c._client is mock_apify_client
 
diff --git a/tests/unit_tests/test_tools.py b/tests/unit_tests/test_tools.py
index 6698589..542ec4e 100644
--- a/tests/unit_tests/test_tools.py
+++ b/tests/unit_tests/test_tools.py
@@ -11,6 +11,7 @@
 
 from langchain_apify import APIFY_CORE_TOOLS
 from langchain_apify._client import ApifyToolsClient
+from langchain_apify._utils import _actor_id_to_tool_name
 from langchain_apify.tools import (
     ApifyActorsTool,
     ApifyGetDatasetItemsTool,
@@ -23,7 +24,6 @@
     _iso,
     _run_meta,
 )
-from langchain_apify.utils import actor_id_to_tool_name
 from tests.unit_tests.conftest import SAMPLE_ITEMS, SUCCEEDED_RUN, make_tool
 
 if TYPE_CHECKING:
@@ -57,7 +57,7 @@ class DummyModel(BaseModel):
         tool = ApifyActorsTool(actor_id=actor_id, apify_api_token='dummy-token')
         assert isinstance(tool, ApifyActorsTool)
         assert tool.description == 'Mocked description'
-        assert tool.name == actor_id_to_tool_name(actor_id)
+        assert tool.name == _actor_id_to_tool_name(actor_id)
         assert tool.args_schema == DummyModel
 
 

From 2f74c292ccf9422480484ce4921a4d3919c4c672 Mon Sep 17 00:00:00 2001
From: David Omrai <david.omrai@seznam.cz>
Date: Thu, 23 Apr 2026 13:08:13 +0200
Subject: [PATCH 30/63] ref: migrate auth to SecretStr + secret_from_env
 pattern

---
 langchain_apify/document_loaders.py | 50 ++++++++++++++++-----------
 langchain_apify/tools.py            | 26 ++++++++++----
 langchain_apify/wrappers.py         | 53 +++++++++++++++--------------
 3 files changed, 76 insertions(+), 53 deletions(-)

diff --git a/langchain_apify/document_loaders.py b/langchain_apify/document_loaders.py
index 8554872..3a777f3 100644
--- a/langchain_apify/document_loaders.py
+++ b/langchain_apify/document_loaders.py
@@ -7,9 +7,10 @@
 from apify_client import ApifyClient
 from langchain_core.document_loaders.base import BaseLoader
 from langchain_core.documents import Document  # noqa: TCH002
-from langchain_core.utils import get_from_dict_or_env
-from pydantic import BaseModel, ConfigDict, Field, model_validator
+from langchain_core.utils import secret_from_env
+from pydantic import BaseModel, ConfigDict, Field, SecretStr, model_validator
 
+from langchain_apify._error_messages import _ERROR_APIFY_TOKEN_ENV_VAR_NOT_SET
 from langchain_apify._utils import _create_apify_client
 
 if TYPE_CHECKING:
@@ -40,8 +41,12 @@ class ApifyDatasetLoader(BaseLoader, BaseModel):
             documents = loader.load()
     """
 
-    model_config = ConfigDict(arbitrary_types_allowed=True)
+    model_config = ConfigDict(arbitrary_types_allowed=True, populate_by_name=True)
 
+    apify_api_token: SecretStr | None = Field(
+        default_factory=secret_from_env('APIFY_API_TOKEN', default=None),
+        description='Apify API token. Falls back to APIFY_API_TOKEN / APIFY_TOKEN environment variables.',
+    )
     apify_client: ApifyClient = Field(default=None, exclude=True)
     dataset_id: str
     """The ID of the dataset on the Apify platform."""
@@ -62,7 +67,8 @@ def __init__(
             dataset_mapping_function (Callable): A function that takes a single
                 dictionary (an Apify dataset item) and converts it to an instance
                 of the Document class.
-            apify_api_token (str): Apify API token.
+            apify_api_token (str): Apify API token. Falls back to the
+                ``APIFY_API_TOKEN`` / ``APIFY_TOKEN`` environment variables.
         """
         super().__init__(
             dataset_id=dataset_id,
@@ -70,26 +76,30 @@ def __init__(
             apify_api_token=apify_api_token,
         )
 
-    @model_validator(mode='before')
-    @classmethod
-    def validate_environment(cls, values: dict) -> Any:  # noqa: ANN401
-        """Validate environment.
+    @model_validator(mode='after')
+    def _init_client(self) -> 'ApifyDatasetLoader':
+        """Resolve the Apify API token and initialise the client.
 
-        Args:
-            values (dict): The values to validate.
+        Checks ``APIFY_TOKEN`` as a secondary fallback for code running on the
+        Apify platform where only that variable is set.
 
         Returns:
-            Any: The validated values.
-        """
-        apify_api_token = get_from_dict_or_env(values, 'apify_api_token', 'APIFY_API_TOKEN')
-        # when running at Apify platform, use APIFY_TOKEN environment variable
-        apify_api_token = apify_api_token or os.getenv('APIFY_TOKEN', '')
-
-        client = _create_apify_client(ApifyClient, apify_api_token)
+            ApifyDatasetLoader: The validated loader instance.
 
-        values['apify_client'] = client
-
-        return values
+        Raises:
+            ValueError: If no token is available from any source.
+        """
+        token = self.apify_api_token
+        if token is None:
+            # Secondary fallback for code running on the Apify platform.
+            raw = os.getenv('APIFY_TOKEN')
+            if raw:
+                token = SecretStr(raw)
+        if token is None:
+            msg = _ERROR_APIFY_TOKEN_ENV_VAR_NOT_SET
+            raise ValueError(msg)
+        self.apify_client = _create_apify_client(ApifyClient, token.get_secret_value())
+        return self
 
     def load(self) -> list[Document]:
         """Load documents.
diff --git a/langchain_apify/tools.py b/langchain_apify/tools.py
index 81d9166..0097f4c 100644
--- a/langchain_apify/tools.py
+++ b/langchain_apify/tools.py
@@ -7,7 +7,8 @@
 
 from apify_client import ApifyClient
 from langchain_core.tools import BaseTool, ToolException
-from pydantic import BaseModel, Field, PrivateAttr, create_model
+from langchain_core.utils import secret_from_env
+from pydantic import BaseModel, Field, PrivateAttr, SecretStr, create_model
 
 from langchain_apify._client import ApifyToolsClient
 from langchain_apify._error_messages import _ERROR_APIFY_TOKEN_ENV_VAR_NOT_SET
@@ -78,12 +79,16 @@ def __init__(
         Raises:
             ValueError: If the `APIFY_API_TOKEN` environment variable is not set
         """
-        apify_api_token = apify_api_token or os.getenv('APIFY_API_TOKEN')
-        if not apify_api_token:
+        _raw_token: str | None = (
+            apify_api_token.get_secret_value()
+            if isinstance(apify_api_token, SecretStr)
+            else apify_api_token or os.getenv('APIFY_API_TOKEN')
+        )
+        if not _raw_token:
             msg = _ERROR_APIFY_TOKEN_ENV_VAR_NOT_SET
             raise ValueError(msg)
 
-        apify_client = _create_apify_client(ApifyClient, apify_api_token)
+        apify_client = _create_apify_client(ApifyClient, _raw_token)
 
         kwargs.update(
             {
@@ -305,15 +310,22 @@ class _ApifyGenericTool(BaseTool):  # type: ignore[override]
 
     handle_tool_error: bool = True
 
+    apify_api_token: SecretStr | None = Field(
+        default_factory=secret_from_env('APIFY_API_TOKEN', default=None),
+        description='Apify API token. Falls back to the APIFY_API_TOKEN environment variable when None.',
+    )
     max_timeout_secs: int = Field(default=600, description='Upper bound for timeout_secs the LLM may request.')
     max_memory_mbytes: int = Field(default=32768, description='Upper bound for memory_mbytes the LLM may request.')
     max_items: int = Field(default=1000, description='Upper bound for limit / dataset_items_limit the LLM may request.')
 
     _client: ApifyToolsClient = PrivateAttr()
 
-    def __init__(self, apify_api_token: str | None = None, **kwargs: Any) -> None:  # noqa: ANN401
-        super().__init__(**kwargs)
-        self._client = ApifyToolsClient(apify_api_token=apify_api_token)
+    def model_post_init(self, __context: Any) -> None:  # noqa: ANN401
+        if self.apify_api_token is None:
+            msg = _ERROR_APIFY_TOKEN_ENV_VAR_NOT_SET
+            raise ValueError(msg)
+        self._client = ApifyToolsClient(apify_api_token=self.apify_api_token.get_secret_value())
+        super().model_post_init(__context)
 
     def _clamp_timeout(self, value: int) -> int:
         return min(value, self.max_timeout_secs)
diff --git a/langchain_apify/wrappers.py b/langchain_apify/wrappers.py
index 34370fe..9af591a 100644
--- a/langchain_apify/wrappers.py
+++ b/langchain_apify/wrappers.py
@@ -4,9 +4,10 @@
 from typing import TYPE_CHECKING, Any
 
 from apify_client import ApifyClient, ApifyClientAsync
-from langchain_core.utils import get_from_dict_or_env
-from pydantic import BaseModel, ConfigDict, Field, model_validator
+from langchain_core.utils import secret_from_env
+from pydantic import BaseModel, ConfigDict, Field, SecretStr, model_validator
 
+from langchain_apify._error_messages import _ERROR_APIFY_TOKEN_ENV_VAR_NOT_SET
 from langchain_apify._utils import _create_apify_client
 from langchain_apify.document_loaders import ApifyDatasetLoader
 
@@ -51,11 +52,14 @@ class ApifyWrapper(BaseModel):
     """
 
     # allow arbitrary types in the model config for the apify client fields
-    model_config = ConfigDict(arbitrary_types_allowed=True)
+    model_config = ConfigDict(arbitrary_types_allowed=True, populate_by_name=True)
 
+    apify_api_token: SecretStr | None = Field(
+        default_factory=secret_from_env('APIFY_API_TOKEN', default=None),
+        description='Apify API token. Falls back to the APIFY_API_TOKEN environment variable when None.',
+    )
     apify_client: ApifyClient = Field(default=None, exclude=True)
     apify_client_async: ApifyClientAsync = Field(default=None, exclude=True)
-    apify_api_token: str | None = None
 
     def __init__(
         self,
@@ -63,37 +67,34 @@ def __init__(
         *args: Any,  # noqa: ANN401
         **kwargs: Any,  # noqa: ANN401
     ) -> None:
-        """Initialize the loader with an Apify dataset ID and a mapping function.
+        """Initialise the wrapper.
 
         Args:
-            dataset_id (str): The ID of the dataset on the Apify platform.
-            dataset_mapping_function (Callable): A function that takes a single
-                dictionary (an Apify dataset item) and converts it to an instance
-                of the Document class.
-            apify_api_token (Optional[str]): Apify API token.
-            *args: Any: Additional positional arguments.
-            **kwargs: Any: Additional keyword arguments.
+            apify_api_token (Optional[str]): Apify API token. Falls back to the
+                ``APIFY_API_TOKEN`` environment variable when *None*.
+            *args: Any: Additional positional arguments forwarded to Pydantic.
+            **kwargs: Any: Additional keyword arguments forwarded to Pydantic.
         """
         kwargs.update({'apify_api_token': apify_api_token})
         super().__init__(*args, **kwargs)
 
-    @model_validator(mode='before')
-    @classmethod
-    def validate_environment(cls, values: dict) -> Any:  # noqa: ANN401
-        """Validate environment.
-
-        Validate that an Apify API token is set and the apify-client
-        Python package exists in the current environment.
+    @model_validator(mode='after')
+    def _init_clients(self) -> 'ApifyWrapper':
+        """Validate the token and initialise both sync and async Apify clients.
 
         Returns:
-            Any: The validated values.
-        """
-        apify_api_token = get_from_dict_or_env(values, 'apify_api_token', 'APIFY_API_TOKEN')
+            ApifyWrapper: The validated wrapper instance.
 
-        values['apify_client'] = _create_apify_client(ApifyClient, apify_api_token)
-        values['apify_client_async'] = _create_apify_client(ApifyClientAsync, apify_api_token)
-
-        return values
+        Raises:
+            ValueError: If no token is provided and APIFY_API_TOKEN is not set.
+        """
+        if self.apify_api_token is None:
+            msg = _ERROR_APIFY_TOKEN_ENV_VAR_NOT_SET
+            raise ValueError(msg)
+        token = self.apify_api_token.get_secret_value()
+        self.apify_client = _create_apify_client(ApifyClient, token)
+        self.apify_client_async = _create_apify_client(ApifyClientAsync, token)
+        return self
 
     def call_actor(  # noqa: PLR0913
         self,

From 6258b2b9ad8ed2dffd09918929ef9ec7d7893f4c Mon Sep 17 00:00:00 2001
From: David Omrai <david.omrai@seznam.cz>
Date: Thu, 23 Apr 2026 13:38:37 +0200
Subject: [PATCH 31/63] fix: backward-compat fix

---
 langchain_apify/document_loaders.py | 14 +++++++++-----
 langchain_apify/wrappers.py         |  5 ++++-
 2 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/langchain_apify/document_loaders.py b/langchain_apify/document_loaders.py
index 3a777f3..4e286af 100644
--- a/langchain_apify/document_loaders.py
+++ b/langchain_apify/document_loaders.py
@@ -70,11 +70,15 @@ def __init__(
             apify_api_token (str): Apify API token. Falls back to the
                 ``APIFY_API_TOKEN`` / ``APIFY_TOKEN`` environment variables.
         """
-        super().__init__(
-            dataset_id=dataset_id,
-            dataset_mapping_function=dataset_mapping_function,
-            apify_api_token=apify_api_token,
-        )
+        init_kwargs: dict[str, Any] = {
+            'dataset_id': dataset_id,
+            'dataset_mapping_function': dataset_mapping_function,
+        }
+        # Only forward the token when explicitly provided; otherwise let the
+        # Pydantic ``default_factory`` read it from the environment.
+        if apify_api_token is not None:
+            init_kwargs['apify_api_token'] = apify_api_token
+        super().__init__(**init_kwargs)
 
     @model_validator(mode='after')
     def _init_client(self) -> 'ApifyDatasetLoader':
diff --git a/langchain_apify/wrappers.py b/langchain_apify/wrappers.py
index 9af591a..a1e0ab6 100644
--- a/langchain_apify/wrappers.py
+++ b/langchain_apify/wrappers.py
@@ -75,7 +75,10 @@ def __init__(
             *args: Any: Additional positional arguments forwarded to Pydantic.
             **kwargs: Any: Additional keyword arguments forwarded to Pydantic.
         """
-        kwargs.update({'apify_api_token': apify_api_token})
+        # Only forward the token when explicitly provided; otherwise let the
+        # Pydantic ``default_factory`` read it from the environment.
+        if apify_api_token is not None:
+            kwargs['apify_api_token'] = apify_api_token
         super().__init__(*args, **kwargs)
 
     @model_validator(mode='after')

From 2905b679a7240b6286229a86f182ea0eddd3ac37 Mon Sep 17 00:00:00 2001
From: David Omrai <david.omrai@seznam.cz>
Date: Thu, 23 Apr 2026 13:58:21 +0200
Subject: [PATCH 32/63] fix: update stale doc string

---
 langchain_apify/_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/langchain_apify/_utils.py b/langchain_apify/_utils.py
index b19bcbf..9d74487 100644
--- a/langchain_apify/_utils.py
+++ b/langchain_apify/_utils.py
@@ -18,7 +18,7 @@ def _prune_actor_input_schema(
 ) -> tuple[dict, list[str]]:
     """Get the input schema from the Actor build.
 
-    Trim the description to 250 characters.
+    Trim descriptions to ``_MAX_DESCRIPTION_LEN`` characters.
 
     Args:
         input_schema (dict): The input schema from the Actor build.

From 3238c0203e9965ad1c34dc004c9bd729714b0b1f Mon Sep 17 00:00:00 2001
From: David Omrai <david.omrai@seznam.cz>
Date: Thu, 23 Apr 2026 13:59:06 +0200
Subject: [PATCH 33/63] chore: removed redundant file

---
 langchain_apify/_actor_tools.py | 17 -----------------
 1 file changed, 17 deletions(-)
 delete mode 100644 langchain_apify/_actor_tools.py

diff --git a/langchain_apify/_actor_tools.py b/langchain_apify/_actor_tools.py
deleted file mode 100644
index a989b11..0000000
--- a/langchain_apify/_actor_tools.py
+++ /dev/null
@@ -1,17 +0,0 @@
-"""Actor-specific tool subclasses (search, social-media, etc.).
-
-Downstream feature branches add concrete tools here.  They inherit from
-:class:`~langchain_apify.tools._ApifyGenericTool` and use
-:func:`~langchain_apify.tools._run_meta` to format run metadata.
-"""
-
-from __future__ import annotations
-
-# ---------------------------------------------------------------------------
-# Search & Crawling tools
-# ---------------------------------------------------------------------------
-
-
-# ---------------------------------------------------------------------------
-# Social-media tools
-# ---------------------------------------------------------------------------

From 92df406a8fde1996c1fe71713f16e1d2533d36dc Mon Sep 17 00:00:00 2001
From: David Omrai <david.omrai@seznam.cz>
Date: Thu, 23 Apr 2026 14:19:12 +0200
Subject: [PATCH 34/63] fix: extract repeated code, fix SecretStr
 compatibility in ApifyToolsClient

---
 langchain_apify/_client.py | 36 ++++++++++++++++++++----------------
 1 file changed, 20 insertions(+), 16 deletions(-)

diff --git a/langchain_apify/_client.py b/langchain_apify/_client.py
index 84e840a..a828be2 100644
--- a/langchain_apify/_client.py
+++ b/langchain_apify/_client.py
@@ -3,6 +3,7 @@
 import os
 
 from apify_client import ApifyClient
+from pydantic import SecretStr
 
 from langchain_apify._error_messages import (
     _ERROR_ACTOR_RUN_FAILED,
@@ -32,12 +33,17 @@ class ApifyToolsClient:
         ValueError: If no token is provided and the env var is not set.
     """
 
-    def __init__(self, apify_api_token: str | None = None) -> None:
-        token = apify_api_token or os.getenv('APIFY_API_TOKEN')
-        if not token:
+    def __init__(self, apify_api_token: SecretStr | str | None = None) -> None:
+        _token: str | None = None
+        if isinstance(apify_api_token, SecretStr):
+            _token = apify_api_token.get_secret_value()
+        else:
+            _token = apify_api_token or os.getenv('APIFY_API_TOKEN')
+        
+        if not _token:
             msg = _ERROR_APIFY_TOKEN_ENV_VAR_NOT_SET
             raise ValueError(msg)
-        self._client = _create_apify_client(ApifyClient, token)
+        self._client = _create_apify_client(ApifyClient, _token)
 
     def run_actor(
         self,
@@ -117,17 +123,12 @@ def run_actor_and_get_items(
         Raises:
             RuntimeError: If the run does not finish with status ``SUCCEEDED``.
         """
-        # run_actor() raises RuntimeError on Actor failure; the except below only covers the dataset fetch.
         run = self.run_actor(actor_id, run_input, timeout_secs, memory_mbytes)
         dataset_id = run.get('defaultDatasetId')
         if not dataset_id:
             msg = f'Actor {actor_id} run succeeded but returned no default dataset ID.'
             raise RuntimeError(msg)
-        try:
-            items = self._client.dataset(dataset_id).list_items(limit=dataset_items_limit, clean=True).items
-        except Exception as exc:
-            msg = f'Network error fetching dataset {dataset_id}: {exc}'
-            raise RuntimeError(msg) from exc
+        items = self._list_items_or_raise(dataset_id, dataset_items_limit)
         return run, items
 
     def run_task(
@@ -191,17 +192,12 @@ def run_task_and_get_items(
         Raises:
             RuntimeError: If the run does not finish with status ``SUCCEEDED``.
         """
-        # run_task() raises RuntimeError on task failure; the except below only covers the dataset fetch.
         run = self.run_task(task_id, task_input, timeout_secs, memory_mbytes)
         dataset_id = run.get('defaultDatasetId')
         if not dataset_id:
             msg = f'Task {task_id} run succeeded but returned no default dataset ID.'
             raise RuntimeError(msg)
-        try:
-            items = self._client.dataset(dataset_id).list_items(limit=dataset_items_limit, clean=True).items
-        except Exception as exc:
-            msg = f'Network error fetching dataset {dataset_id}: {exc}'
-            raise RuntimeError(msg) from exc
+        items = self._list_items_or_raise(dataset_id, dataset_items_limit)
         return run, items
 
     def scrape_url(self, url: str, timeout_secs: int = _DEFAULT_SCRAPE_TIMEOUT_SECS) -> str:
@@ -239,6 +235,14 @@ def scrape_url(self, url: str, timeout_secs: int = _DEFAULT_SCRAPE_TIMEOUT_SECS)
             raise RuntimeError(msg)
         return content
 
+    def _list_items_or_raise(self, dataset_id: str, limit: int) -> list[dict]:
+        """Fetch dataset items, wrapping any network error in a RuntimeError."""
+        try:
+            return self._client.dataset(dataset_id).list_items(limit=limit, clean=True).items
+        except Exception as exc:
+            msg = f'Network error fetching dataset {dataset_id}: {exc}'
+            raise RuntimeError(msg) from exc
+
     @staticmethod
     def _check_run_status(run: dict) -> None:
         """Raise if the run did not succeed."""

From 3a0f666d08f4f6e05ec382699fe03a4d3e1e9414 Mon Sep 17 00:00:00 2001
From: David Omrai <david.omrai@seznam.cz>
Date: Thu, 23 Apr 2026 15:02:43 +0200
Subject: [PATCH 35/63] fix: set min value to timeout, memory and items, add
 exclude and repr to apify_api_token

---
 langchain_apify/tools.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/langchain_apify/tools.py b/langchain_apify/tools.py
index 0097f4c..3d7af3c 100644
--- a/langchain_apify/tools.py
+++ b/langchain_apify/tools.py
@@ -313,6 +313,8 @@ class _ApifyGenericTool(BaseTool):  # type: ignore[override]
     apify_api_token: SecretStr | None = Field(
         default_factory=secret_from_env('APIFY_API_TOKEN', default=None),
         description='Apify API token. Falls back to the APIFY_API_TOKEN environment variable when None.',
+        exclude=True,
+        repr=False,
     )
     max_timeout_secs: int = Field(default=600, description='Upper bound for timeout_secs the LLM may request.')
     max_memory_mbytes: int = Field(default=32768, description='Upper bound for memory_mbytes the LLM may request.')
@@ -328,15 +330,15 @@ def model_post_init(self, __context: Any) -> None:  # noqa: ANN401
         super().model_post_init(__context)
 
     def _clamp_timeout(self, value: int) -> int:
-        return min(value, self.max_timeout_secs)
+        return max(1, min(value, self.max_timeout_secs))
 
     def _clamp_memory(self, value: int | None) -> int | None:
         if value is None:
             return None
-        return min(value, self.max_memory_mbytes)
+        return max(1, min(value, self.max_memory_mbytes))
 
     def _clamp_items(self, value: int) -> int:
-        return min(value, self.max_items)
+        return max(1, min(value, self.max_items))
 
 
 # ---------------------------------------------------------------------------

From 8614cfdbb54d4eff228b80c20668ed81e21cffb0 Mon Sep 17 00:00:00 2001
From: David Omrai <david.omrai@seznam.cz>
Date: Thu, 23 Apr 2026 15:03:29 +0200
Subject: [PATCH 36/63] feat: added repr and exclude to apify api token

---
 langchain_apify/document_loaders.py | 2 ++
 langchain_apify/wrappers.py         | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/langchain_apify/document_loaders.py b/langchain_apify/document_loaders.py
index 4e286af..400476e 100644
--- a/langchain_apify/document_loaders.py
+++ b/langchain_apify/document_loaders.py
@@ -46,6 +46,8 @@ class ApifyDatasetLoader(BaseLoader, BaseModel):
     apify_api_token: SecretStr | None = Field(
         default_factory=secret_from_env('APIFY_API_TOKEN', default=None),
         description='Apify API token. Falls back to APIFY_API_TOKEN / APIFY_TOKEN environment variables.',
+        exclude=True,
+        repr=False,
     )
     apify_client: ApifyClient = Field(default=None, exclude=True)
     dataset_id: str
diff --git a/langchain_apify/wrappers.py b/langchain_apify/wrappers.py
index a1e0ab6..e4cafb6 100644
--- a/langchain_apify/wrappers.py
+++ b/langchain_apify/wrappers.py
@@ -57,6 +57,8 @@ class ApifyWrapper(BaseModel):
     apify_api_token: SecretStr | None = Field(
         default_factory=secret_from_env('APIFY_API_TOKEN', default=None),
         description='Apify API token. Falls back to the APIFY_API_TOKEN environment variable when None.',
+        exclude=True,
+        repr=False,
     )
     apify_client: ApifyClient = Field(default=None, exclude=True)
     apify_client_async: ApifyClientAsync = Field(default=None, exclude=True)

From 2bf130a9c98a8d3c7436cf8e4daf14d9d5fc20c4 Mon Sep 17 00:00:00 2001
From: David Omrai <david.omrai@seznam.cz>
Date: Thu, 23 Apr 2026 15:03:59 +0200
Subject: [PATCH 37/63] feat: add type checking to apify core tools list

---
 langchain_apify/__init__.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/langchain_apify/__init__.py b/langchain_apify/__init__.py
index fa1f369..bca8081 100644
--- a/langchain_apify/__init__.py
+++ b/langchain_apify/__init__.py
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 from importlib import metadata
+from typing import TYPE_CHECKING
 
 from langchain_apify.document_loaders import ApifyDatasetLoader
 from langchain_apify.tools import (
@@ -14,6 +15,9 @@
 )
 from langchain_apify.wrappers import ApifyWrapper
 
+if TYPE_CHECKING:
+    from langchain_core.tools import BaseTool
+
 try:
     __version__ = metadata.version(__package__)
 except metadata.PackageNotFoundError:
@@ -24,7 +28,7 @@
 # Binding all tools at once overwhelms the LLM context window;
 # pick the group(s) relevant to your use case.
 
-APIFY_CORE_TOOLS: list[type] = [
+APIFY_CORE_TOOLS: list[type[BaseTool]] = [
     ApifyRunActorTool,
     ApifyGetDatasetItemsTool,
     ApifyRunActorAndGetItemsTool,

From 98293d4cfa1e0ca6c488380ff93b5033b2d96314 Mon Sep 17 00:00:00 2001
From: David Omrai <david.omrai@seznam.cz>
Date: Thu, 23 Apr 2026 15:05:05 +0200
Subject: [PATCH 38/63] feat: add tests for clamped values and apify api token

---
 tests/unit_tests/test_document_loaders.py | 25 +++++++++++++
 tests/unit_tests/test_tools.py            | 44 +++++++++++++++++++++++
 2 files changed, 69 insertions(+)

diff --git a/tests/unit_tests/test_document_loaders.py b/tests/unit_tests/test_document_loaders.py
index a6c7a61..49ee9db 100644
--- a/tests/unit_tests/test_document_loaders.py
+++ b/tests/unit_tests/test_document_loaders.py
@@ -1,5 +1,6 @@
 from unittest.mock import patch
 
+import pytest
 from apify_client._types import ListPage
 from apify_client.clients import DatasetClient
 from langchain_core.documents import Document
@@ -55,3 +56,27 @@ def test_apify_dataset_loader_lazy_load() -> None:
         mock_list_items.assert_called_once()
         assert documents[0].page_content == 'Apify is great!'
         assert documents[0].metadata['source'] == 'https://apify.com'
+
+
+def test_apify_dataset_loader_apify_token_fallback(monkeypatch: pytest.MonkeyPatch) -> None:
+    """Loader should accept APIFY_TOKEN as a secondary env-var fallback."""
+    monkeypatch.delenv('APIFY_API_TOKEN', raising=False)
+    monkeypatch.setenv('APIFY_TOKEN', 'platform-token')
+
+    with patch.object(DatasetClient, 'list_items') as mock_list_items:
+        mock_list_items.return_value = ListPage(data={'items': []})
+        loader = ApifyDatasetLoader(
+            dataset_id='d',
+            dataset_mapping_function=lambda _item: Document(page_content='x'),
+        )
+        assert loader.load() == []
+
+
+def test_apify_dataset_loader_missing_token(monkeypatch: pytest.MonkeyPatch) -> None:
+    monkeypatch.delenv('APIFY_API_TOKEN', raising=False)
+    monkeypatch.delenv('APIFY_TOKEN', raising=False)
+    with pytest.raises(ValueError, match='APIFY_API_TOKEN'):
+        ApifyDatasetLoader(
+            dataset_id='d',
+            dataset_mapping_function=lambda _item: Document(page_content='x'),
+        )
diff --git a/tests/unit_tests/test_tools.py b/tests/unit_tests/test_tools.py
index 542ec4e..3c99a71 100644
--- a/tests/unit_tests/test_tools.py
+++ b/tests/unit_tests/test_tools.py
@@ -472,6 +472,42 @@ def test_run_task_and_get_items_tool_clamps_all(mock_tools_client: MagicMock) ->
     mock_tools_client.run_task_and_get_items.assert_called_once_with('t/1', None, 30, 256, 5)
 
 
+def test_clamp_timeout_floor_is_one(mock_tools_client: MagicMock) -> None:
+    mock_tools_client.run_actor.return_value = SUCCEEDED_RUN
+    tool = make_tool(ApifyRunActorTool, mock_tools_client, max_timeout_secs=600)
+
+    tool._run(actor_id='apify/test', timeout_secs=-1)
+    mock_tools_client.run_actor.assert_called_once_with('apify/test', None, 1, None)
+
+    mock_tools_client.run_actor.reset_mock()
+    tool._run(actor_id='apify/test', timeout_secs=0)
+    mock_tools_client.run_actor.assert_called_once_with('apify/test', None, 1, None)
+
+
+def test_clamp_memory_floor_is_one(mock_tools_client: MagicMock) -> None:
+    mock_tools_client.run_actor.return_value = SUCCEEDED_RUN
+    tool = make_tool(ApifyRunActorTool, mock_tools_client, max_memory_mbytes=4096)
+
+    tool._run(actor_id='apify/test', memory_mbytes=-1)
+    mock_tools_client.run_actor.assert_called_once_with('apify/test', None, 300, 1)
+
+    mock_tools_client.run_actor.reset_mock()
+    tool._run(actor_id='apify/test', memory_mbytes=0)
+    mock_tools_client.run_actor.assert_called_once_with('apify/test', None, 300, 1)
+
+
+def test_clamp_items_floor_is_one(mock_tools_client: MagicMock) -> None:
+    mock_tools_client.get_dataset_items.return_value = SAMPLE_ITEMS
+    tool = make_tool(ApifyGetDatasetItemsTool, mock_tools_client, max_items=100)
+
+    tool._run(dataset_id='ds-1', limit=-1)
+    mock_tools_client.get_dataset_items.assert_called_once_with('ds-1', 1, 0)
+
+    mock_tools_client.get_dataset_items.reset_mock()
+    tool._run(dataset_id='ds-1', limit=0)
+    mock_tools_client.get_dataset_items.assert_called_once_with('ds-1', 1, 0)
+
+
 def test_values_below_max_pass_through(mock_tools_client: MagicMock) -> None:
     """When LLM values are within limits they should pass through unchanged."""
     mock_tools_client.run_actor.return_value = SUCCEEDED_RUN
@@ -515,6 +551,14 @@ def test_generic_tools_have_correct_metadata() -> None:
         assert tool.handle_tool_error is True
 
 
+def test_apify_api_token_excluded_from_model_dump() -> None:
+    """The apify_api_token field must not appear in model_dump() output."""
+    with patch.object(ApifyToolsClient, '__init__', return_value=None):
+        tool = ApifyRunActorTool(apify_api_token='x')  # type: ignore[call-arg]
+    dumped = tool.model_dump()
+    assert 'apify_api_token' not in dumped
+
+
 # ---------------------------------------------------------------------------
 # _ApifyGenericTool inheritance
 # ---------------------------------------------------------------------------

From 863ed8d31b64457635a52abc0402918a37e1bc4a Mon Sep 17 00:00:00 2001
From: David Omrai <david.omrai@seznam.cz>
Date: Thu, 23 Apr 2026 15:20:48 +0200
Subject: [PATCH 39/63] fix: lint fix

---
 langchain_apify/_client.py          |  2 +-
 langchain_apify/document_loaders.py |  4 ++--
 langchain_apify/tools.py            |  2 +-
 langchain_apify/wrappers.py         |  6 +++---
 tests/unit_tests/test_tools.py      | 14 +++++++-------
 5 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/langchain_apify/_client.py b/langchain_apify/_client.py
index a828be2..9a87d46 100644
--- a/langchain_apify/_client.py
+++ b/langchain_apify/_client.py
@@ -39,7 +39,7 @@ def __init__(self, apify_api_token: SecretStr | str | None = None) -> None:
             _token = apify_api_token.get_secret_value()
         else:
             _token = apify_api_token or os.getenv('APIFY_API_TOKEN')
-        
+
         if not _token:
             msg = _ERROR_APIFY_TOKEN_ENV_VAR_NOT_SET
             raise ValueError(msg)
diff --git a/langchain_apify/document_loaders.py b/langchain_apify/document_loaders.py
index 400476e..131950d 100644
--- a/langchain_apify/document_loaders.py
+++ b/langchain_apify/document_loaders.py
@@ -49,7 +49,7 @@ class ApifyDatasetLoader(BaseLoader, BaseModel):
         exclude=True,
         repr=False,
     )
-    apify_client: ApifyClient = Field(default=None, exclude=True)
+    apify_client: ApifyClient = Field(default=None, exclude=True)  # type: ignore[assignment]
     dataset_id: str
     """The ID of the dataset on the Apify platform."""
     dataset_mapping_function: Callable[[dict], Document]
@@ -83,7 +83,7 @@ def __init__(
         super().__init__(**init_kwargs)
 
     @model_validator(mode='after')
-    def _init_client(self) -> 'ApifyDatasetLoader':
+    def _init_client(self) -> ApifyDatasetLoader:
         """Resolve the Apify API token and initialise the client.
 
         Checks ``APIFY_TOKEN`` as a secondary fallback for code running on the
diff --git a/langchain_apify/tools.py b/langchain_apify/tools.py
index 3d7af3c..e7721b7 100644
--- a/langchain_apify/tools.py
+++ b/langchain_apify/tools.py
@@ -64,7 +64,7 @@ class ApifyActorsTool(BaseTool):  # type: ignore[override, override]
     def __init__(
         self,
         actor_id: str,
-        apify_api_token: str | None = None,
+        apify_api_token: str | SecretStr | None = None,
         *args: Any,  # noqa: ANN401
         **kwargs: Any,  # noqa: ANN401
     ) -> None:
diff --git a/langchain_apify/wrappers.py b/langchain_apify/wrappers.py
index e4cafb6..d5fd25c 100644
--- a/langchain_apify/wrappers.py
+++ b/langchain_apify/wrappers.py
@@ -60,8 +60,8 @@ class ApifyWrapper(BaseModel):
         exclude=True,
         repr=False,
     )
-    apify_client: ApifyClient = Field(default=None, exclude=True)
-    apify_client_async: ApifyClientAsync = Field(default=None, exclude=True)
+    apify_client: ApifyClient = Field(default=None, exclude=True)  # type: ignore[assignment]
+    apify_client_async: ApifyClientAsync = Field(default=None, exclude=True)  # type: ignore[assignment]
 
     def __init__(
         self,
@@ -84,7 +84,7 @@ def __init__(
         super().__init__(*args, **kwargs)
 
     @model_validator(mode='after')
-    def _init_clients(self) -> 'ApifyWrapper':
+    def _init_clients(self) -> ApifyWrapper:
         """Validate the token and initialise both sync and async Apify clients.
 
         Returns:
diff --git a/tests/unit_tests/test_tools.py b/tests/unit_tests/test_tools.py
index 3c99a71..67fa1a7 100644
--- a/tests/unit_tests/test_tools.py
+++ b/tests/unit_tests/test_tools.py
@@ -527,12 +527,12 @@ def test_generic_tools_have_correct_metadata() -> None:
     """Verify name, description, and args_schema are set on all generic tools."""
     with patch.object(ApifyToolsClient, '__init__', return_value=None):
         tools = [
-            ApifyRunActorTool(apify_api_token='dummy'),  # type: ignore[call-arg]
-            ApifyGetDatasetItemsTool(apify_api_token='dummy'),  # type: ignore[call-arg]
-            ApifyRunActorAndGetItemsTool(apify_api_token='dummy'),  # type: ignore[call-arg]
-            ApifyScrapeUrlTool(apify_api_token='dummy'),  # type: ignore[call-arg]
-            ApifyRunTaskTool(apify_api_token='dummy'),  # type: ignore[call-arg]
-            ApifyRunTaskAndGetItemsTool(apify_api_token='dummy'),  # type: ignore[call-arg]
+            ApifyRunActorTool(apify_api_token='dummy'),  # type: ignore[call-arg,arg-type]
+            ApifyGetDatasetItemsTool(apify_api_token='dummy'),  # type: ignore[call-arg,arg-type]
+            ApifyRunActorAndGetItemsTool(apify_api_token='dummy'),  # type: ignore[call-arg,arg-type]
+            ApifyScrapeUrlTool(apify_api_token='dummy'),  # type: ignore[call-arg,arg-type]
+            ApifyRunTaskTool(apify_api_token='dummy'),  # type: ignore[call-arg,arg-type]
+            ApifyRunTaskAndGetItemsTool(apify_api_token='dummy'),  # type: ignore[call-arg,arg-type]
         ]
 
     expected_names = [
@@ -554,7 +554,7 @@ def test_generic_tools_have_correct_metadata() -> None:
 def test_apify_api_token_excluded_from_model_dump() -> None:
     """The apify_api_token field must not appear in model_dump() output."""
     with patch.object(ApifyToolsClient, '__init__', return_value=None):
-        tool = ApifyRunActorTool(apify_api_token='x')  # type: ignore[call-arg]
+        tool = ApifyRunActorTool(apify_api_token='x')  # type: ignore[call-arg,arg-type]
     dumped = tool.model_dump()
     assert 'apify_api_token' not in dumped
 

From 70527e0d839b02c1399620d76c6c599aa55434a3 Mon Sep 17 00:00:00 2001
From: David Omrai <david.omrai@seznam.cz>
Date: Fri, 24 Apr 2026 09:56:43 +0200
Subject: [PATCH 40/63] ref: update apify_api_token type to support SecretStr
 in document loaders

---
 langchain_apify/document_loaders.py | 4 ++--
 langchain_apify/wrappers.py         | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/langchain_apify/document_loaders.py b/langchain_apify/document_loaders.py
index 131950d..6439740 100644
--- a/langchain_apify/document_loaders.py
+++ b/langchain_apify/document_loaders.py
@@ -60,7 +60,7 @@ def __init__(
         self,
         dataset_id: str,
         dataset_mapping_function: Callable[[dict], Document],
-        apify_api_token: str | None = None,
+        apify_api_token: str | SecretStr | None = None,
     ) -> None:
         """Initialize the loader with an Apify dataset ID and a mapping function.
 
@@ -69,7 +69,7 @@ def __init__(
             dataset_mapping_function (Callable): A function that takes a single
                 dictionary (an Apify dataset item) and converts it to an instance
                 of the Document class.
-            apify_api_token (str): Apify API token. Falls back to the
+            apify_api_token (str | SecretStr): Apify API token. Falls back to the
                 ``APIFY_API_TOKEN`` / ``APIFY_TOKEN`` environment variables.
         """
         init_kwargs: dict[str, Any] = {
diff --git a/langchain_apify/wrappers.py b/langchain_apify/wrappers.py
index d5fd25c..57a9eeb 100644
--- a/langchain_apify/wrappers.py
+++ b/langchain_apify/wrappers.py
@@ -65,15 +65,15 @@ class ApifyWrapper(BaseModel):
 
     def __init__(
         self,
-        apify_api_token: str | None = None,
+        apify_api_token: str | SecretStr | None = None,
         *args: Any,  # noqa: ANN401
         **kwargs: Any,  # noqa: ANN401
     ) -> None:
         """Initialise the wrapper.
 
         Args:
-            apify_api_token (Optional[str]): Apify API token. Falls back to the
-                ``APIFY_API_TOKEN`` environment variable when *None*.
+            apify_api_token (Optional[str | SecretStr]): Apify API token. Falls
+                back to the ``APIFY_API_TOKEN`` environment variable when *None*.
             *args: Any: Additional positional arguments forwarded to Pydantic.
             **kwargs: Any: Additional keyword arguments forwarded to Pydantic.
         """

From f005bc557f21b86f5bba7e53b1625f96ffb45783 Mon Sep 17 00:00:00 2001
From: David Omrai <david.omrai@seznam.cz>
Date: Fri, 24 Apr 2026 11:35:00 +0200
Subject: [PATCH 41/63] fix: turn off logger for ApifySearchRetrieval

---
 langchain_apify/retrievers.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/langchain_apify/retrievers.py b/langchain_apify/retrievers.py
index 1dc8aa7..709fd5e 100644
--- a/langchain_apify/retrievers.py
+++ b/langchain_apify/retrievers.py
@@ -80,6 +80,7 @@ def _get_relevant_documents(
         run = self._sync_client.actor(_RAG_WEB_BROWSER_ACTOR_ID).call(
             run_input=run_input,
             timeout_secs=self.timeout_secs,
+            logger=None,
         )
         if run is None:
             return []
@@ -106,6 +107,7 @@ async def _aget_relevant_documents(
         run = await self._async_client.actor(_RAG_WEB_BROWSER_ACTOR_ID).call(
             run_input=run_input,
             timeout_secs=self.timeout_secs,
+            logger=None,
         )
         if run is None:
             return []

From dd08098667a5b058f306adb467f59e22fe04e7f6 Mon Sep 17 00:00:00 2001
From: David Omrai <david.omrai@seznam.cz>
Date: Fri, 24 Apr 2026 12:03:23 +0200
Subject: [PATCH 42/63] fix: fix lint errors

---
 langchain_apify/_actor_tools.py           |  2 +-
 langchain_apify/_client.py                | 17 +++++++++--------
 langchain_apify/document_loaders.py       |  4 ++--
 langchain_apify/retrievers.py             | 18 ++++++++++++------
 tests/unit_tests/test_actor_tools.py      |  5 +++--
 tests/unit_tests/test_document_loaders.py |  5 +++--
 tests/unit_tests/test_retrievers.py       | 17 ++++++++++-------
 7 files changed, 40 insertions(+), 28 deletions(-)

diff --git a/langchain_apify/_actor_tools.py b/langchain_apify/_actor_tools.py
index c62d912..43a73bc 100644
--- a/langchain_apify/_actor_tools.py
+++ b/langchain_apify/_actor_tools.py
@@ -11,7 +11,6 @@
 from typing import TYPE_CHECKING
 
 from langchain_core.tools import ToolException
-from pydantic import BaseModel
 
 from langchain_apify.tools import (
     ApifyGoogleSearchInput,
@@ -21,6 +20,7 @@
 
 if TYPE_CHECKING:
     from langchain_core.callbacks import CallbackManagerForToolRun
+    from pydantic import BaseModel
 
 # ---------------------------------------------------------------------------
 # Search & Crawling tools
diff --git a/langchain_apify/_client.py b/langchain_apify/_client.py
index da79357..0251fb3 100644
--- a/langchain_apify/_client.py
+++ b/langchain_apify/_client.py
@@ -281,14 +281,15 @@ def google_search(
             timeout_secs=timeout_secs,
             dataset_items_limit=max_results,
         )
-        results: list[dict] = []
-        for item in items:
-            for organic in item.get('organicResults', []):
-                results.append({
-                    'title': organic.get('title', ''),
-                    'url': organic.get('url', ''),
-                    'description': organic.get('description', ''),
-                })
+        results: list[dict] = [
+            {
+                'title': organic.get('title', ''),
+                'url': organic.get('url', ''),
+                'description': organic.get('description', ''),
+            }
+            for item in items
+            for organic in item.get('organicResults', [])
+        ]
         return results[:max_results]
 
     def rag_web_search(
diff --git a/langchain_apify/document_loaders.py b/langchain_apify/document_loaders.py
index cba017c..014998e 100644
--- a/langchain_apify/document_loaders.py
+++ b/langchain_apify/document_loaders.py
@@ -6,7 +6,7 @@
 
 from apify_client import ApifyClient
 from langchain_core.document_loaders.base import BaseLoader
-from langchain_core.documents import Document  # noqa: TCH002
+from langchain_core.documents import Document
 from langchain_core.utils import secret_from_env
 from pydantic import BaseModel, ConfigDict, Field, SecretStr, model_validator
 
@@ -166,7 +166,7 @@ class ApifyCrawlLoader(BaseLoader):
             documents = loader.load()
     """
 
-    def __init__(
+    def __init__(  # noqa: PLR0913
         self,
         url: str,
         apify_api_token: str | None = None,
diff --git a/langchain_apify/retrievers.py b/langchain_apify/retrievers.py
index 709fd5e..9d4c30e 100644
--- a/langchain_apify/retrievers.py
+++ b/langchain_apify/retrievers.py
@@ -71,7 +71,7 @@ def _get_relevant_documents(
         self,
         query: str,
         *,
-        run_manager: CallbackManagerForRetrieverRun | None = None,
+        run_manager: CallbackManagerForRetrieverRun | None = None,  # noqa: ARG002
     ) -> list[Document]:
         run_input = {
             'query': query,
@@ -89,16 +89,21 @@ def _get_relevant_documents(
         if not dataset_id:
             return []
 
-        items = self._sync_client.dataset(dataset_id).list_items(
-            limit=self.max_results, clean=True,
-        ).items
+        items = (
+            self._sync_client.dataset(dataset_id)
+            .list_items(
+                limit=self.max_results,
+                clean=True,
+            )
+            .items
+        )
         return self._items_to_documents(items)
 
     async def _aget_relevant_documents(
         self,
         query: str,
         *,
-        run_manager: AsyncCallbackManagerForRetrieverRun | None = None,
+        run_manager: AsyncCallbackManagerForRetrieverRun | None = None,  # noqa: ARG002
     ) -> list[Document]:
         run_input = {
             'query': query,
@@ -118,7 +123,8 @@ async def _aget_relevant_documents(
 
         items = (
             await self._async_client.dataset(dataset_id).list_items(
-                limit=self.max_results, clean=True,
+                limit=self.max_results,
+                clean=True,
             )
         ).items
         return self._items_to_documents(items)
diff --git a/tests/unit_tests/test_actor_tools.py b/tests/unit_tests/test_actor_tools.py
index 2e5fbe4..6448d89 100644
--- a/tests/unit_tests/test_actor_tools.py
+++ b/tests/unit_tests/test_actor_tools.py
@@ -5,6 +5,7 @@
 
 import pytest
 from langchain_core.tools import ToolException
+from pydantic import SecretStr
 
 from langchain_apify import APIFY_ACTOR_TOOLS, ApifyGoogleSearchTool, ApifyWebCrawlerTool
 from langchain_apify._client import ApifyToolsClient
@@ -167,8 +168,8 @@ def test_actor_tools_inherit_from_generic_base() -> None:
 def test_actor_tools_have_correct_metadata() -> None:
     with patch.object(ApifyToolsClient, '__init__', return_value=None):
         tools = [
-            ApifyGoogleSearchTool(apify_api_token='dummy'),
-            ApifyWebCrawlerTool(apify_api_token='dummy'),
+            ApifyGoogleSearchTool(apify_api_token=SecretStr('dummy')),
+            ApifyWebCrawlerTool(apify_api_token=SecretStr('dummy')),
         ]
 
     expected_names = ['apify_google_search', 'apify_web_crawler']
diff --git a/tests/unit_tests/test_document_loaders.py b/tests/unit_tests/test_document_loaders.py
index 00ecc0f..2bdcc8f 100644
--- a/tests/unit_tests/test_document_loaders.py
+++ b/tests/unit_tests/test_document_loaders.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+from typing import Any
 from unittest.mock import MagicMock, patch
 
 import pytest
@@ -85,7 +86,7 @@ def test_apify_dataset_loader_lazy_load() -> None:
 
 def _make_crawl_loader(
     mock_client: MagicMock,
-    **kwargs: object,
+    **kwargs: Any,  # noqa: ANN401
 ) -> ApifyCrawlLoader:
     with patch.object(ApifyToolsClient, '__init__', return_value=None):
         loader = ApifyCrawlLoader(url='https://example.com', apify_api_token='dummy', **kwargs)
@@ -184,7 +185,7 @@ def test_crawl_loader_missing_token(monkeypatch: pytest.MonkeyPatch) -> None:
         ApifyCrawlLoader(url='https://example.com')
 
 
-def test_crawl_loader_failure_raises(mock_tools_client: MagicMock) -> None:
+def test_crawl_loader_failure_raises() -> None:
     mock_client = MagicMock(spec=ApifyToolsClient)
     mock_client.crawl_website.side_effect = RuntimeError('Actor run run-bad ended with status FAILED.')
     loader = _make_crawl_loader(mock_client)
diff --git a/tests/unit_tests/test_retrievers.py b/tests/unit_tests/test_retrievers.py
index 266ded7..8eede95 100644
--- a/tests/unit_tests/test_retrievers.py
+++ b/tests/unit_tests/test_retrievers.py
@@ -60,7 +60,7 @@ def test_init_custom_params() -> None:
 
 
 # ---------------------------------------------------------------------------
-# _get_relevant_documents (sync)
+# Sync retrieval
 # ---------------------------------------------------------------------------
 
 
@@ -100,7 +100,8 @@ def test_sync_passes_correct_input() -> None:
         timeout_secs=60,
     )
     mock_client.dataset.return_value.list_items.assert_called_once_with(
-        limit=3, clean=True,
+        limit=3,
+        clean=True,
     )
 
 
@@ -145,11 +146,13 @@ def test_sync_no_dataset_id_returns_empty() -> None:
 @pytest.mark.asyncio
 async def test_async_returns_documents() -> None:
     mock_async = MagicMock()
-    mock_async.actor.return_value.call = AsyncMock(return_value={
-        'id': 'run-1',
-        'status': 'SUCCEEDED',
-        'defaultDatasetId': 'ds-1',
-    })
+    mock_async.actor.return_value.call = AsyncMock(
+        return_value={
+            'id': 'run-1',
+            'status': 'SUCCEEDED',
+            'defaultDatasetId': 'ds-1',
+        }
+    )
     mock_list_items = AsyncMock()
     mock_list_items.return_value.items = RAG_ITEMS
     mock_async.dataset.return_value.list_items = mock_list_items

From 2804a5c1832590a84574d9cf5ea4f758354c17e2 Mon Sep 17 00:00:00 2001
From: David Omrai <david.omrai@seznam.cz>
Date: Fri, 24 Apr 2026 12:08:50 +0200
Subject: [PATCH 43/63] fix: tests fix

---
 langchain_apify/_actor_tools.py     | 2 +-
 tests/unit_tests/test_retrievers.py | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/langchain_apify/_actor_tools.py b/langchain_apify/_actor_tools.py
index 43a73bc..0746f4c 100644
--- a/langchain_apify/_actor_tools.py
+++ b/langchain_apify/_actor_tools.py
@@ -11,6 +11,7 @@
 from typing import TYPE_CHECKING
 
 from langchain_core.tools import ToolException
+from pydantic import BaseModel  # noqa: TCH002
 
 from langchain_apify.tools import (
     ApifyGoogleSearchInput,
@@ -20,7 +21,6 @@
 
 if TYPE_CHECKING:
     from langchain_core.callbacks import CallbackManagerForToolRun
-    from pydantic import BaseModel
 
 # ---------------------------------------------------------------------------
 # Search & Crawling tools
diff --git a/tests/unit_tests/test_retrievers.py b/tests/unit_tests/test_retrievers.py
index 8eede95..ee02dba 100644
--- a/tests/unit_tests/test_retrievers.py
+++ b/tests/unit_tests/test_retrievers.py
@@ -98,6 +98,7 @@ def test_sync_passes_correct_input() -> None:
     mock_client.actor.return_value.call.assert_called_once_with(
         run_input={'query': 'my search', 'maxResults': 3},
         timeout_secs=60,
+        logger=None,
     )
     mock_client.dataset.return_value.list_items.assert_called_once_with(
         limit=3,

From ea8b16edf8ba33d29e0f7b3b79cd299825808703 Mon Sep 17 00:00:00 2001
From: David Omrai <david.omrai@seznam.cz>
Date: Tue, 28 Apr 2026 10:07:19 +0200
Subject: [PATCH 44/63] chore: rename tools to match the task description

---
 langchain_apify/__init__.py                   | 12 +++---
 langchain_apify/tools.py                      | 28 ++++++-------
 tests/integration_tests/test_generic_tools.py |  8 ++--
 tests/unit_tests/test_tools.py                | 40 +++++++++----------
 4 files changed, 44 insertions(+), 44 deletions(-)

diff --git a/langchain_apify/__init__.py b/langchain_apify/__init__.py
index bca8081..7d0dfa9 100644
--- a/langchain_apify/__init__.py
+++ b/langchain_apify/__init__.py
@@ -7,9 +7,9 @@
 from langchain_apify.tools import (
     ApifyActorsTool,
     ApifyGetDatasetItemsTool,
-    ApifyRunActorAndGetItemsTool,
+    ApifyRunActorAndGetDatasetTool,
     ApifyRunActorTool,
-    ApifyRunTaskAndGetItemsTool,
+    ApifyRunTaskAndGetDatasetTool,
     ApifyRunTaskTool,
     ApifyScrapeUrlTool,
 )
@@ -31,10 +31,10 @@
 APIFY_CORE_TOOLS: list[type[BaseTool]] = [
     ApifyRunActorTool,
     ApifyGetDatasetItemsTool,
-    ApifyRunActorAndGetItemsTool,
+    ApifyRunActorAndGetDatasetTool,
     ApifyScrapeUrlTool,
     ApifyRunTaskTool,
-    ApifyRunTaskAndGetItemsTool,
+    ApifyRunTaskAndGetDatasetTool,
 ]
 
 __all__ = [
@@ -44,9 +44,9 @@
     'ApifyWrapper',
     # Core generic tools
     'ApifyGetDatasetItemsTool',
-    'ApifyRunActorAndGetItemsTool',
+    'ApifyRunActorAndGetDatasetTool',
     'ApifyRunActorTool',
-    'ApifyRunTaskAndGetItemsTool',
+    'ApifyRunTaskAndGetDatasetTool',
     'ApifyRunTaskTool',
     'ApifyScrapeUrlTool',
     # Tool group lists
diff --git a/langchain_apify/tools.py b/langchain_apify/tools.py
index e7721b7..2b7cedf 100644
--- a/langchain_apify/tools.py
+++ b/langchain_apify/tools.py
@@ -225,8 +225,8 @@ class ApifyGetDatasetItemsInput(BaseModel):
     offset: int = Field(default=0, description='Number of items to skip from the start.')
 
 
-class ApifyRunActorAndGetItemsInput(BaseModel):
-    """Input schema for :class:`ApifyRunActorAndGetItemsTool`."""
+class ApifyRunActorAndGetDatasetInput(BaseModel):
+    """Input schema for :class:`ApifyRunActorAndGetDatasetTool`."""
 
     actor_id: str = Field(description='Actor ID or name (e.g. "apify/python-example").')
     run_input: dict | None = Field(default=None, description='JSON-serialisable input for the Actor.')
@@ -255,8 +255,8 @@ class ApifyRunTaskInput(BaseModel):
     )
 
 
-class ApifyRunTaskAndGetItemsInput(BaseModel):
-    """Input schema for :class:`ApifyRunTaskAndGetItemsTool`."""
+class ApifyRunTaskAndGetDatasetInput(BaseModel):
+    """Input schema for :class:`ApifyRunTaskAndGetDatasetTool`."""
 
     task_id: str = Field(description='Task ID or name (e.g. "user/my-task").')
     task_input: dict | None = Field(
@@ -454,7 +454,7 @@ def _run(
         return json.dumps({'items': items})
 
 
-class ApifyRunActorAndGetItemsTool(_ApifyGenericTool):  # type: ignore[override]
+class ApifyRunActorAndGetDatasetTool(_ApifyGenericTool):  # type: ignore[override]
     """Run any Apify Actor and return both run metadata and dataset items.
 
     Combines :class:`ApifyRunActorTool` and :class:`ApifyGetDatasetItemsTool`
@@ -476,16 +476,16 @@ class ApifyRunActorAndGetItemsTool(_ApifyGenericTool):  # type: ignore[override]
             import os
             os.environ["APIFY_API_TOKEN"] = "your-apify-api-token"
 
-            from langchain_apify import ApifyRunActorAndGetItemsTool
+            from langchain_apify import ApifyRunActorAndGetDatasetTool
 
-            tool = ApifyRunActorAndGetItemsTool()
+            tool = ApifyRunActorAndGetDatasetTool()
             result = tool.invoke({
                 "actor_id": "apify/python-example",
                 "run_input": {"first_number": 2, "second_number": 3},
             })
     """
 
-    name: str = 'apify_run_actor_and_get_items'
+    name: str = 'apify_run_actor_and_get_dataset'
     description: str = (
         'Run an Apify Actor synchronously and return both run metadata and dataset items.'
         ' Required: actor_id (str) — Actor ID or name (e.g. "apify/python-example").'
@@ -494,7 +494,7 @@ class ApifyRunActorAndGetItemsTool(_ApifyGenericTool):  # type: ignore[override]
         ' Returns JSON with keys: run (run_id, status, dataset_id, started_at, finished_at)'
         ' and items (list of dataset item dicts).'
     )
-    args_schema: type[BaseModel] = ApifyRunActorAndGetItemsInput
+    args_schema: type[BaseModel] = ApifyRunActorAndGetDatasetInput
 
     def _run(
         self,
@@ -625,7 +625,7 @@ def _run(
         return json.dumps(_run_meta(run))
 
 
-class ApifyRunTaskAndGetItemsTool(_ApifyGenericTool):  # type: ignore[override]
+class ApifyRunTaskAndGetDatasetTool(_ApifyGenericTool):  # type: ignore[override]
     """Run a saved Apify Actor task and return both run metadata and dataset items.
 
     Combines :class:`ApifyRunTaskTool` and :class:`ApifyGetDatasetItemsTool`
@@ -647,16 +647,16 @@ class ApifyRunTaskAndGetItemsTool(_ApifyGenericTool):  # type: ignore[override]
             import os
             os.environ["APIFY_API_TOKEN"] = "your-apify-api-token"
 
-            from langchain_apify import ApifyRunTaskAndGetItemsTool
+            from langchain_apify import ApifyRunTaskAndGetDatasetTool
 
-            tool = ApifyRunTaskAndGetItemsTool()
+            tool = ApifyRunTaskAndGetDatasetTool()
             result = tool.invoke({
                 "task_id": "user/my-task",
                 "task_input": {"key": "value"},
             })
     """
 
-    name: str = 'apify_run_task_and_get_items'
+    name: str = 'apify_run_task_and_get_dataset'
     description: str = (
         'Run a saved Apify Actor task synchronously and return both run metadata and dataset items.'
         ' Required: task_id (str) — task ID or name (e.g. "user/my-task").'
@@ -665,7 +665,7 @@ class ApifyRunTaskAndGetItemsTool(_ApifyGenericTool):  # type: ignore[override]
         ' Returns JSON with keys: run (run_id, status, dataset_id, started_at, finished_at)'
         ' and items (list of dataset item dicts).'
     )
-    args_schema: type[BaseModel] = ApifyRunTaskAndGetItemsInput
+    args_schema: type[BaseModel] = ApifyRunTaskAndGetDatasetInput
 
     def _run(
         self,
diff --git a/tests/integration_tests/test_generic_tools.py b/tests/integration_tests/test_generic_tools.py
index 863efb1..3f2a7c8 100644
--- a/tests/integration_tests/test_generic_tools.py
+++ b/tests/integration_tests/test_generic_tools.py
@@ -14,9 +14,9 @@
 
 from langchain_apify import (
     ApifyGetDatasetItemsTool,
-    ApifyRunActorAndGetItemsTool,
+    ApifyRunActorAndGetDatasetTool,
     ApifyRunActorTool,
-    ApifyRunTaskAndGetItemsTool,
+    ApifyRunTaskAndGetDatasetTool,
     ApifyRunTaskTool,
     ApifyScrapeUrlTool,
 )
@@ -54,7 +54,7 @@ def test_get_dataset_items_tool_smoke() -> None:
 
 
 def test_run_actor_and_get_items_tool_smoke() -> None:
-    tool = ApifyRunActorAndGetItemsTool()
+    tool = ApifyRunActorAndGetDatasetTool()
     result = tool.invoke({'actor_id': _ACTOR_ID, 'run_input': _RUN_INPUT})
 
     parsed = json.loads(result)
@@ -86,7 +86,7 @@ def test_run_task_tool_smoke() -> None:
 
 @pytest.mark.skipif(not _TASK_ID, reason='APIFY_TASK_ID not set')
 def test_run_task_and_get_items_tool_smoke() -> None:
-    tool = ApifyRunTaskAndGetItemsTool()
+    tool = ApifyRunTaskAndGetDatasetTool()
     result = tool.invoke({'task_id': _TASK_ID})
 
     parsed = json.loads(result)
diff --git a/tests/unit_tests/test_tools.py b/tests/unit_tests/test_tools.py
index 67fa1a7..9abe9dc 100644
--- a/tests/unit_tests/test_tools.py
+++ b/tests/unit_tests/test_tools.py
@@ -15,9 +15,9 @@
 from langchain_apify.tools import (
     ApifyActorsTool,
     ApifyGetDatasetItemsTool,
-    ApifyRunActorAndGetItemsTool,
+    ApifyRunActorAndGetDatasetTool,
     ApifyRunActorTool,
-    ApifyRunTaskAndGetItemsTool,
+    ApifyRunTaskAndGetDatasetTool,
     ApifyRunTaskTool,
     ApifyScrapeUrlTool,
     _ApifyGenericTool,
@@ -253,13 +253,13 @@ def test_get_dataset_items_tool_missing_token(monkeypatch: pytest.MonkeyPatch) -
 
 
 # ---------------------------------------------------------------------------
-# ApifyRunActorAndGetItemsTool
+# ApifyRunActorAndGetDatasetTool
 # ---------------------------------------------------------------------------
 
 
 def test_run_actor_and_get_items_tool_returns_json(mock_tools_client: MagicMock) -> None:
     mock_tools_client.run_actor_and_get_items.return_value = (SUCCEEDED_RUN, SAMPLE_ITEMS)
-    tool = make_tool(ApifyRunActorAndGetItemsTool, mock_tools_client)
+    tool = make_tool(ApifyRunActorAndGetDatasetTool, mock_tools_client)
 
     result = tool._run(actor_id='apify/test', run_input={'q': '1'}, dataset_items_limit=50)
 
@@ -274,7 +274,7 @@ def test_run_actor_and_get_items_tool_failure_raises_tool_exception(mock_tools_c
     mock_tools_client.run_actor_and_get_items.side_effect = RuntimeError(
         'Actor run run-bad ended with status TIMED-OUT.'
     )
-    tool = make_tool(ApifyRunActorAndGetItemsTool, mock_tools_client)
+    tool = make_tool(ApifyRunActorAndGetDatasetTool, mock_tools_client)
 
     with pytest.raises(ToolException, match='TIMED-OUT'):
         tool._run(actor_id='apify/test')
@@ -283,7 +283,7 @@ def test_run_actor_and_get_items_tool_failure_raises_tool_exception(mock_tools_c
 def test_run_actor_and_get_items_tool_missing_token(monkeypatch: pytest.MonkeyPatch) -> None:
     monkeypatch.delenv('APIFY_API_TOKEN', raising=False)
     with pytest.raises(ValueError, match='APIFY_API_TOKEN'):
-        ApifyRunActorAndGetItemsTool()
+        ApifyRunActorAndGetDatasetTool()
 
 
 # ---------------------------------------------------------------------------
@@ -350,13 +350,13 @@ def test_run_task_tool_missing_token(monkeypatch: pytest.MonkeyPatch) -> None:
 
 
 # ---------------------------------------------------------------------------
-# ApifyRunTaskAndGetItemsTool
+# ApifyRunTaskAndGetDatasetTool
 # ---------------------------------------------------------------------------
 
 
 def test_run_task_and_get_items_tool_returns_json(mock_tools_client: MagicMock) -> None:
     mock_tools_client.run_task_and_get_items.return_value = (SUCCEEDED_RUN, SAMPLE_ITEMS)
-    tool = make_tool(ApifyRunTaskAndGetItemsTool, mock_tools_client)
+    tool = make_tool(ApifyRunTaskAndGetDatasetTool, mock_tools_client)
 
     result = tool._run(task_id='user/my-task', task_input={'q': '1'}, dataset_items_limit=50)
 
@@ -371,7 +371,7 @@ def test_run_task_and_get_items_tool_failure_raises_tool_exception(mock_tools_cl
     mock_tools_client.run_task_and_get_items.side_effect = RuntimeError(
         'Actor run run-bad ended with status TIMED-OUT.'
     )
-    tool = make_tool(ApifyRunTaskAndGetItemsTool, mock_tools_client)
+    tool = make_tool(ApifyRunTaskAndGetDatasetTool, mock_tools_client)
 
     with pytest.raises(ToolException, match='TIMED-OUT'):
         tool._run(task_id='user/my-task')
@@ -380,7 +380,7 @@ def test_run_task_and_get_items_tool_failure_raises_tool_exception(mock_tools_cl
 def test_run_task_and_get_items_tool_missing_token(monkeypatch: pytest.MonkeyPatch) -> None:
     monkeypatch.delenv('APIFY_API_TOKEN', raising=False)
     with pytest.raises(ValueError, match='APIFY_API_TOKEN'):
-        ApifyRunTaskAndGetItemsTool()
+        ApifyRunTaskAndGetDatasetTool()
 
 
 # ---------------------------------------------------------------------------
@@ -427,7 +427,7 @@ def test_get_dataset_items_tool_clamps_limit(mock_tools_client: MagicMock) -> No
 def test_run_actor_and_get_items_tool_clamps_all(mock_tools_client: MagicMock) -> None:
     mock_tools_client.run_actor_and_get_items.return_value = (SUCCEEDED_RUN, SAMPLE_ITEMS)
     tool = make_tool(
-        ApifyRunActorAndGetItemsTool,
+        ApifyRunActorAndGetDatasetTool,
         mock_tools_client,
         max_timeout_secs=30,
         max_memory_mbytes=256,
@@ -460,7 +460,7 @@ def test_run_task_tool_clamps_timeout_and_memory(mock_tools_client: MagicMock) -
 def test_run_task_and_get_items_tool_clamps_all(mock_tools_client: MagicMock) -> None:
     mock_tools_client.run_task_and_get_items.return_value = (SUCCEEDED_RUN, SAMPLE_ITEMS)
     tool = make_tool(
-        ApifyRunTaskAndGetItemsTool,
+        ApifyRunTaskAndGetDatasetTool,
         mock_tools_client,
         max_timeout_secs=30,
         max_memory_mbytes=256,
@@ -529,19 +529,19 @@ def test_generic_tools_have_correct_metadata() -> None:
         tools = [
             ApifyRunActorTool(apify_api_token='dummy'),  # type: ignore[call-arg,arg-type]
             ApifyGetDatasetItemsTool(apify_api_token='dummy'),  # type: ignore[call-arg,arg-type]
-            ApifyRunActorAndGetItemsTool(apify_api_token='dummy'),  # type: ignore[call-arg,arg-type]
+            ApifyRunActorAndGetDatasetTool(apify_api_token='dummy'),  # type: ignore[call-arg,arg-type]
             ApifyScrapeUrlTool(apify_api_token='dummy'),  # type: ignore[call-arg,arg-type]
             ApifyRunTaskTool(apify_api_token='dummy'),  # type: ignore[call-arg,arg-type]
-            ApifyRunTaskAndGetItemsTool(apify_api_token='dummy'),  # type: ignore[call-arg,arg-type]
+            ApifyRunTaskAndGetDatasetTool(apify_api_token='dummy'),  # type: ignore[call-arg,arg-type]
         ]
 
     expected_names = [
         'apify_run_actor',
         'apify_get_dataset_items',
-        'apify_run_actor_and_get_items',
+        'apify_run_actor_and_get_dataset',
         'apify_scrape_url',
         'apify_run_task',
-        'apify_run_task_and_get_items',
+        'apify_run_task_and_get_dataset',
     ]
 
     for tool, expected_name in zip(tools, expected_names):
@@ -569,10 +569,10 @@ def test_all_generic_tools_inherit_from_base() -> None:
     for tool_cls in (
         ApifyRunActorTool,
         ApifyGetDatasetItemsTool,
-        ApifyRunActorAndGetItemsTool,
+        ApifyRunActorAndGetDatasetTool,
         ApifyScrapeUrlTool,
         ApifyRunTaskTool,
-        ApifyRunTaskAndGetItemsTool,
+        ApifyRunTaskAndGetDatasetTool,
     ):
         assert issubclass(tool_cls, _ApifyGenericTool), f'{tool_cls.__name__} must extend _ApifyGenericTool'
 
@@ -592,9 +592,9 @@ def test_apify_core_tools_contains_all_generic_classes() -> None:
     assert set(APIFY_CORE_TOOLS) == {
         ApifyRunActorTool,
         ApifyGetDatasetItemsTool,
-        ApifyRunActorAndGetItemsTool,
+        ApifyRunActorAndGetDatasetTool,
         ApifyScrapeUrlTool,
         ApifyRunTaskTool,
-        ApifyRunTaskAndGetItemsTool,
+        ApifyRunTaskAndGetDatasetTool,
     }
     assert len(APIFY_CORE_TOOLS) == 6

From cd1eea1fc4a001296f941954decac2b4e996693d Mon Sep 17 00:00:00 2001
From: David Omrai <david.omrai@seznam.cz>
Date: Tue, 28 Apr 2026 10:25:04 +0200
Subject: [PATCH 45/63] fix: narrow except blocks in _client.py to
 SDK/transport errors

---
 langchain_apify/_client.py      | 21 +++++++++++++--------
 tests/unit_tests/test_client.py | 31 ++++++++++++++++++++-----------
 tests/unit_tests/test_tools.py  |  4 ++--
 3 files changed, 35 insertions(+), 21 deletions(-)

diff --git a/langchain_apify/_client.py b/langchain_apify/_client.py
index 9a87d46..618d007 100644
--- a/langchain_apify/_client.py
+++ b/langchain_apify/_client.py
@@ -2,7 +2,9 @@
 
 import os
 
+import httpx
 from apify_client import ApifyClient
+from apify_client.errors import ApifyClientError
 from pydantic import SecretStr
 
 from langchain_apify._error_messages import (
@@ -12,6 +14,9 @@
 )
 from langchain_apify._utils import _create_apify_client
 
+# SDK/transport errors that get wrapped in RuntimeError; any other exception propagates unchanged.
+_TRANSPORT_EXCEPTIONS = (ApifyClientError, httpx.HTTPError)
+
 _SCRAPE_ACTOR_ID = 'apify/website-content-crawler'
 _DEFAULT_RUN_TIMEOUT_SECS = 300
 _DEFAULT_SCRAPE_TIMEOUT_SECS = 120
@@ -72,8 +77,8 @@ def run_actor(
 
         try:
             run = self._client.actor(actor_id).call(**call_kwargs)
-        except Exception as exc:
-            msg = f'Network error calling Actor {actor_id}: {exc}'
+        except _TRANSPORT_EXCEPTIONS as exc:
+            msg = f'Apify Actor call failed for {actor_id}: {exc}'
             raise RuntimeError(msg) from exc
         if run is None:
             msg = f'Actor {actor_id} call returned no run details.'
@@ -96,8 +101,8 @@ def get_dataset_items(
         """
         try:
             return self._client.dataset(dataset_id).list_items(limit=limit, offset=offset, clean=True).items
-        except Exception as exc:
-            msg = f'Network error fetching dataset {dataset_id}: {exc}'
+        except _TRANSPORT_EXCEPTIONS as exc:
+            msg = f'Apify dataset fetch failed for {dataset_id}: {exc}'
             raise RuntimeError(msg) from exc
 
     def run_actor_and_get_items(
@@ -159,8 +164,8 @@ def run_task(
 
         try:
             run = self._client.task(task_id).call(**call_kwargs)
-        except Exception as exc:
-            msg = f'Network error calling task {task_id}: {exc}'
+        except _TRANSPORT_EXCEPTIONS as exc:
+            msg = f'Apify task call failed for {task_id}: {exc}'
             raise RuntimeError(msg) from exc
         if run is None:
             msg = f'Task {task_id} call returned no run details.'
@@ -239,8 +244,8 @@ def _list_items_or_raise(self, dataset_id: str, limit: int) -> list[dict]:
         """Fetch dataset items, wrapping any network error in a RuntimeError."""
         try:
             return self._client.dataset(dataset_id).list_items(limit=limit, clean=True).items
-        except Exception as exc:
-            msg = f'Network error fetching dataset {dataset_id}: {exc}'
+        except _TRANSPORT_EXCEPTIONS as exc:
+            msg = f'Apify dataset fetch failed for {dataset_id}: {exc}'
             raise RuntimeError(msg) from exc
 
     @staticmethod
diff --git a/tests/unit_tests/test_client.py b/tests/unit_tests/test_client.py
index 40c73dc..c43e4d1 100644
--- a/tests/unit_tests/test_client.py
+++ b/tests/unit_tests/test_client.py
@@ -2,6 +2,7 @@
 
 from unittest.mock import MagicMock, patch
 
+import httpx
 import pytest
 
 from langchain_apify._client import ApifyToolsClient
@@ -238,21 +239,21 @@ def test_run_task_none_return_raises(client: ApifyToolsClient, mock_apify_client
 
 
 # ---------------------------------------------------------------------------
-# Network error wrapping (transport exception -> RuntimeError)
+# Transport-error wrapping (httpx / ApifyClientError -> RuntimeError)
 # ---------------------------------------------------------------------------
 
 
 def test_run_actor_network_error_wraps(client: ApifyToolsClient, mock_apify_client: MagicMock) -> None:
-    mock_apify_client.actor.return_value.call.side_effect = ConnectionError('conn refused')
+    mock_apify_client.actor.return_value.call.side_effect = httpx.ConnectError('conn refused')
 
-    with pytest.raises(RuntimeError, match='Network error calling Actor'):
+    with pytest.raises(RuntimeError, match='Apify Actor call failed'):
         client.run_actor('apify/test-actor')
 
 
 def test_get_dataset_items_network_error_wraps(client: ApifyToolsClient, mock_apify_client: MagicMock) -> None:
-    mock_apify_client.dataset.return_value.list_items.side_effect = ConnectionError('timeout')
+    mock_apify_client.dataset.return_value.list_items.side_effect = httpx.ConnectError('timeout')
 
-    with pytest.raises(RuntimeError, match='Network error fetching dataset'):
+    with pytest.raises(RuntimeError, match='Apify dataset fetch failed'):
         client.get_dataset_items('dataset-xyz')
 
 
@@ -260,16 +261,16 @@ def test_run_actor_and_get_items_dataset_fetch_network_error(
     client: ApifyToolsClient, mock_apify_client: MagicMock
 ) -> None:
     mock_apify_client.actor.return_value.call.return_value = SUCCEEDED_RUN
-    mock_apify_client.dataset.return_value.list_items.side_effect = ConnectionError('reset')
+    mock_apify_client.dataset.return_value.list_items.side_effect = httpx.ConnectError('reset')
 
-    with pytest.raises(RuntimeError, match='Network error fetching dataset'):
+    with pytest.raises(RuntimeError, match='Apify dataset fetch failed'):
         client.run_actor_and_get_items('apify/test-actor')
 
 
 def test_run_task_network_error_wraps(client: ApifyToolsClient, mock_apify_client: MagicMock) -> None:
-    mock_apify_client.task.return_value.call.side_effect = ConnectionError('conn refused')
+    mock_apify_client.task.return_value.call.side_effect = httpx.ConnectError('conn refused')
 
-    with pytest.raises(RuntimeError, match='Network error calling task'):
+    with pytest.raises(RuntimeError, match='Apify task call failed'):
         client.run_task('user/my-task')
 
 
@@ -277,7 +278,15 @@ def test_run_task_and_get_items_dataset_fetch_network_error(
     client: ApifyToolsClient, mock_apify_client: MagicMock
 ) -> None:
     mock_apify_client.task.return_value.call.return_value = SUCCEEDED_RUN
-    mock_apify_client.dataset.return_value.list_items.side_effect = ConnectionError('reset')
+    mock_apify_client.dataset.return_value.list_items.side_effect = httpx.ConnectError('reset')
 
-    with pytest.raises(RuntimeError, match='Network error fetching dataset'):
+    with pytest.raises(RuntimeError, match='Apify dataset fetch failed'):
         client.run_task_and_get_items('user/my-task')
+
+
+def test_run_actor_programming_error_propagates(client: ApifyToolsClient, mock_apify_client: MagicMock) -> None:
+    """Non-transport exceptions (programming errors) must NOT be wrapped as RuntimeError."""
+    mock_apify_client.actor.return_value.call.side_effect = AttributeError('bug in SDK')
+
+    with pytest.raises(AttributeError, match='bug in SDK'):
+        client.run_actor('apify/test-actor')
diff --git a/tests/unit_tests/test_tools.py b/tests/unit_tests/test_tools.py
index 9abe9dc..4a5dbdd 100644
--- a/tests/unit_tests/test_tools.py
+++ b/tests/unit_tests/test_tools.py
@@ -238,11 +238,11 @@ def test_get_dataset_items_tool_empty_returns_message(mock_tools_client: MagicMo
 
 def test_get_dataset_items_tool_network_error_raises_tool_exception(mock_tools_client: MagicMock) -> None:
     mock_tools_client.get_dataset_items.side_effect = RuntimeError(
-        'Network error fetching dataset ds-bad: connection reset'
+        'Apify dataset fetch failed for ds-bad: connection reset'
     )
     tool = make_tool(ApifyGetDatasetItemsTool, mock_tools_client)
 
-    with pytest.raises(ToolException, match='Network error fetching dataset'):
+    with pytest.raises(ToolException, match='Apify dataset fetch failed'):
         tool._run(dataset_id='ds-bad')
 
 

From 50c3583243919d50f240a4f1a0963822a6ec2c33 Mon Sep 17 00:00:00 2001
From: David Omrai <david.omrai@seznam.cz>
Date: Tue, 28 Apr 2026 10:33:57 +0200
Subject: [PATCH 46/63] fix: clamp memory_mbytes to Apify platform minimum (128
 MB)

---
 langchain_apify/tools.py       |  7 +++++--
 tests/unit_tests/test_tools.py | 20 +++++++++++++++++---
 2 files changed, 22 insertions(+), 5 deletions(-)

diff --git a/langchain_apify/tools.py b/langchain_apify/tools.py
index 2b7cedf..fafc858 100644
--- a/langchain_apify/tools.py
+++ b/langchain_apify/tools.py
@@ -333,9 +333,12 @@ def _clamp_timeout(self, value: int) -> int:
         return max(1, min(value, self.max_timeout_secs))
 
     def _clamp_memory(self, value: int | None) -> int | None:
-        if value is None:
+        # Non-positive values fall through to the platform default. Positive
+        # values are floored at 128 MB (the Apify platform minimum) so the LLM
+        # cannot drive into an API rejection by requesting too little memory.
+        if value is None or value <= 0:
             return None
-        return max(1, min(value, self.max_memory_mbytes))
+        return max(128, min(value, self.max_memory_mbytes))
 
     def _clamp_items(self, value: int) -> int:
         return max(1, min(value, self.max_items))
diff --git a/tests/unit_tests/test_tools.py b/tests/unit_tests/test_tools.py
index 4a5dbdd..108c695 100644
--- a/tests/unit_tests/test_tools.py
+++ b/tests/unit_tests/test_tools.py
@@ -484,16 +484,30 @@ def test_clamp_timeout_floor_is_one(mock_tools_client: MagicMock) -> None:
     mock_tools_client.run_actor.assert_called_once_with('apify/test', None, 1, None)
 
 
-def test_clamp_memory_floor_is_one(mock_tools_client: MagicMock) -> None:
+def test_clamp_memory_non_positive_is_treated_as_none(mock_tools_client: MagicMock) -> None:
+    """memory_mbytes <= 0 maps to None so the Apify platform default is used."""
     mock_tools_client.run_actor.return_value = SUCCEEDED_RUN
     tool = make_tool(ApifyRunActorTool, mock_tools_client, max_memory_mbytes=4096)
 
     tool._run(actor_id='apify/test', memory_mbytes=-1)
-    mock_tools_client.run_actor.assert_called_once_with('apify/test', None, 300, 1)
+    mock_tools_client.run_actor.assert_called_once_with('apify/test', None, 300, None)
 
     mock_tools_client.run_actor.reset_mock()
     tool._run(actor_id='apify/test', memory_mbytes=0)
-    mock_tools_client.run_actor.assert_called_once_with('apify/test', None, 300, 1)
+    mock_tools_client.run_actor.assert_called_once_with('apify/test', None, 300, None)
+
+
+def test_clamp_memory_floors_positive_below_platform_minimum(mock_tools_client: MagicMock) -> None:
+    """A positive memory_mbytes below the Apify platform minimum (128 MB) is floored to 128."""
+    mock_tools_client.run_actor.return_value = SUCCEEDED_RUN
+    tool = make_tool(ApifyRunActorTool, mock_tools_client, max_memory_mbytes=4096)
+
+    tool._run(actor_id='apify/test', memory_mbytes=64)
+    mock_tools_client.run_actor.assert_called_once_with('apify/test', None, 300, 128)
+
+    mock_tools_client.run_actor.reset_mock()
+    tool._run(actor_id='apify/test', memory_mbytes=1)
+    mock_tools_client.run_actor.assert_called_once_with('apify/test', None, 300, 128)
 
 
 def test_clamp_items_floor_is_one(mock_tools_client: MagicMock) -> None:

From 450728cb10ac6ed9dac16886fab48dc8586b9009 Mon Sep 17 00:00:00 2001
From: David Omrai <david.omrai@seznam.cz>
Date: Tue, 28 Apr 2026 10:36:08 +0200
Subject: [PATCH 47/63] fix: narrow empty-dataset message in
 ApifyGetDatasetItemsTool

---
 langchain_apify/tools.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/langchain_apify/tools.py b/langchain_apify/tools.py
index fafc858..4cd182d 100644
--- a/langchain_apify/tools.py
+++ b/langchain_apify/tools.py
@@ -453,7 +453,7 @@ def _run(
         except RuntimeError as exc:
             raise ToolException(str(exc)) from exc
         if not items:
-            return json.dumps({'items': [], 'message': 'Dataset is empty or not found.'})
+            return json.dumps({'items': [], 'message': f'Dataset {dataset_id} is empty.'})
         return json.dumps({'items': items})
 
 

From 1360e9228a3be72c6dd6105b3f725d1c16dc599d Mon Sep 17 00:00:00 2001
From: David Omrai <david.omrai@seznam.cz>
Date: Tue, 28 Apr 2026 10:40:51 +0200
Subject: [PATCH 48/63] ref: simplify ApifyToolsClient.__init__ to require
 explicit token

---
 langchain_apify/_client.py      | 20 +++++---------------
 tests/unit_tests/test_client.py | 12 ++----------
 2 files changed, 7 insertions(+), 25 deletions(-)

diff --git a/langchain_apify/_client.py b/langchain_apify/_client.py
index 618d007..cc1e4b8 100644
--- a/langchain_apify/_client.py
+++ b/langchain_apify/_client.py
@@ -1,11 +1,8 @@
 from __future__ import annotations
 
-import os
-
 import httpx
 from apify_client import ApifyClient
 from apify_client.errors import ApifyClientError
-from pydantic import SecretStr
 
 from langchain_apify._error_messages import (
     _ERROR_ACTOR_RUN_FAILED,
@@ -31,24 +28,17 @@ class ApifyToolsClient:
     block until the Actor run finishes.
 
     Args:
-        apify_api_token: Apify API token. Falls back to the ``APIFY_API_TOKEN``
-            environment variable when *None*.
+        apify_api_token: Apify API token.
 
     Raises:
-        ValueError: If no token is provided and the env var is not set.
+        ValueError: If the token is empty.
     """
 
-    def __init__(self, apify_api_token: SecretStr | str | None = None) -> None:
-        _token: str | None = None
-        if isinstance(apify_api_token, SecretStr):
-            _token = apify_api_token.get_secret_value()
-        else:
-            _token = apify_api_token or os.getenv('APIFY_API_TOKEN')
-
-        if not _token:
+    def __init__(self, apify_api_token: str) -> None:
+        if not apify_api_token:
             msg = _ERROR_APIFY_TOKEN_ENV_VAR_NOT_SET
             raise ValueError(msg)
-        self._client = _create_apify_client(ApifyClient, _token)
+        self._client = _create_apify_client(ApifyClient, apify_api_token)
 
     def run_actor(
         self,
diff --git a/tests/unit_tests/test_client.py b/tests/unit_tests/test_client.py
index c43e4d1..43f6f83 100644
--- a/tests/unit_tests/test_client.py
+++ b/tests/unit_tests/test_client.py
@@ -20,17 +20,9 @@ def test_init_with_explicit_token(mock_apify_client: MagicMock) -> None:
         assert c._client is mock_apify_client
 
 
-def test_init_with_env_token(monkeypatch: pytest.MonkeyPatch, mock_apify_client: MagicMock) -> None:
-    monkeypatch.setenv('APIFY_API_TOKEN', 'env-token')
-    with patch('langchain_apify._client._create_apify_client', return_value=mock_apify_client):
-        c = ApifyToolsClient()
-        assert c._client is mock_apify_client
-
-
-def test_init_missing_token_raises(monkeypatch: pytest.MonkeyPatch) -> None:
-    monkeypatch.delenv('APIFY_API_TOKEN', raising=False)
+def test_init_empty_token_raises() -> None:
     with pytest.raises(ValueError, match='APIFY_API_TOKEN'):
-        ApifyToolsClient()
+        ApifyToolsClient(apify_api_token='')
 
 
 # ---------------------------------------------------------------------------

From 09b6c6e045b9b2815bfbc0be28527635e85c4d26 Mon Sep 17 00:00:00 2001
From: David Omrai <david.omrai@seznam.cz>
Date: Tue, 28 Apr 2026 10:45:55 +0200
Subject: [PATCH 49/63] docs: add module-level docstring to tools.py

---
 langchain_apify/tools.py | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/langchain_apify/tools.py b/langchain_apify/tools.py
index 4cd182d..385fc57 100644
--- a/langchain_apify/tools.py
+++ b/langchain_apify/tools.py
@@ -1,3 +1,21 @@
+"""LangChain tools for the Apify platform.
+
+All tools require an Apify API token. Set it via the ``APIFY_API_TOKEN``
+environment variable, or pass ``apify_api_token`` to the tool constructor:
+
+.. code-block:: python
+
+    import os
+    os.environ["APIFY_API_TOKEN"] = "your-apify-api-token"
+
+    from langchain_apify import ApifyRunActorTool
+
+    tool = ApifyRunActorTool()
+    result = tool.invoke({"actor_id": "apify/python-example"})
+
+For details, see https://docs.apify.com/platform/integrations/langchain
+"""
+
 from __future__ import annotations
 
 import json

From a5bd7cce8a178da607392b651f361692af5fb682 Mon Sep 17 00:00:00 2001
From: David Omrai <david.omrai@seznam.cz>
Date: Tue, 28 Apr 2026 10:48:52 +0200
Subject: [PATCH 50/63] ref: rename model_post_init parameter to context

---
 langchain_apify/tools.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/langchain_apify/tools.py b/langchain_apify/tools.py
index 385fc57..46846f2 100644
--- a/langchain_apify/tools.py
+++ b/langchain_apify/tools.py
@@ -340,12 +340,12 @@ class _ApifyGenericTool(BaseTool):  # type: ignore[override]
 
     _client: ApifyToolsClient = PrivateAttr()
 
-    def model_post_init(self, __context: Any) -> None:  # noqa: ANN401
+    def model_post_init(self, context: Any) -> None:  # noqa: ANN401
         if self.apify_api_token is None:
             msg = _ERROR_APIFY_TOKEN_ENV_VAR_NOT_SET
             raise ValueError(msg)
         self._client = ApifyToolsClient(apify_api_token=self.apify_api_token.get_secret_value())
-        super().model_post_init(__context)
+        super().model_post_init(context)
 
     def _clamp_timeout(self, value: int) -> int:
         return max(1, min(value, self.max_timeout_secs))

From 23242c1a490b8ec64f03838cf97ac33412bd1a59 Mon Sep 17 00:00:00 2001
From: David Omrai <david.omrai@seznam.cz>
Date: Tue, 28 Apr 2026 11:18:51 +0200
Subject: [PATCH 51/63] revert: restore env-fallback

---
 langchain_apify/_client.py      | 19 ++++++++++++++-----
 tests/unit_tests/test_client.py | 12 ++++++++++--
 2 files changed, 24 insertions(+), 7 deletions(-)

diff --git a/langchain_apify/_client.py b/langchain_apify/_client.py
index cc1e4b8..77fe0dd 100644
--- a/langchain_apify/_client.py
+++ b/langchain_apify/_client.py
@@ -1,8 +1,11 @@
 from __future__ import annotations
 
+import os
+
 import httpx
 from apify_client import ApifyClient
 from apify_client.errors import ApifyClientError
+from pydantic import SecretStr
 
 from langchain_apify._error_messages import (
     _ERROR_ACTOR_RUN_FAILED,
@@ -28,17 +31,23 @@ class ApifyToolsClient:
     block until the Actor run finishes.
 
     Args:
-        apify_api_token: Apify API token.
+        apify_api_token: Apify API token. Falls back to the ``APIFY_API_TOKEN``
+            environment variable when *None*.
 
     Raises:
-        ValueError: If the token is empty.
+        ValueError: If no token is provided and the env var is not set.
     """
 
-    def __init__(self, apify_api_token: str) -> None:
-        if not apify_api_token:
+    def __init__(self, apify_api_token: SecretStr | str | None = None) -> None:
+        if isinstance(apify_api_token, SecretStr):
+            _token: str | None = apify_api_token.get_secret_value()
+        else:
+            _token = apify_api_token or os.getenv('APIFY_API_TOKEN')
+
+        if not _token:
             msg = _ERROR_APIFY_TOKEN_ENV_VAR_NOT_SET
             raise ValueError(msg)
-        self._client = _create_apify_client(ApifyClient, apify_api_token)
+        self._client = _create_apify_client(ApifyClient, _token)
 
     def run_actor(
         self,
diff --git a/tests/unit_tests/test_client.py b/tests/unit_tests/test_client.py
index 43f6f83..c43e4d1 100644
--- a/tests/unit_tests/test_client.py
+++ b/tests/unit_tests/test_client.py
@@ -20,9 +20,17 @@ def test_init_with_explicit_token(mock_apify_client: MagicMock) -> None:
         assert c._client is mock_apify_client
 
 
-def test_init_empty_token_raises() -> None:
+def test_init_with_env_token(monkeypatch: pytest.MonkeyPatch, mock_apify_client: MagicMock) -> None:
+    monkeypatch.setenv('APIFY_API_TOKEN', 'env-token')
+    with patch('langchain_apify._client._create_apify_client', return_value=mock_apify_client):
+        c = ApifyToolsClient()
+        assert c._client is mock_apify_client
+
+
+def test_init_missing_token_raises(monkeypatch: pytest.MonkeyPatch) -> None:
+    monkeypatch.delenv('APIFY_API_TOKEN', raising=False)
     with pytest.raises(ValueError, match='APIFY_API_TOKEN'):
-        ApifyToolsClient(apify_api_token='')
+        ApifyToolsClient()
 
 
 # ---------------------------------------------------------------------------

From 7ea3e8ccfbbb6f1af76bfaa4ac75f4dea451249c Mon Sep 17 00:00:00 2001
From: David Omrai <david.omrai@seznam.cz>
Date: Tue, 28 Apr 2026 12:12:54 +0200
Subject: [PATCH 52/63] chore: drop placeholder section in _actor_tools.py

---
 langchain_apify/_actor_tools.py | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/langchain_apify/_actor_tools.py b/langchain_apify/_actor_tools.py
index 0746f4c..bc06136 100644
--- a/langchain_apify/_actor_tools.py
+++ b/langchain_apify/_actor_tools.py
@@ -1,8 +1,8 @@
-"""Actor-specific tool subclasses (search, social-media, etc.).
+"""Actor-specific tool subclasses.
 
-Downstream feature branches add concrete tools here.  They inherit from
-:class:`~langchain_apify.tools._ApifyGenericTool` and use
-:func:`~langchain_apify.tools._run_meta` to format run metadata.
+Tools in this module wrap a single Apify Actor behind a simplified,
+LLM-friendly interface. They inherit from
+:class:`~langchain_apify.tools._ApifyGenericTool`.
 """
 
 from __future__ import annotations
@@ -153,8 +153,3 @@ def _run(
             for item in items
         ]
         return json.dumps(pages)
-
-
-# ---------------------------------------------------------------------------
-# Social-media tools
-# ---------------------------------------------------------------------------

From 700e5ab8aeb76a1da09c484ae7dd042b7e197ba7 Mon Sep 17 00:00:00 2001
From: David Omrai <david.omrai@seznam.cz>
Date: Tue, 28 Apr 2026 12:15:57 +0200
Subject: [PATCH 53/63] chore: align APIFY_ACTOR_TOOLS type hint with
 APIFY_CORE_TOOLS

---
 langchain_apify/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/langchain_apify/__init__.py b/langchain_apify/__init__.py
index 5030d2a..287e2ab 100644
--- a/langchain_apify/__init__.py
+++ b/langchain_apify/__init__.py
@@ -39,7 +39,7 @@
     ApifyRunTaskAndGetDatasetTool,
 ]
 
-APIFY_ACTOR_TOOLS: list[type] = [
+APIFY_ACTOR_TOOLS: list[type[BaseTool]] = [
     ApifyGoogleSearchTool,
     ApifyWebCrawlerTool,
 ]

From c0dd11eaf77a96f7a7aa68a789bee79b54a06fdc Mon Sep 17 00:00:00 2001
From: David Omrai <david.omrai@seznam.cz>
Date: Tue, 28 Apr 2026 12:30:39 +0200
Subject: [PATCH 54/63] feat: constrain crawler_type to a Literal of valid
 Apify values

---
 langchain_apify/_actor_tools.py           |  3 ++-
 langchain_apify/document_loaders.py       |  4 +++-
 langchain_apify/tools.py                  | 10 ++++++++--
 tests/unit_tests/test_actor_tools.py      |  4 ++--
 tests/unit_tests/test_document_loaders.py |  4 ++--
 5 files changed, 17 insertions(+), 8 deletions(-)

diff --git a/langchain_apify/_actor_tools.py b/langchain_apify/_actor_tools.py
index bc06136..ec780f0 100644
--- a/langchain_apify/_actor_tools.py
+++ b/langchain_apify/_actor_tools.py
@@ -16,6 +16,7 @@
 from langchain_apify.tools import (
     ApifyGoogleSearchInput,
     ApifyWebCrawlerInput,
+    CrawlerType,
     _ApifyGenericTool,
 )
 
@@ -130,7 +131,7 @@ def _run(
         url: str,
         max_crawl_pages: int = 10,
         max_crawl_depth: int = 1,
-        crawler_type: str = 'cheerio',
+        crawler_type: CrawlerType = 'cheerio',
         timeout_secs: int = 300,
         _run_manager: CallbackManagerForToolRun | None = None,
     ) -> str:
diff --git a/langchain_apify/document_loaders.py b/langchain_apify/document_loaders.py
index 014998e..6177a52 100644
--- a/langchain_apify/document_loaders.py
+++ b/langchain_apify/document_loaders.py
@@ -17,6 +17,8 @@
 if TYPE_CHECKING:
     from collections.abc import Iterator
 
+    from langchain_apify.tools import CrawlerType
+
 
 class ApifyDatasetLoader(BaseLoader, BaseModel):
     """Load datasets from Apify web scraping, crawling, and data extraction platform.
@@ -173,7 +175,7 @@ def __init__(  # noqa: PLR0913
         *,
         max_crawl_pages: int = 10,
         max_crawl_depth: int = 1,
-        crawler_type: str = 'cheerio',
+        crawler_type: CrawlerType = 'cheerio',
         timeout_secs: int = 300,
     ) -> None:
         self.url = url
diff --git a/langchain_apify/tools.py b/langchain_apify/tools.py
index 4fad7f2..f8af0e5 100644
--- a/langchain_apify/tools.py
+++ b/langchain_apify/tools.py
@@ -21,7 +21,7 @@
 import json
 import os
 from datetime import datetime
-from typing import TYPE_CHECKING, Any
+from typing import TYPE_CHECKING, Any, Literal
 
 from apify_client import ApifyClient
 from langchain_core.tools import BaseTool, ToolException
@@ -42,6 +42,9 @@
     from langchain_core.callbacks import (
         CallbackManagerForToolRun,
     )
+    
+CrawlerType = Literal['cheerio', 'playwright:adaptive', 'playwright:firefox']
+
 
 
 class ApifyActorsTool(BaseTool):  # type: ignore[override, override]
@@ -275,7 +278,10 @@ class ApifyWebCrawlerInput(BaseModel):
     url: str = Field(description='Seed URL to start crawling from.')
     max_crawl_pages: int = Field(default=10, description='Maximum number of pages to crawl.')
     max_crawl_depth: int = Field(default=1, description='Maximum link-follow depth from the seed URL.')
-    crawler_type: str = Field(default='cheerio', description='Crawler engine (e.g. "cheerio", "playwright").')
+    crawler_type: CrawlerType = Field(
+        default='cheerio',
+        description='Crawler engine: "cheerio" (fast, static HTML), "playwright:adaptive" or "playwright:firefox".',
+    )
     timeout_secs: int = Field(default=300, description='Maximum time in seconds to wait for the crawl to finish.')
 
 
diff --git a/tests/unit_tests/test_actor_tools.py b/tests/unit_tests/test_actor_tools.py
index 6448d89..df6e391 100644
--- a/tests/unit_tests/test_actor_tools.py
+++ b/tests/unit_tests/test_actor_tools.py
@@ -108,7 +108,7 @@ def test_web_crawler_tool_passes_params(mock_tools_client: MagicMock) -> None:
         url='https://example.com',
         max_crawl_pages=5,
         max_crawl_depth=2,
-        crawler_type='playwright',
+        crawler_type='playwright:firefox',
         timeout_secs=120,
     )
 
@@ -116,7 +116,7 @@ def test_web_crawler_tool_passes_params(mock_tools_client: MagicMock) -> None:
         'https://example.com',
         max_crawl_pages=5,
         max_crawl_depth=2,
-        crawler_type='playwright',
+        crawler_type='playwright:firefox',
         timeout_secs=120,
     )
 
diff --git a/tests/unit_tests/test_document_loaders.py b/tests/unit_tests/test_document_loaders.py
index 2bdcc8f..a9d7ca1 100644
--- a/tests/unit_tests/test_document_loaders.py
+++ b/tests/unit_tests/test_document_loaders.py
@@ -129,7 +129,7 @@ def test_crawl_loader_passes_params() -> None:
         mock_client,
         max_crawl_pages=5,
         max_crawl_depth=2,
-        crawler_type='playwright',
+        crawler_type='playwright:firefox',
         timeout_secs=120,
     )
 
@@ -139,7 +139,7 @@ def test_crawl_loader_passes_params() -> None:
         'https://example.com',
         max_crawl_pages=5,
         max_crawl_depth=2,
-        crawler_type='playwright',
+        crawler_type='playwright:firefox',
         timeout_secs=120,
     )
 

From 01899433c694415133f14bb3ad767047a2949993 Mon Sep 17 00:00:00 2001
From: David Omrai <david.omrai@seznam.cz>
Date: Tue, 28 Apr 2026 12:34:46 +0200
Subject: [PATCH 55/63] feat: clamp max_crawl_depth in ApifyWebCrawlerTool

---
 langchain_apify/_actor_tools.py      |  2 +-
 langchain_apify/tools.py             |  7 ++++++-
 tests/unit_tests/test_actor_tools.py | 12 ++++++++++++
 3 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/langchain_apify/_actor_tools.py b/langchain_apify/_actor_tools.py
index ec780f0..5301a17 100644
--- a/langchain_apify/_actor_tools.py
+++ b/langchain_apify/_actor_tools.py
@@ -139,7 +139,7 @@ def _run(
             items = self._client.crawl_website(
                 url,
                 max_crawl_pages=self._clamp_items(max_crawl_pages),
-                max_crawl_depth=max_crawl_depth,
+                max_crawl_depth=self._clamp_depth(max_crawl_depth),
                 crawler_type=crawler_type,
                 timeout_secs=self._clamp_timeout(timeout_secs),
             )
diff --git a/langchain_apify/tools.py b/langchain_apify/tools.py
index f8af0e5..b14bf3e 100644
--- a/langchain_apify/tools.py
+++ b/langchain_apify/tools.py
@@ -42,7 +42,7 @@
     from langchain_core.callbacks import (
         CallbackManagerForToolRun,
     )
-    
+
 CrawlerType = Literal['cheerio', 'playwright:adaptive', 'playwright:firefox']
 
 
@@ -362,6 +362,7 @@ class _ApifyGenericTool(BaseTool):  # type: ignore[override]
     max_timeout_secs: int = Field(default=600, description='Upper bound for timeout_secs the LLM may request.')
     max_memory_mbytes: int = Field(default=32768, description='Upper bound for memory_mbytes the LLM may request.')
     max_items: int = Field(default=1000, description='Upper bound for limit / dataset_items_limit the LLM may request.')
+    max_crawl_depth: int = Field(default=5, description='Upper bound for max_crawl_depth the LLM may request.')
 
     _client: ApifyToolsClient = PrivateAttr()
 
@@ -386,6 +387,10 @@ def _clamp_memory(self, value: int | None) -> int | None:
     def _clamp_items(self, value: int) -> int:
         return max(1, min(value, self.max_items))
 
+    def _clamp_depth(self, value: int) -> int:
+        # Floor at 0 (a depth of 0 means "only crawl the seed URL").
+        return max(0, min(value, self.max_crawl_depth))
+
 
 # ---------------------------------------------------------------------------
 # Generic tools
diff --git a/tests/unit_tests/test_actor_tools.py b/tests/unit_tests/test_actor_tools.py
index df6e391..227c3f7 100644
--- a/tests/unit_tests/test_actor_tools.py
+++ b/tests/unit_tests/test_actor_tools.py
@@ -132,6 +132,18 @@ def test_web_crawler_tool_clamps_pages_and_timeout(mock_tools_client: MagicMock)
     assert call_kwargs.kwargs['timeout_secs'] == 60
 
 
+def test_web_crawler_tool_clamps_depth(mock_tools_client: MagicMock) -> None:
+    mock_tools_client.crawl_website.return_value = []
+    tool = make_tool(ApifyWebCrawlerTool, mock_tools_client, max_crawl_depth=2)
+
+    tool._run(url='https://example.com', max_crawl_depth=999)
+    assert mock_tools_client.crawl_website.call_args.kwargs['max_crawl_depth'] == 2
+
+    mock_tools_client.crawl_website.reset_mock()
+    tool._run(url='https://example.com', max_crawl_depth=-1)
+    assert mock_tools_client.crawl_website.call_args.kwargs['max_crawl_depth'] == 0
+
+
 def test_web_crawler_tool_empty_results(mock_tools_client: MagicMock) -> None:
     mock_tools_client.crawl_website.return_value = []
     tool = make_tool(ApifyWebCrawlerTool, mock_tools_client)

From 6d2422d8c033832fc242679870553a6855ea0b0a Mon Sep 17 00:00:00 2001
From: David Omrai <david.omrai@seznam.cz>
Date: Tue, 28 Apr 2026 12:40:57 +0200
Subject: [PATCH 56/63] feat: expose timeout_secs in ApifyGoogleSearchInput

---
 langchain_apify/_actor_tools.py      |  6 ++++--
 langchain_apify/tools.py             |  1 +
 tests/unit_tests/test_actor_tools.py | 13 +++++++++++--
 3 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/langchain_apify/_actor_tools.py b/langchain_apify/_actor_tools.py
index 5301a17..ab9c46d 100644
--- a/langchain_apify/_actor_tools.py
+++ b/langchain_apify/_actor_tools.py
@@ -60,7 +60,8 @@ class ApifyGoogleSearchTool(_ApifyGenericTool):  # type: ignore[override]
         ' Each result has keys: title, url, description.'
         ' Required: query (str) — the search query.'
         ' Optional: max_results (int, default 10),'
-        ' country_code (str|null), language_code (str|null).'
+        ' country_code (str|null), language_code (str|null),'
+        ' timeout_secs (int, default 300).'
     )
     args_schema: type[BaseModel] = ApifyGoogleSearchInput
 
@@ -70,6 +71,7 @@ def _run(
         max_results: int = 10,
         country_code: str | None = None,
         language_code: str | None = None,
+        timeout_secs: int = 300,
         _run_manager: CallbackManagerForToolRun | None = None,
     ) -> str:
         try:
@@ -78,7 +80,7 @@ def _run(
                 max_results=self._clamp_items(max_results),
                 country_code=country_code,
                 language_code=language_code,
-                timeout_secs=self.max_timeout_secs,
+                timeout_secs=self._clamp_timeout(timeout_secs),
             )
         except RuntimeError as exc:
             raise ToolException(str(exc)) from exc
diff --git a/langchain_apify/tools.py b/langchain_apify/tools.py
index b14bf3e..a012a1c 100644
--- a/langchain_apify/tools.py
+++ b/langchain_apify/tools.py
@@ -270,6 +270,7 @@ class ApifyGoogleSearchInput(BaseModel):
     max_results: int = Field(default=10, description='Maximum number of search results to return.')
     country_code: str | None = Field(default=None, description='Two-letter country code for localised results.')
     language_code: str | None = Field(default=None, description='Two-letter language code.')
+    timeout_secs: int = Field(default=300, description='Maximum time in seconds to wait for the search to finish.')
 
 
 class ApifyWebCrawlerInput(BaseModel):
diff --git a/tests/unit_tests/test_actor_tools.py b/tests/unit_tests/test_actor_tools.py
index 227c3f7..fc03026 100644
--- a/tests/unit_tests/test_actor_tools.py
+++ b/tests/unit_tests/test_actor_tools.py
@@ -36,17 +36,26 @@ def test_google_search_tool_passes_params(mock_tools_client: MagicMock) -> None:
     mock_tools_client.google_search.return_value = []
     tool = make_tool(ApifyGoogleSearchTool, mock_tools_client)
 
-    tool._run(query='test', max_results=5, country_code='us', language_code='en')
+    tool._run(query='test', max_results=5, country_code='us', language_code='en', timeout_secs=120)
 
     mock_tools_client.google_search.assert_called_once_with(
         'test',
         max_results=5,
         country_code='us',
         language_code='en',
-        timeout_secs=600,
+        timeout_secs=120,
     )
 
 
+def test_google_search_tool_clamps_timeout(mock_tools_client: MagicMock) -> None:
+    mock_tools_client.google_search.return_value = []
+    tool = make_tool(ApifyGoogleSearchTool, mock_tools_client, max_timeout_secs=60)
+
+    tool._run(query='test', timeout_secs=9999)
+
+    assert mock_tools_client.google_search.call_args.kwargs['timeout_secs'] == 60
+
+
 def test_google_search_tool_clamps_max_results(mock_tools_client: MagicMock) -> None:
     mock_tools_client.google_search.return_value = []
     tool = make_tool(ApifyGoogleSearchTool, mock_tools_client, max_items=3)

From 2dfecd7e971adc9ea211d800e972969f75d6701b Mon Sep 17 00:00:00 2001
From: David Omrai <david.omrai@seznam.cz>
Date: Tue, 28 Apr 2026 12:43:08 +0200
Subject: [PATCH 57/63] ref: accept SecretStr token in ApifyCrawlLoader

---
 langchain_apify/document_loaders.py       | 2 +-
 tests/unit_tests/test_document_loaders.py | 7 +++++++
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/langchain_apify/document_loaders.py b/langchain_apify/document_loaders.py
index 6177a52..bfaa761 100644
--- a/langchain_apify/document_loaders.py
+++ b/langchain_apify/document_loaders.py
@@ -171,7 +171,7 @@ class ApifyCrawlLoader(BaseLoader):
     def __init__(  # noqa: PLR0913
         self,
         url: str,
-        apify_api_token: str | None = None,
+        apify_api_token: str | SecretStr | None = None,
         *,
         max_crawl_pages: int = 10,
         max_crawl_depth: int = 1,
diff --git a/tests/unit_tests/test_document_loaders.py b/tests/unit_tests/test_document_loaders.py
index a9d7ca1..5c71704 100644
--- a/tests/unit_tests/test_document_loaders.py
+++ b/tests/unit_tests/test_document_loaders.py
@@ -7,6 +7,7 @@
 from apify_client._types import ListPage
 from apify_client.clients import DatasetClient
 from langchain_core.documents import Document
+from pydantic import SecretStr
 
 from langchain_apify import ApifyCrawlLoader, ApifyDatasetLoader
 from langchain_apify._client import ApifyToolsClient
@@ -185,6 +186,12 @@ def test_crawl_loader_missing_token(monkeypatch: pytest.MonkeyPatch) -> None:
         ApifyCrawlLoader(url='https://example.com')
 
 
+def test_crawl_loader_accepts_secretstr_token() -> None:
+    with patch('langchain_apify._client._create_apify_client'):
+        loader = ApifyCrawlLoader(url='https://example.com', apify_api_token=SecretStr('s'))
+    assert loader.url == 'https://example.com'
+
+
 def test_crawl_loader_failure_raises() -> None:
     mock_client = MagicMock(spec=ApifyToolsClient)
     mock_client.crawl_website.side_effect = RuntimeError('Actor run run-bad ended with status FAILED.')

From 9c8178564eb503cf2b6cfb4f50e99d0655120ffa Mon Sep 17 00:00:00 2001
From: David Omrai <david.omrai@seznam.cz>
Date: Tue, 28 Apr 2026 12:49:13 +0200
Subject: [PATCH 58/63] docs: clarify ApifyCrawlLoader.lazy_load is not truly
 lazy

---
 langchain_apify/document_loaders.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/langchain_apify/document_loaders.py b/langchain_apify/document_loaders.py
index bfaa761..e1f0e6a 100644
--- a/langchain_apify/document_loaders.py
+++ b/langchain_apify/document_loaders.py
@@ -186,7 +186,7 @@ def __init__(  # noqa: PLR0913
         self._client = ApifyToolsClient(apify_api_token=apify_api_token)
 
     def lazy_load(self) -> Iterator[Document]:
-        """Crawl the website and yield Documents lazily.
+        """Crawl the website and yield Documents.
 
         Yields:
             Document: One document per crawled page.

From 49dd4f0651877163eb115ea208a7144eab4531b0 Mon Sep 17 00:00:00 2001
From: David Omrai <david.omrai@seznam.cz>
Date: Tue, 28 Apr 2026 13:12:50 +0200
Subject: [PATCH 59/63] ref: rewrite ApifySearchRetriever to use
 ApifyToolsClient

---
 langchain_apify/retrievers.py       |  85 ++++++-----------
 langchain_apify/tools.py            |   1 -
 tests/unit_tests/test_retrievers.py | 143 ++++++++++++----------------
 3 files changed, 89 insertions(+), 140 deletions(-)

diff --git a/langchain_apify/retrievers.py b/langchain_apify/retrievers.py
index 9d4c30e..67a5d0a 100644
--- a/langchain_apify/retrievers.py
+++ b/langchain_apify/retrievers.py
@@ -2,16 +2,15 @@
 
 from __future__ import annotations
 
-import os
+import asyncio
 from typing import TYPE_CHECKING, Any
 
-from apify_client import ApifyClient, ApifyClientAsync
 from langchain_core.documents import Document
 from langchain_core.retrievers import BaseRetriever
-from pydantic import Field, PrivateAttr
+from langchain_core.utils import secret_from_env
+from pydantic import Field, PrivateAttr, SecretStr
 
-from langchain_apify._error_messages import _ERROR_APIFY_TOKEN_ENV_VAR_NOT_SET
-from langchain_apify._utils import _create_apify_client
+from langchain_apify._client import ApifyToolsClient
 
 if TYPE_CHECKING:
     from langchain_core.callbacks import (
@@ -19,7 +18,6 @@
         CallbackManagerForRetrieverRun,
     )
 
-_RAG_WEB_BROWSER_ACTOR_ID = 'apify/rag-web-browser'
 _DEFAULT_TIMEOUT_SECS = 300
 
 
@@ -52,20 +50,25 @@ class ApifySearchRetriever(BaseRetriever):
             docs = retriever.invoke("What is LangChain?")
     """
 
+    apify_api_token: SecretStr | None = Field(
+        default_factory=secret_from_env('APIFY_API_TOKEN', default=None),
+        description='Apify API token. Falls back to the APIFY_API_TOKEN environment variable when None.',
+        exclude=True,
+        repr=False,
+    )
     max_results: int = Field(default=5, description='Maximum number of documents to return.')
     timeout_secs: int = Field(default=_DEFAULT_TIMEOUT_SECS, description='Maximum Actor run time in seconds.')
 
-    _sync_client: ApifyClient = PrivateAttr()
-    _async_client: ApifyClientAsync = PrivateAttr()
+    _client: ApifyToolsClient = PrivateAttr()
+
+    def model_post_init(self, context: Any) -> None:  # noqa: ANN401
+        """Construct the underlying ``ApifyToolsClient``.
 
-    def __init__(self, apify_api_token: str | None = None, **kwargs: Any) -> None:  # noqa: ANN401
-        super().__init__(**kwargs)
-        token = apify_api_token or os.getenv('APIFY_API_TOKEN')
-        if not token:
-            msg = _ERROR_APIFY_TOKEN_ENV_VAR_NOT_SET
-            raise ValueError(msg)
-        self._sync_client = _create_apify_client(ApifyClient, token)
-        self._async_client = _create_apify_client(ApifyClientAsync, token)
+        The helper handles ``None`` / ``SecretStr`` / env-fallback and raises
+        ``ValueError`` if no token is available.
+        """
+        self._client = ApifyToolsClient(apify_api_token=self.apify_api_token)
+        super().model_post_init(context)
 
     def _get_relevant_documents(
         self,
@@ -73,29 +76,10 @@ def _get_relevant_documents(
         *,
         run_manager: CallbackManagerForRetrieverRun | None = None,  # noqa: ARG002
     ) -> list[Document]:
-        run_input = {
-            'query': query,
-            'maxResults': self.max_results,
-        }
-        run = self._sync_client.actor(_RAG_WEB_BROWSER_ACTOR_ID).call(
-            run_input=run_input,
+        items = self._client.rag_web_search(
+            query,
+            max_results=self.max_results,
             timeout_secs=self.timeout_secs,
-            logger=None,
-        )
-        if run is None:
-            return []
-
-        dataset_id = run.get('defaultDatasetId')
-        if not dataset_id:
-            return []
-
-        items = (
-            self._sync_client.dataset(dataset_id)
-            .list_items(
-                limit=self.max_results,
-                clean=True,
-            )
-            .items
         )
         return self._items_to_documents(items)
 
@@ -105,28 +89,13 @@ async def _aget_relevant_documents(
         *,
         run_manager: AsyncCallbackManagerForRetrieverRun | None = None,  # noqa: ARG002
     ) -> list[Document]:
-        run_input = {
-            'query': query,
-            'maxResults': self.max_results,
-        }
-        run = await self._async_client.actor(_RAG_WEB_BROWSER_ACTOR_ID).call(
-            run_input=run_input,
+        # ApifyToolsClient is sync-only.
+        items = await asyncio.to_thread(
+            self._client.rag_web_search,
+            query,
+            max_results=self.max_results,
             timeout_secs=self.timeout_secs,
-            logger=None,
         )
-        if run is None:
-            return []
-
-        dataset_id = run.get('defaultDatasetId')
-        if not dataset_id:
-            return []
-
-        items = (
-            await self._async_client.dataset(dataset_id).list_items(
-                limit=self.max_results,
-                clean=True,
-            )
-        ).items
         return self._items_to_documents(items)
 
     @staticmethod
diff --git a/langchain_apify/tools.py b/langchain_apify/tools.py
index a012a1c..ec4359d 100644
--- a/langchain_apify/tools.py
+++ b/langchain_apify/tools.py
@@ -46,7 +46,6 @@
 CrawlerType = Literal['cheerio', 'playwright:adaptive', 'playwright:firefox']
 
 
-
 class ApifyActorsTool(BaseTool):  # type: ignore[override, override]
     """Tool that runs Apify Actors.
 
diff --git a/tests/unit_tests/test_retrievers.py b/tests/unit_tests/test_retrievers.py
index ee02dba..0106645 100644
--- a/tests/unit_tests/test_retrievers.py
+++ b/tests/unit_tests/test_retrievers.py
@@ -1,10 +1,13 @@
 from __future__ import annotations
 
-from unittest.mock import AsyncMock, MagicMock, patch
+from typing import Any
+from unittest.mock import MagicMock, patch
 
 import pytest
 from langchain_core.documents import Document
+from pydantic import SecretStr
 
+from langchain_apify._client import ApifyToolsClient
 from langchain_apify.retrievers import ApifySearchRetriever
 
 RAG_ITEMS: list[dict] = [
@@ -21,17 +24,12 @@
 ]
 
 
-def _make_retriever(
-    mock_sync_client: MagicMock,
-    mock_async_client: MagicMock | None = None,
-    **kwargs: object,
-) -> ApifySearchRetriever:
-    """Create a retriever with mocked Apify clients."""
-    with (
-        patch('langchain_apify.retrievers._create_apify_client') as mock_create,
-    ):
-        mock_create.side_effect = [mock_sync_client, mock_async_client or MagicMock()]
-        return ApifySearchRetriever(apify_api_token='dummy-token', **kwargs)
+def _make_retriever(mock_client: MagicMock, **kwargs: Any) -> ApifySearchRetriever:  # noqa: ANN401
+    """Instantiate a retriever with a mocked ApifyToolsClient."""
+    with patch.object(ApifyToolsClient, '__init__', return_value=None):
+        retriever = ApifySearchRetriever(apify_api_token=SecretStr('dummy-token'), **kwargs)
+    retriever._client = mock_client
+    return retriever
 
 
 # ---------------------------------------------------------------------------
@@ -46,15 +44,15 @@ def test_missing_token_raises(monkeypatch: pytest.MonkeyPatch) -> None:
 
 
 def test_init_with_explicit_token() -> None:
-    with patch('langchain_apify.retrievers._create_apify_client'):
-        retriever = ApifySearchRetriever(apify_api_token='my-token')
+    with patch.object(ApifyToolsClient, '__init__', return_value=None):
+        retriever = ApifySearchRetriever(apify_api_token=SecretStr('my-token'))
         assert retriever.max_results == 5
         assert retriever.timeout_secs == 300
 
 
 def test_init_custom_params() -> None:
-    with patch('langchain_apify.retrievers._create_apify_client'):
-        retriever = ApifySearchRetriever(apify_api_token='t', max_results=3, timeout_secs=60)
+    with patch.object(ApifyToolsClient, '__init__', return_value=None):
+        retriever = ApifySearchRetriever(apify_api_token=SecretStr('t'), max_results=3, timeout_secs=60)
         assert retriever.max_results == 3
         assert retriever.timeout_secs == 60
 
@@ -65,13 +63,8 @@ def test_init_custom_params() -> None:
 
 
 def test_sync_returns_documents() -> None:
-    mock_client = MagicMock()
-    mock_client.actor.return_value.call.return_value = {
-        'id': 'run-1',
-        'status': 'SUCCEEDED',
-        'defaultDatasetId': 'ds-1',
-    }
-    mock_client.dataset.return_value.list_items.return_value.items = RAG_ITEMS
+    mock_client = MagicMock(spec=ApifyToolsClient)
+    mock_client.rag_web_search.return_value = RAG_ITEMS
     retriever = _make_retriever(mock_client, max_results=5)
 
     docs = retriever._get_relevant_documents('test query')
@@ -85,43 +78,23 @@ def test_sync_returns_documents() -> None:
     assert docs[1].metadata['source'] == 'https://example.com/2'
 
 
-def test_sync_passes_correct_input() -> None:
-    mock_client = MagicMock()
-    mock_client.actor.return_value.call.return_value = {
-        'defaultDatasetId': 'ds-1',
-    }
-    mock_client.dataset.return_value.list_items.return_value.items = []
+def test_sync_calls_helper_with_correct_args() -> None:
+    mock_client = MagicMock(spec=ApifyToolsClient)
+    mock_client.rag_web_search.return_value = []
     retriever = _make_retriever(mock_client, max_results=3, timeout_secs=60)
 
     retriever._get_relevant_documents('my search')
 
-    mock_client.actor.return_value.call.assert_called_once_with(
-        run_input={'query': 'my search', 'maxResults': 3},
+    mock_client.rag_web_search.assert_called_once_with(
+        'my search',
+        max_results=3,
         timeout_secs=60,
-        logger=None,
-    )
-    mock_client.dataset.return_value.list_items.assert_called_once_with(
-        limit=3,
-        clean=True,
     )
 
 
 def test_sync_empty_results() -> None:
-    mock_client = MagicMock()
-    mock_client.actor.return_value.call.return_value = {
-        'defaultDatasetId': 'ds-1',
-    }
-    mock_client.dataset.return_value.list_items.return_value.items = []
-    retriever = _make_retriever(mock_client)
-
-    docs = retriever._get_relevant_documents('test')
-
-    assert docs == []
-
-
-def test_sync_none_run_returns_empty() -> None:
-    mock_client = MagicMock()
-    mock_client.actor.return_value.call.return_value = None
+    mock_client = MagicMock(spec=ApifyToolsClient)
+    mock_client.rag_web_search.return_value = []
     retriever = _make_retriever(mock_client)
 
     docs = retriever._get_relevant_documents('test')
@@ -129,36 +102,28 @@ def test_sync_none_run_returns_empty() -> None:
     assert docs == []
 
 
-def test_sync_no_dataset_id_returns_empty() -> None:
-    mock_client = MagicMock()
-    mock_client.actor.return_value.call.return_value = {'id': 'run-1', 'defaultDatasetId': None}
+def test_sync_helper_failure_propagates() -> None:
+    mock_client = MagicMock(spec=ApifyToolsClient)
+    mock_client.rag_web_search.side_effect = RuntimeError(
+        'Actor run run-bad ended with status FAILED.',
+    )
     retriever = _make_retriever(mock_client)
 
-    docs = retriever._get_relevant_documents('test')
-
-    assert docs == []
+    with pytest.raises(RuntimeError, match='FAILED'):
+        retriever._get_relevant_documents('test')
 
 
 # ---------------------------------------------------------------------------
-# _aget_relevant_documents (async)
+# Async retrieval
 # ---------------------------------------------------------------------------
 
 
 @pytest.mark.asyncio
 async def test_async_returns_documents() -> None:
-    mock_async = MagicMock()
-    mock_async.actor.return_value.call = AsyncMock(
-        return_value={
-            'id': 'run-1',
-            'status': 'SUCCEEDED',
-            'defaultDatasetId': 'ds-1',
-        }
-    )
-    mock_list_items = AsyncMock()
-    mock_list_items.return_value.items = RAG_ITEMS
-    mock_async.dataset.return_value.list_items = mock_list_items
-
-    retriever = _make_retriever(MagicMock(), mock_async, max_results=5)
+    """Async path wraps the sync helper via asyncio.to_thread."""
+    mock_client = MagicMock(spec=ApifyToolsClient)
+    mock_client.rag_web_search.return_value = RAG_ITEMS
+    retriever = _make_retriever(mock_client, max_results=5)
 
     docs = await retriever._aget_relevant_documents('test query')
 
@@ -169,27 +134,43 @@ async def test_async_returns_documents() -> None:
 
 
 @pytest.mark.asyncio
-async def test_async_none_run_returns_empty() -> None:
-    mock_async = MagicMock()
-    mock_async.actor.return_value.call = AsyncMock(return_value=None)
-    retriever = _make_retriever(MagicMock(), mock_async)
+async def test_async_calls_helper_with_correct_args() -> None:
+    mock_client = MagicMock(spec=ApifyToolsClient)
+    mock_client.rag_web_search.return_value = []
+    retriever = _make_retriever(mock_client, max_results=3, timeout_secs=60)
 
-    docs = await retriever._aget_relevant_documents('test')
+    await retriever._aget_relevant_documents('my search')
 
-    assert docs == []
+    mock_client.rag_web_search.assert_called_once_with(
+        'my search',
+        max_results=3,
+        timeout_secs=60,
+    )
 
 
 @pytest.mark.asyncio
-async def test_async_no_dataset_id_returns_empty() -> None:
-    mock_async = MagicMock()
-    mock_async.actor.return_value.call = AsyncMock(return_value={'defaultDatasetId': None})
-    retriever = _make_retriever(MagicMock(), mock_async)
+async def test_async_empty_results() -> None:
+    mock_client = MagicMock(spec=ApifyToolsClient)
+    mock_client.rag_web_search.return_value = []
+    retriever = _make_retriever(mock_client)
 
     docs = await retriever._aget_relevant_documents('test')
 
     assert docs == []
 
 
+@pytest.mark.asyncio
+async def test_async_helper_failure_propagates() -> None:
+    mock_client = MagicMock(spec=ApifyToolsClient)
+    mock_client.rag_web_search.side_effect = RuntimeError(
+        'Actor run run-bad ended with status FAILED.',
+    )
+    retriever = _make_retriever(mock_client)
+
+    with pytest.raises(RuntimeError, match='FAILED'):
+        await retriever._aget_relevant_documents('test')
+
+
 # ---------------------------------------------------------------------------
 # _items_to_documents edge cases
 # ---------------------------------------------------------------------------

From a060c146e649855d97d6ea0c9a2ee135f8902241 Mon Sep 17 00:00:00 2001
From: David Omrai <david.omrai@seznam.cz>
Date: Tue, 28 Apr 2026 14:06:40 +0200
Subject: [PATCH 60/63] fix: normalise locale codes to lowercase to match Apify
 Actor schema

---
 langchain_apify/tools.py             | 19 ++++++++++++++---
 tests/unit_tests/test_actor_tools.py | 32 ++++++++++++++++++++++++++++
 2 files changed, 48 insertions(+), 3 deletions(-)

diff --git a/langchain_apify/tools.py b/langchain_apify/tools.py
index ec4359d..ff5c5b9 100644
--- a/langchain_apify/tools.py
+++ b/langchain_apify/tools.py
@@ -26,7 +26,7 @@
 from apify_client import ApifyClient
 from langchain_core.tools import BaseTool, ToolException
 from langchain_core.utils import secret_from_env
-from pydantic import BaseModel, Field, PrivateAttr, SecretStr, create_model
+from pydantic import BaseModel, Field, PrivateAttr, SecretStr, create_model, field_validator
 
 from langchain_apify._client import ApifyToolsClient
 from langchain_apify._error_messages import _ERROR_APIFY_TOKEN_ENV_VAR_NOT_SET
@@ -267,10 +267,23 @@ class ApifyGoogleSearchInput(BaseModel):
 
     query: str = Field(description='Search query string.')
     max_results: int = Field(default=10, description='Maximum number of search results to return.')
-    country_code: str | None = Field(default=None, description='Two-letter country code for localised results.')
-    language_code: str | None = Field(default=None, description='Two-letter language code.')
+    country_code: str | None = Field(
+        default=None,
+        description='Two-letter country code (case-insensitive; normalised to lowercase, e.g. "us", "gb").',
+        pattern=r'^[a-zA-Z]{2}$',
+    )
+    language_code: str | None = Field(
+        default=None,
+        description='Two-letter language code (case-insensitive; normalised to lowercase, e.g. "en", "fr").',
+        pattern=r'^[a-zA-Z]{2}$',
+    )
     timeout_secs: int = Field(default=300, description='Maximum time in seconds to wait for the search to finish.')
 
+    @field_validator('country_code', 'language_code')
+    @classmethod
+    def _normalise_locale_code(cls, value: str | None) -> str | None:
+        return value.lower() if value else value
+
 
 class ApifyWebCrawlerInput(BaseModel):
     """Input schema for :class:`ApifyWebCrawlerTool`."""
diff --git a/tests/unit_tests/test_actor_tools.py b/tests/unit_tests/test_actor_tools.py
index fc03026..e7fcf58 100644
--- a/tests/unit_tests/test_actor_tools.py
+++ b/tests/unit_tests/test_actor_tools.py
@@ -89,6 +89,38 @@ def test_google_search_tool_missing_token(monkeypatch: pytest.MonkeyPatch) -> No
         ApifyGoogleSearchTool()
 
 
+@pytest.mark.parametrize('bad_code', ['USA', 'english', 'u', 'us1', ''])
+def test_google_search_tool_rejects_malformed_locale(mock_tools_client: MagicMock, bad_code: str) -> None:
+    """country_code and language_code must be exactly two letters."""
+    tool = make_tool(ApifyGoogleSearchTool, mock_tools_client)
+
+    with pytest.raises(ValueError, match='string_pattern_mismatch|String should match pattern'):
+        tool.invoke({'query': 'test', 'country_code': bad_code})
+
+    with pytest.raises(ValueError, match='string_pattern_mismatch|String should match pattern'):
+        tool.invoke({'query': 'test', 'language_code': bad_code})
+
+
+@pytest.mark.parametrize('raw_country', ['us', 'US', 'Us', 'uS'])
+def test_google_search_tool_normalises_country_code_to_lower(mock_tools_client: MagicMock, raw_country: str) -> None:
+    mock_tools_client.google_search.return_value = []
+    tool = make_tool(ApifyGoogleSearchTool, mock_tools_client)
+
+    tool.invoke({'query': 'test', 'country_code': raw_country})
+
+    assert mock_tools_client.google_search.call_args.kwargs['country_code'] == 'us'
+
+
+@pytest.mark.parametrize('raw_language', ['en', 'EN', 'En', 'eN'])
+def test_google_search_tool_normalises_language_code_to_lower(mock_tools_client: MagicMock, raw_language: str) -> None:
+    mock_tools_client.google_search.return_value = []
+    tool = make_tool(ApifyGoogleSearchTool, mock_tools_client)
+
+    tool.invoke({'query': 'test', 'language_code': raw_language})
+
+    assert mock_tools_client.google_search.call_args.kwargs['language_code'] == 'en'
+
+
 # ---------------------------------------------------------------------------
 # ApifyWebCrawlerTool
 # ---------------------------------------------------------------------------

From a908467795e5f9d7fad61d9463f89f2a93080080 Mon Sep 17 00:00:00 2001
From: David Omrai <david.omrai@seznam.cz>
Date: Tue, 28 Apr 2026 15:24:07 +0200
Subject: [PATCH 61/63] fix: extract source URL from metadata.url for
 apify/rag-web-browser

---
 langchain_apify/retrievers.py       |  8 ++++++--
 tests/unit_tests/test_retrievers.py | 15 +++++++++++++++
 2 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/langchain_apify/retrievers.py b/langchain_apify/retrievers.py
index 67a5d0a..80a2099 100644
--- a/langchain_apify/retrievers.py
+++ b/langchain_apify/retrievers.py
@@ -104,9 +104,13 @@ def _items_to_documents(items: list[dict]) -> list[Document]:
         docs: list[Document] = []
         for item in items:
             page_content = item.get('text') or item.get('markdown') or ''
+            raw_meta = item.get('metadata')
+            item_metadata: dict = raw_meta if isinstance(raw_meta, dict) else {}
             metadata: dict[str, Any] = {
-                'source': item.get('crawledUrl') or item.get('url', ''),
-                'title': item.get('metadata', {}).get('title', '') if isinstance(item.get('metadata'), dict) else '',
+                # apify/rag-web-browser nests url/title under "metadata"; older Actors
+                # and tests expose the URL as top-level "crawledUrl"/"url" (checked first).
+                'source': item.get('crawledUrl') or item.get('url') or item_metadata.get('url', ''),
+                'title': item_metadata.get('title', ''),
             }
             docs.append(Document(page_content=page_content, metadata=metadata))
         return docs
diff --git a/tests/unit_tests/test_retrievers.py b/tests/unit_tests/test_retrievers.py
index 0106645..17dfba9 100644
--- a/tests/unit_tests/test_retrievers.py
+++ b/tests/unit_tests/test_retrievers.py
@@ -184,6 +184,21 @@ def test_items_to_documents_uses_url_fallback() -> None:
     assert docs[0].metadata['source'] == 'https://fallback.com'
 
 
+def test_items_to_documents_uses_metadata_url_fallback() -> None:
+    """apify/rag-web-browser nests the page URL under metadata.url."""
+    items = [
+        {
+            'metadata': {'url': 'https://nested.example.com', 'title': 'Nested'},
+            'text': 'content',
+        },
+    ]
+
+    docs = ApifySearchRetriever._items_to_documents(items)
+
+    assert docs[0].metadata['source'] == 'https://nested.example.com'
+    assert docs[0].metadata['title'] == 'Nested'
+
+
 def test_items_to_documents_uses_markdown_fallback() -> None:
     items = [{'crawledUrl': 'https://example.com', 'markdown': '# MD content', 'metadata': {'title': 'T'}}]
 

From 250e1ac81feeb1c654212317eeda9194fc50d073 Mon Sep 17 00:00:00 2001
From: David Omrai <david.omrai@seznam.cz>
Date: Tue, 5 May 2026 10:53:41 +0200
Subject: [PATCH 62/63] fix: rename APIFY_ACTOR_TOOLS to APIFY_SEARCH_TOOLS

---
 langchain_apify/__init__.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/langchain_apify/__init__.py b/langchain_apify/__init__.py
index 287e2ab..573365b 100644
--- a/langchain_apify/__init__.py
+++ b/langchain_apify/__init__.py
@@ -39,7 +39,7 @@
     ApifyRunTaskAndGetDatasetTool,
 ]
 
-APIFY_ACTOR_TOOLS: list[type[BaseTool]] = [
+APIFY_SEARCH_TOOLS: list[type[BaseTool]] = [
     ApifyGoogleSearchTool,
     ApifyWebCrawlerTool,
 ]
@@ -64,7 +64,7 @@
     # Loaders
     'ApifyCrawlLoader',
     # Tool group lists
-    'APIFY_ACTOR_TOOLS',
+    'APIFY_SEARCH_TOOLS',
     'APIFY_CORE_TOOLS',
     # Meta
     '__version__',

From f4cf20e7bad5ee627157cd8da95fa1f88c557e65 Mon Sep 17 00:00:00 2001
From: David Omrai <david.omrai@seznam.cz>
Date: Tue, 5 May 2026 10:55:16 +0200
Subject: [PATCH 63/63] test: update tests for APIFY_SEARCH_TOOLS rename

---
 tests/unit_tests/test_actor_tools.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/unit_tests/test_actor_tools.py b/tests/unit_tests/test_actor_tools.py
index e7fcf58..2b14809 100644
--- a/tests/unit_tests/test_actor_tools.py
+++ b/tests/unit_tests/test_actor_tools.py
@@ -7,7 +7,7 @@
 from langchain_core.tools import ToolException
 from pydantic import SecretStr
 
-from langchain_apify import APIFY_ACTOR_TOOLS, ApifyGoogleSearchTool, ApifyWebCrawlerTool
+from langchain_apify import APIFY_SEARCH_TOOLS, ApifyGoogleSearchTool, ApifyWebCrawlerTool
 from langchain_apify._client import ApifyToolsClient
 from langchain_apify.tools import _ApifyGenericTool
 from tests.unit_tests.conftest import make_tool
@@ -233,6 +233,6 @@ def test_actor_tools_have_correct_metadata() -> None:
         assert tool.handle_tool_error is True
 
 
-def test_apify_actor_tools_list() -> None:
-    assert set(APIFY_ACTOR_TOOLS) == {ApifyGoogleSearchTool, ApifyWebCrawlerTool}
-    assert len(APIFY_ACTOR_TOOLS) == 2
+def test_apify_search_tools_list() -> None:
+    assert set(APIFY_SEARCH_TOOLS) == {ApifyGoogleSearchTool, ApifyWebCrawlerTool}
+    assert len(APIFY_SEARCH_TOOLS) == 2