apify · daveomri · Apr 20, 2026 · Apr 20, 2026 · Apr 20, 2026 · Apr 20, 2026
diff --git a/README.md b/README.md
@@ -40,7 +40,7 @@ pip install langchain-apify
 ## Prerequisites
 
 You should configure credentials by setting the following environment variables:
-- `APIFY_API_TOKEN` - Apify API token
+- `APIFY_TOKEN` - Apify API token
 
 Register your free Apify account [here](https://console.apify.com/sign-up) and learn how to get your API token in the [Apify documentation](https://docs.apify.com/platform/integrations/api).
 
@@ -57,7 +57,7 @@ import json
 from langchain_apify import ApifyActorsTool
 
 os.environ["OPENAI_API_KEY"] = "YOUR_OPENAI_API_KEY"
-os.environ["APIFY_API_TOKEN"] = "YOUR_APIFY_API_TOKEN"
+os.environ["APIFY_TOKEN"] = "YOUR_APIFY_TOKEN"
 
 browser = ApifyActorsTool('apify/rag-web-browser')
 search_results = browser.invoke(input={
@@ -92,7 +92,7 @@ Example usage for `ApifyDatasetLoader` with a custom dataset mapping function fo
 import os
 from langchain_apify import ApifyDatasetLoader
 
-os.environ["APIFY_API_TOKEN"] = "YOUR_APIFY_API_TOKEN"
+os.environ["APIFY_TOKEN"] = "YOUR_APIFY_TOKEN"
 
 # Example dataset structure
 # [
@@ -129,7 +129,7 @@ import os
 from langchain_apify import ApifyWrapper
 from langchain_core.documents import Document
 
-os.environ["APIFY_API_TOKEN"] = "YOUR_APIFY_API_TOKEN"
+os.environ["APIFY_TOKEN"] = "YOUR_APIFY_TOKEN"
 
 apify = ApifyWrapper()
 

diff --git a/langchain_apify/__init__.py b/langchain_apify/__init__.py
@@ -1,19 +1,56 @@
+from __future__ import annotations
+
 from importlib import metadata
+from typing import TYPE_CHECKING
 
 from langchain_apify.document_loaders import ApifyDatasetLoader
-from langchain_apify.tools import ApifyActorsTool
+from langchain_apify.tools import (
+    ApifyActorsTool,
+    ApifyGetDatasetItemsTool,
+    ApifyRunActorAndGetDatasetTool,
+    ApifyRunActorTool,
+    ApifyRunTaskAndGetDatasetTool,
+    ApifyRunTaskTool,
+    ApifyScrapeUrlTool,
+)
 from langchain_apify.wrappers import ApifyWrapper
 
+if TYPE_CHECKING:
+    from langchain_core.tools import BaseTool
+
 try:
     __version__ = metadata.version(__package__)
 except metadata.PackageNotFoundError:
-    # Case where package metadata is not available.
     __version__ = ''
 del metadata  # optional, avoids polluting the results of dir(__package__)
 
+# Convenience lists of tool *classes* (not instances) for selective agent binding;
+# instantiate the ones you need. Binding all tools at once overwhelms the LLM
+# context window, so pick only the group(s) relevant to your use case.
+
+APIFY_CORE_TOOLS: list[type[BaseTool]] = [
+    ApifyRunActorTool,
+    ApifyGetDatasetItemsTool,
+    ApifyRunActorAndGetDatasetTool,
+    ApifyScrapeUrlTool,
+    ApifyRunTaskTool,
+    ApifyRunTaskAndGetDatasetTool,
+]
+
 __all__ = [
+    # Existing components (backward-compatible)
     'ApifyActorsTool',
     'ApifyDatasetLoader',
     'ApifyWrapper',
+    # Core generic tools
+    'ApifyGetDatasetItemsTool',
+    'ApifyRunActorAndGetDatasetTool',
+    'ApifyRunActorTool',
+    'ApifyRunTaskAndGetDatasetTool',
+    'ApifyRunTaskTool',
+    'ApifyScrapeUrlTool',
+    # Tool group lists
+    'APIFY_CORE_TOOLS',
+    # Meta
     '__version__',
 ]
diff --git a/langchain_apify/_client.py b/langchain_apify/_client.py
@@ -0,0 +1,256 @@
+from __future__ import annotations
+
+import httpx
+from apify_client import ApifyClient
+from apify_client.errors import ApifyClientError
+from pydantic import SecretStr
+
+from langchain_apify._error_messages import (
+    _ERROR_ACTOR_RUN_FAILED,
+    _ERROR_APIFY_TOKEN_ENV_VAR_NOT_SET,
+    _ERROR_SCRAPE_EMPTY,
+)
+from langchain_apify._utils import _create_apify_client, _resolve_apify_token
+
+# Exceptions treated as Apify transport failures and re-raised as RuntimeError; others propagate.
+_TRANSPORT_EXCEPTIONS = (ApifyClientError, httpx.HTTPError)
+
+_SCRAPE_ACTOR_ID = 'apify/website-content-crawler'
+_DEFAULT_RUN_TIMEOUT_SECS = 300
+_DEFAULT_SCRAPE_TIMEOUT_SECS = 120
+_DEFAULT_DATASET_ITEMS_LIMIT = 100
+_RUN_STATUS_SUCCEEDED = 'SUCCEEDED'
+
+
+class ApifyToolsClient:
+    """Internal helper that wraps ``ApifyClient`` for the tools layer.
+
+    One convenience method per tool operation. All methods are synchronous;
+    those that start an Actor or task run block until the run finishes.
+
+    Args:
+        apify_api_token: Apify API token. Falls back to the ``APIFY_TOKEN``
+            environment variable (or ``APIFY_API_TOKEN`` for backwards
+            compatibility) when *None* or empty.
+
+    Raises:
+        ValueError: If no token is provided and the env var is not set.
+    """
+
+    def __init__(self, apify_api_token: SecretStr | str | None = None) -> None:
+        if isinstance(apify_api_token, SecretStr):
+            apify_api_token = apify_api_token.get_secret_value()
+        # An empty token (plain or secret) counts as missing, so fall back to the env.
+        _token: str | None = apify_api_token or _resolve_apify_token()
+
+        if not _token:
+            msg = _ERROR_APIFY_TOKEN_ENV_VAR_NOT_SET
+            raise ValueError(msg)
+        self._client = _create_apify_client(ApifyClient, _token)
+
+    def run_actor(
+        self,
+        actor_id: str,
+        run_input: dict | None = None,
+        timeout_secs: int = _DEFAULT_RUN_TIMEOUT_SECS,
+        memory_mbytes: int | None = None,
+    ) -> dict:
+        """Start an Actor and block until it finishes.
+
+        Args:
+            actor_id: Actor ID or name (e.g. ``"apify/python-example"``).
+            run_input: JSON-serialisable input for the Actor.
+            timeout_secs: Maximum time to wait for the run to finish.
+            memory_mbytes: Memory limit for the run, or *None* for Actor default.
+
+        Returns:
+            Full run-details dict returned by the Apify API.
+
+        Raises:
+            RuntimeError: If the call fails or the run does not end as ``SUCCEEDED``.
+        """
+        call_kwargs: dict = {'run_input': run_input, 'timeout_secs': timeout_secs, 'logger': None}
+        if memory_mbytes is not None:
+            call_kwargs['memory_mbytes'] = memory_mbytes
+
+        try:
+            run = self._client.actor(actor_id).call(**call_kwargs)
+        except _TRANSPORT_EXCEPTIONS as exc:
+            msg = f'Apify Actor call failed for {actor_id}: {exc}'
+            raise RuntimeError(msg) from exc
+        if run is None:
+            msg = f'Actor {actor_id} call returned no run details.'
+            raise RuntimeError(msg)
+        self._check_run_status(run)
+        return run
+
+    def get_dataset_items(
+        self, dataset_id: str, limit: int = _DEFAULT_DATASET_ITEMS_LIMIT, offset: int = 0
+    ) -> list[dict]:
+        """Fetch items from an existing dataset; transport errors raise ``RuntimeError``.
+
+        Args:
+            dataset_id: Apify dataset ID.
+            limit: Maximum number of items to return.
+            offset: Number of items to skip from the start.
+
+        Returns:
+            List of dataset item dicts (may be empty).
+        """
+        try:
+            return self._client.dataset(dataset_id).list_items(limit=limit, offset=offset, clean=True).items
+        except _TRANSPORT_EXCEPTIONS as exc:
+            msg = f'Apify dataset fetch failed for {dataset_id}: {exc}'
+            raise RuntimeError(msg) from exc
+
+    def run_actor_and_get_items(
+        self,
+        actor_id: str,
+        run_input: dict | None = None,
+        timeout_secs: int = _DEFAULT_RUN_TIMEOUT_SECS,
+        memory_mbytes: int | None = None,
+        dataset_items_limit: int = _DEFAULT_DATASET_ITEMS_LIMIT,
+    ) -> tuple[dict, list[dict]]:
+        """Run an Actor, then fetch items from its default dataset.
+
+        Args:
+            actor_id: Actor ID or name.
+            run_input: JSON-serialisable input for the Actor.
+            timeout_secs: Maximum time to wait for the run to finish.
+            memory_mbytes: Memory limit for the run, or *None* for Actor default.
+            dataset_items_limit: Maximum number of dataset items to return.
+
+        Returns:
+            A ``(run_details, items)`` tuple.
+
+        Raises:
+            RuntimeError: If the run fails, has no default dataset, or the fetch fails.
+        """
+        run = self.run_actor(actor_id, run_input, timeout_secs, memory_mbytes)
+        dataset_id = run.get('defaultDatasetId')
+        if not dataset_id:
+            msg = f'Actor {actor_id} run succeeded but returned no default dataset ID.'
+            raise RuntimeError(msg)
+        items = self._list_items_or_raise(dataset_id, dataset_items_limit)
+        return run, items
+
+    def run_task(
+        self,
+        task_id: str,
+        task_input: dict | None = None,
+        timeout_secs: int = _DEFAULT_RUN_TIMEOUT_SECS,
+        memory_mbytes: int | None = None,
+    ) -> dict:
+        """Start a saved Actor task and block until it finishes.
+
+        Args:
+            task_id: Task ID or name (e.g. ``"user/my-task"``).
+            task_input: JSON-serialisable input that overrides the task's
+                pre-saved input.
+            timeout_secs: Maximum time to wait for the run to finish.
+            memory_mbytes: Memory limit for the run, or *None* for task default.
+
+        Returns:
+            Full run-details dict returned by the Apify API.
+
+        Raises:
+            RuntimeError: If the call fails or the run does not end as ``SUCCEEDED``.
+        """
+        call_kwargs: dict = {'task_input': task_input, 'timeout_secs': timeout_secs}
+        if memory_mbytes is not None:
+            call_kwargs['memory_mbytes'] = memory_mbytes
+
+        try:
+            run = self._client.task(task_id).call(**call_kwargs)
+        except _TRANSPORT_EXCEPTIONS as exc:
+            msg = f'Apify task call failed for {task_id}: {exc}'
+            raise RuntimeError(msg) from exc
+        if run is None:
+            msg = f'Task {task_id} call returned no run details.'
+            raise RuntimeError(msg)
+        self._check_run_status(run)
+        return run
+
+    def run_task_and_get_items(
+        self,
+        task_id: str,
+        task_input: dict | None = None,
+        timeout_secs: int = _DEFAULT_RUN_TIMEOUT_SECS,
+        memory_mbytes: int | None = None,
+        dataset_items_limit: int = _DEFAULT_DATASET_ITEMS_LIMIT,
+    ) -> tuple[dict, list[dict]]:
+        """Run a saved Actor task, then fetch items from its default dataset.
+
+        Args:
+            task_id: Task ID or name.
+            task_input: JSON-serialisable input that overrides the task's
+                pre-saved input.
+            timeout_secs: Maximum time to wait for the run to finish.
+            memory_mbytes: Memory limit for the run, or *None* for task default.
+            dataset_items_limit: Maximum number of dataset items to return.
+
+        Returns:
+            A ``(run_details, items)`` tuple.
+
+        Raises:
+            RuntimeError: If the run fails, has no default dataset, or the fetch fails.
+        """
+        run = self.run_task(task_id, task_input, timeout_secs, memory_mbytes)
+        dataset_id = run.get('defaultDatasetId')
+        if not dataset_id:
+            msg = f'Task {task_id} run succeeded but returned no default dataset ID.'
+            raise RuntimeError(msg)
+        items = self._list_items_or_raise(dataset_id, dataset_items_limit)
+        return run, items
+
+    def scrape_url(self, url: str, timeout_secs: int = _DEFAULT_SCRAPE_TIMEOUT_SECS) -> str:
+        """Scrape a single URL and return its content as markdown.
+
+        Uses ``apify/website-content-crawler`` with ``maxCrawlPages=1``.
+
+        Args:
+            url: The URL to scrape.
+            timeout_secs: Maximum time to wait for the crawl to finish.
+
+        Returns:
+            Markdown (or plain-text fallback) content of the page.
+
+        Raises:
+            RuntimeError: If the Actor run fails or no content is extracted.
+        """
+        run_input = {
+            'startUrls': [{'url': url}],
+            'maxCrawlPages': 1,
+        }
+        _, items = self.run_actor_and_get_items(
+            _SCRAPE_ACTOR_ID,
+            run_input=run_input,
+            timeout_secs=timeout_secs,
+            dataset_items_limit=1,
+        )
+        if not items:
+            msg = _ERROR_SCRAPE_EMPTY.format(url=url)
+            raise RuntimeError(msg)
+
+        content = items[0].get('markdown') or items[0].get('text') or ''
+        if not content:
+            msg = _ERROR_SCRAPE_EMPTY.format(url=url)
+            raise RuntimeError(msg)
+        return content
+
+    def _list_items_or_raise(self, dataset_id: str, limit: int) -> list[dict]:
+        """Fetch dataset items, re-raising transport errors as ``RuntimeError``."""
+        try:
+            return self._client.dataset(dataset_id).list_items(limit=limit, clean=True).items
+        except _TRANSPORT_EXCEPTIONS as exc:
+            msg = f'Apify dataset fetch failed for {dataset_id}: {exc}'
+            raise RuntimeError(msg) from exc
+
+    @staticmethod
+    def _check_run_status(run: dict) -> None:
+        """Raise if the run did not succeed."""
+        status = run.get('status')
+        if status != _RUN_STATUS_SUCCEEDED:
+            run_id = run.get('id', 'unknown')
+            msg = _ERROR_ACTOR_RUN_FAILED.format(run_id=run_id, status=status)
+            raise RuntimeError(msg)
diff --git a/langchain_apify/_error_messages.py b/langchain_apify/_error_messages.py
@@ -0,0 +1,11 @@
+_ERROR_APIFY_TOKEN_ENV_VAR_NOT_SET = (  # raised as ValueError when no Apify token can be resolved
+    'APIFY_TOKEN environment variable is not set.'
+    ' Please set it to your Apify API token by using `os.environ["APIFY_TOKEN"] = "YOUR_APIFY_TOKEN"`'
+    ' in your code or pass it as environment variable.'
+    ' To pass it as environment variable, you can use the following command:'
+    ' `APIFY_TOKEN="YOUR_APIFY_TOKEN" python your_script.py`'
+)
+
+_ERROR_ACTOR_RUN_FAILED = 'Actor run {run_id} ended with status {status}.'  # .format(run_id=..., status=...)
+
+_ERROR_SCRAPE_EMPTY = 'No content extracted from {url}.'  # .format(url=...)