apify · usanvict · May 15, 2026
diff --git a/tests/integration_tests/test_e2e_auth.py b/tests/integration_tests/test_e2e_auth.py
@@ -0,0 +1,26 @@
+"""Auth failure tests: missing token and invalid token.
+
+These do NOT require a valid APIFY_API_TOKEN: one test removes the variable, the other passes a dummy token explicitly.
+"""
+
+from __future__ import annotations
+
+import json
+
+import pytest
+
+from langchain_apify import ApifyRunActorTool
+
+
+def test_missing_token_raises_value_error(monkeypatch: pytest.MonkeyPatch) -> None:
+    """Constructing the tool with no APIFY_API_TOKEN in the environment raises ValueError."""
+    # raising=False keeps delenv from failing when the variable is already unset.
+    monkeypatch.delenv('APIFY_API_TOKEN', raising=False)
+
+    # The error message is expected to name the missing variable.
+    expect_missing_token = pytest.raises(ValueError, match='APIFY_API_TOKEN')
+    with expect_missing_token:
+        ApifyRunActorTool()
+
+
+def test_invalid_token_returns_error_string() -> None:
+    """An invalid token yields a plain-text (non-JSON) string instead of an exception."""
+    bad_token_tool = ApifyRunActorTool(apify_api_token='invalid_token_xyz_000')
+
+    outcome = bad_token_tool.invoke({'actor_id': 'apify/python-example', 'run_input': {}})
+
+    assert isinstance(outcome, str)
+    # The returned text must not be parseable as JSON.
+    with pytest.raises(json.JSONDecodeError):
+        json.loads(outcome)
diff --git a/tests/integration_tests/test_e2e_happy.py b/tests/integration_tests/test_e2e_happy.py
@@ -0,0 +1,64 @@
+"""E2E happy-path tests for all 6 core tools against the live Apify API.
+
+Requires APIFY_API_TOKEN. Uses apify/python-example for fast, cheap runs.
+"""
+
+from __future__ import annotations
+
+import json
+import os
+
+import pytest
+
+from langchain_apify import (
+    ApifyGetDatasetItemsTool,
+    ApifyRunActorAndGetDatasetTool,
+    ApifyRunActorTool,
+    ApifyRunTaskAndGetDatasetTool,
+    ApifyRunTaskTool,
+    ApifyScrapeUrlTool,
+)
+
+# Actor and input shared by the tests in this module.
+_ACTOR_ID = 'apify/python-example'
+_RUN_INPUT = {'first_number': 2, 'second_number': 3}
+# NOTE(review): because of the fallback value, _TASK_ID is only falsy when
+# APIFY_TASK_ID is explicitly set to an empty string.
+_TASK_ID = os.environ.get('APIFY_TASK_ID', 'dx_heroes/hello-world-task')
+# Exact key set expected in a run summary.
+_RUN_KEYS = {'run_id', 'status', 'dataset_id', 'started_at', 'finished_at'}
+
+# Module-wide skip: every test here needs a token for the live API.
+pytestmark = pytest.mark.skipif(
+    not os.environ.get('APIFY_API_TOKEN'),
+    reason='APIFY_API_TOKEN not set',
+)
+
+
+def test_run_actor() -> None:
+    """Run the example Actor and verify the run summary's keys and final status."""
+    raw = ApifyRunActorTool().invoke({'actor_id': _ACTOR_ID, 'run_input': _RUN_INPUT})
+    parsed = json.loads(raw)
+
+    # The summary must expose exactly the documented keys, no more and no fewer.
+    assert set(parsed.keys()) == _RUN_KEYS
+    assert parsed['status'] == 'SUCCEEDED'
+
+
+def test_get_dataset_items() -> None:
+    """Fetch items from the dataset produced by a fresh Actor run."""
+    run_summary = json.loads(ApifyRunActorTool().invoke({'actor_id': _ACTOR_ID, 'run_input': _RUN_INPUT}))
+    dataset_id = run_summary['dataset_id']
+
+    raw_items = ApifyGetDatasetItemsTool().invoke({'dataset_id': dataset_id, 'limit': 10})
+    assert isinstance(json.loads(raw_items)['items'], list)
+
+
+def test_run_actor_and_get_dataset() -> None:
+    """The combined tool returns both a run summary and a list of dataset items."""
+    request = {'actor_id': _ACTOR_ID, 'run_input': _RUN_INPUT, 'dataset_items_limit': 10}
+    response = json.loads(ApifyRunActorAndGetDatasetTool().invoke(request))
+
+    assert response['run']['status'] == 'SUCCEEDED'
+    assert isinstance(response['items'], list)
+
+
+def test_scrape_url() -> None:
+    """Scraping a real page returns a string longer than 100 characters."""
+    scraped = ApifyScrapeUrlTool().invoke({'url': 'https://crawlee.dev'})
+
+    # Separate asserts so a failure shows which expectation broke.
+    assert isinstance(scraped, str)
+    assert len(scraped) > 100
+
+
+@pytest.mark.skipif(not _TASK_ID, reason='APIFY_TASK_ID not set')
+def test_run_task() -> None:
+    """Run the configured task and expect it to finish with status SUCCEEDED."""
+    raw_summary = ApifyRunTaskTool().invoke({'task_id': _TASK_ID})
+    run_summary = json.loads(raw_summary)
+    assert run_summary['status'] == 'SUCCEEDED'
+
+
+@pytest.mark.skipif(not _TASK_ID, reason='APIFY_TASK_ID not set')
+def test_run_task_and_get_dataset() -> None:
+    """Run the configured task via the combined tool and check the nested run status."""
+    request = {'task_id': _TASK_ID, 'dataset_items_limit': 5}
+    response = json.loads(ApifyRunTaskAndGetDatasetTool().invoke(request))
+    assert response['run']['status'] == 'SUCCEEDED'
diff --git a/tests/integration_tests/test_e2e_negative.py b/tests/integration_tests/test_e2e_negative.py
@@ -0,0 +1,32 @@
+"""E2E negative tests: invalid IDs produce graceful error strings.
+
+Requires APIFY_API_TOKEN.
+"""
+
+from __future__ import annotations
+
+import json
+import os
+
+import pytest
+
+from langchain_apify import (
+    ApifyGetDatasetItemsTool,
+    ApifyRunActorTool,
+    ApifyRunTaskTool,
+)
+
+# Module-wide skip: every test here needs a token for the live API.
+pytestmark = pytest.mark.skipif(
+    not os.environ.get('APIFY_API_TOKEN'),
+    reason='APIFY_API_TOKEN not set',
+)
+
+
+@pytest.mark.parametrize(
+    'tool_cls,invoke_args',
+    [
+        (ApifyRunActorTool, {'actor_id': 'definitely/not-a-real-actor'}),
+        (ApifyGetDatasetItemsTool, {'dataset_id': 'definitely-not-a-real-dataset-id'}),
+        (ApifyRunTaskTool, {'task_id': 'definitely/not-a-real-task'}),
+    ],
+)
+def test_invalid_id_returns_error_string(tool_cls, invoke_args) -> None:
+    """A nonexistent ID produces a non-empty, non-JSON error string.
+
+    The tools are expected to set handle_tool_error=True, so no exception
+    propagates out of ``invoke``; the failure comes back as text instead.
+    """
+    outcome = tool_cls().invoke(invoke_args)
+
+    assert isinstance(outcome, str)
+    assert len(outcome) > 0
+    # The returned text must not be parseable as JSON.
+    with pytest.raises(json.JSONDecodeError):
+        json.loads(outcome)
diff --git a/tests/integration_tests/test_e2e_stability.py b/tests/integration_tests/test_e2e_stability.py
@@ -0,0 +1,39 @@
+"""Stability tests: repeated invocations produce structurally consistent output.
+
+Requires APIFY_API_TOKEN.
+"""
+
+from __future__ import annotations
+
+import json
+import os
+
+import pytest
+
+from langchain_apify import ApifyGetDatasetItemsTool, ApifyRunActorTool
+
+# Actor and input used by every run in this module.
+_ACTOR_ID = 'apify/python-example'
+_RUN_INPUT = {'first_number': 2, 'second_number': 3}
+
+# Module-wide skip: every test here needs a token for the live API.
+pytestmark = pytest.mark.skipif(
+    not os.environ.get('APIFY_API_TOKEN'),
+    reason='APIFY_API_TOKEN not set',
+)
+
+
+def test_repeated_runs_have_consistent_shape() -> None:
+    """Three consecutive runs share one summary shape, succeed, and use distinct datasets."""
+    expected_keys = {'run_id', 'status', 'dataset_id', 'started_at', 'finished_at'}
+    runner = ApifyRunActorTool()
+
+    summaries = []
+    for _ in range(3):
+        raw = runner.invoke({'actor_id': _ACTOR_ID, 'run_input': _RUN_INPUT})
+        summaries.append(json.loads(raw))
+
+    for summary in summaries:
+        assert set(summary.keys()) == expected_keys
+        assert summary['status'] == 'SUCCEEDED'
+
+    # Each run is expected to write to its own dataset.
+    distinct_dataset_ids = {summary['dataset_id'] for summary in summaries}
+    assert len(distinct_dataset_ids) == 3
+
+
+def test_same_dataset_returns_identical_items() -> None:
+    """Reading one dataset three times returns the same items every time."""
+    run_summary = json.loads(ApifyRunActorTool().invoke({'actor_id': _ACTOR_ID, 'run_input': _RUN_INPUT}))
+    dataset_id = run_summary['dataset_id']
+
+    reader = ApifyGetDatasetItemsTool()
+    reads = [json.loads(reader.invoke({'dataset_id': dataset_id, 'limit': 10})) for _ in range(3)]
+
+    # Compare every read against the first one.
+    baseline_items = reads[0]['items']
+    for read in reads:
+        assert read['items'] == baseline_items
diff --git a/tests/integration_tests/test_e2e_timeout.py b/tests/integration_tests/test_e2e_timeout.py
@@ -0,0 +1,43 @@
+"""Threshold tests (dataset offset/limit bounds, memory setting) against the live Apify API.
+
+Requires APIFY_API_TOKEN.
+"""
+
+from __future__ import annotations
+
+import json
+import os
+
+import pytest
+
+from langchain_apify import ApifyGetDatasetItemsTool, ApifyRunActorTool
+
+# Actor and input used by every run in this module.
+_ACTOR_ID = 'apify/python-example'
+_RUN_INPUT = {'first_number': 2, 'second_number': 3}
+
+# Module-wide skip: every test here needs a token for the live API.
+pytestmark = pytest.mark.skipif(
+    not os.environ.get('APIFY_API_TOKEN'),
+    reason='APIFY_API_TOKEN not set',
+)
+
+
+def test_offset_beyond_dataset_returns_empty() -> None:
+    """An offset far past the end of the dataset yields an empty item list."""
+    run_summary = json.loads(ApifyRunActorTool().invoke({'actor_id': _ACTOR_ID, 'run_input': _RUN_INPUT}))
+    request = {'dataset_id': run_summary['dataset_id'], 'limit': 10, 'offset': 999999}
+
+    page = json.loads(ApifyGetDatasetItemsTool().invoke(request))
+
+    assert page['items'] == []
+
+
+def test_limit_larger_than_dataset_returns_available() -> None:
+    """Requesting limit=1000 on a fresh run's dataset still returns a list of items."""
+    run_summary = json.loads(ApifyRunActorTool().invoke({'actor_id': _ACTOR_ID, 'run_input': _RUN_INPUT}))
+    request = {'dataset_id': run_summary['dataset_id'], 'limit': 1000}
+
+    page = json.loads(ApifyGetDatasetItemsTool().invoke(request))
+
+    assert isinstance(page['items'], list)
+
+
+def test_memory_128mb_accepted() -> None:
+    """A run requested with memory_mbytes=128 finishes with status SUCCEEDED."""
+    request = {'actor_id': _ACTOR_ID, 'run_input': _RUN_INPUT, 'memory_mbytes': 128}
+    run_summary = json.loads(ApifyRunActorTool().invoke(request))
+    assert run_summary['status'] == 'SUCCEEDED'
diff --git a/tests/unit_tests/test_clamping.py b/tests/unit_tests/test_clamping.py
@@ -0,0 +1,69 @@
+"""Unit tests for clamping boundaries NOT covered by the existing test_tools.py.
+
+Focuses on:
+- _clamp_depth (not tested elsewhere)
+- Exact at-boundary values (existing only tests above-max)
+- Configurable thresholds with relaxed limits
+"""
+
+from __future__ import annotations
+
+from unittest.mock import patch
+
+import pytest
+
+from langchain_apify._client import ApifyToolsClient
+from langchain_apify.tools import ApifyRunActorTool, _ApifyGenericTool
+
+
+def _make_tool(**kwargs) -> _ApifyGenericTool:  # type: ignore[type-arg]
+    """Build an ApifyRunActorTool with ``ApifyToolsClient.__init__`` stubbed out.
+
+    Keyword arguments are forwarded to the tool constructor alongside a dummy
+    API token; the client's real initializer is replaced for the duration of
+    the construction only.
+    """
+    stub_client_init = patch.object(ApifyToolsClient, '__init__', return_value=None)
+    with stub_client_init:
+        tool = ApifyRunActorTool(apify_api_token='dummy', **kwargs)
+    return tool
+
+
+class TestClampDepth:
+    """Boundary checks for ``_clamp_depth``, which existing tests do not cover."""
+
+    @pytest.mark.parametrize(
+        'input_val,expected',
+        [
+            (-999, 0),  # far below zero
+            (-1, 0),  # just below zero
+            (0, 0),  # lower bound
+            (3, 3),  # inside the range
+            (5, 5),  # exactly the configured maximum
+            (100, 5),  # above the configured maximum
+        ],
+    )
+    def test_boundaries(self, input_val: int, expected: int) -> None:
+        """With max_crawl_depth=5, the result is expected to stay within 0..5."""
+        clamped = _make_tool(max_crawl_depth=5)._clamp_depth(input_val)
+        assert clamped == expected
+
+
+class TestAtExactMax:
+    """A value exactly equal to the configured maximum must pass through unchanged.
+
+    Existing tests only cover values above the maximum.
+    """
+
+    def test_timeout_at_max(self) -> None:
+        """Timeout equal to max_timeout_secs is returned as-is."""
+        tool = _make_tool(max_timeout_secs=600)
+        assert tool._clamp_timeout(600) == 600
+
+    def test_memory_at_max(self) -> None:
+        """Memory equal to max_memory_mbytes is returned as-is."""
+        tool = _make_tool(max_memory_mbytes=32768)
+        assert tool._clamp_memory(32768) == 32768
+
+    def test_items_at_max(self) -> None:
+        """Item count equal to max_items is returned as-is."""
+        tool = _make_tool(max_items=1000)
+        assert tool._clamp_items(1000) == 1000
+
+    def test_depth_at_max(self) -> None:
+        """Depth equal to max_crawl_depth is returned as-is."""
+        tool = _make_tool(max_crawl_depth=5)
+        assert tool._clamp_depth(5) == 5
+
+
+class TestRelaxedLimits:
+    """Raising the configured limits lets correspondingly larger values through."""
+
+    def test_high_limits_pass_through(self) -> None:
+        """Values at or below the raised limits come back unchanged."""
+        relaxed_limits = {
+            'max_timeout_secs': 9999,
+            'max_items': 50000,
+            'max_memory_mbytes': 65536,
+            'max_crawl_depth': 20,
+        }
+        tool = _make_tool(**relaxed_limits)
+
+        assert tool._clamp_timeout(5000) == 5000
+        assert tool._clamp_items(30000) == 30000
+        # Memory is checked exactly at the raised limit.
+        assert tool._clamp_memory(65536) == 65536
+        assert tool._clamp_depth(15) == 15