diff --git a/tests/integration_tests/test_e2e_auth.py b/tests/integration_tests/test_e2e_auth.py new file mode 100644 index 0000000..e8d9714 --- /dev/null +++ b/tests/integration_tests/test_e2e_auth.py @@ -0,0 +1,26 @@ +"""Auth failure tests: missing token and invalid token. + +These do NOT require a valid APIFY_API_TOKEN. +""" + +from __future__ import annotations + +import json + +import pytest + +from langchain_apify import ApifyRunActorTool + + +def test_missing_token_raises_value_error(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.delenv('APIFY_API_TOKEN', raising=False) + with pytest.raises(ValueError, match='APIFY_API_TOKEN'): + ApifyRunActorTool() + + +def test_invalid_token_returns_error_string() -> None: + tool = ApifyRunActorTool(apify_api_token='invalid_token_xyz_000') + result = tool.invoke({'actor_id': 'apify/python-example', 'run_input': {}}) + assert isinstance(result, str) + with pytest.raises(json.JSONDecodeError): + json.loads(result) diff --git a/tests/integration_tests/test_e2e_happy.py b/tests/integration_tests/test_e2e_happy.py new file mode 100644 index 0000000..61fa05a --- /dev/null +++ b/tests/integration_tests/test_e2e_happy.py @@ -0,0 +1,64 @@ +"""E2E happy-path tests for all 6 core tools against the live Apify API. + +Requires APIFY_API_TOKEN. Uses apify/python-example for fast, cheap runs. 
+""" + +from __future__ import annotations + +import json +import os + +import pytest + +from langchain_apify import ( + ApifyGetDatasetItemsTool, + ApifyRunActorAndGetDatasetTool, + ApifyRunActorTool, + ApifyRunTaskAndGetDatasetTool, + ApifyRunTaskTool, + ApifyScrapeUrlTool, +) + +_ACTOR_ID = 'apify/python-example' +_RUN_INPUT = {'first_number': 2, 'second_number': 3} +_TASK_ID = os.getenv('APIFY_TASK_ID', 'dx_heroes/hello-world-task') +_RUN_KEYS = {'run_id', 'status', 'dataset_id', 'started_at', 'finished_at'} + +pytestmark = pytest.mark.skipif(not os.getenv('APIFY_API_TOKEN'), reason='APIFY_API_TOKEN not set') + + +def test_run_actor() -> None: + parsed = json.loads(ApifyRunActorTool().invoke({'actor_id': _ACTOR_ID, 'run_input': _RUN_INPUT})) + assert set(parsed.keys()) == _RUN_KEYS + assert parsed['status'] == 'SUCCEEDED' + + +def test_get_dataset_items() -> None: + ds_id = json.loads(ApifyRunActorTool().invoke({'actor_id': _ACTOR_ID, 'run_input': _RUN_INPUT}))['dataset_id'] + parsed = json.loads(ApifyGetDatasetItemsTool().invoke({'dataset_id': ds_id, 'limit': 10})) + assert isinstance(parsed['items'], list) + + +def test_run_actor_and_get_dataset() -> None: + parsed = json.loads(ApifyRunActorAndGetDatasetTool().invoke( + {'actor_id': _ACTOR_ID, 'run_input': _RUN_INPUT, 'dataset_items_limit': 10} + )) + assert parsed['run']['status'] == 'SUCCEEDED' + assert isinstance(parsed['items'], list) + + +def test_scrape_url() -> None: + result = ApifyScrapeUrlTool().invoke({'url': 'https://crawlee.dev'}) + assert isinstance(result, str) and len(result) > 100 + + +@pytest.mark.skipif(not _TASK_ID, reason='APIFY_TASK_ID not set') +def test_run_task() -> None: + parsed = json.loads(ApifyRunTaskTool().invoke({'task_id': _TASK_ID})) + assert parsed['status'] == 'SUCCEEDED' + + +@pytest.mark.skipif(not _TASK_ID, reason='APIFY_TASK_ID not set') +def test_run_task_and_get_dataset() -> None: + parsed = json.loads(ApifyRunTaskAndGetDatasetTool().invoke({'task_id': _TASK_ID, 
'dataset_items_limit': 5})) + assert parsed['run']['status'] == 'SUCCEEDED' diff --git a/tests/integration_tests/test_e2e_negative.py b/tests/integration_tests/test_e2e_negative.py new file mode 100644 index 0000000..5acf953 --- /dev/null +++ b/tests/integration_tests/test_e2e_negative.py @@ -0,0 +1,32 @@ +"""E2E negative tests: invalid IDs produce graceful error strings. + +Requires APIFY_API_TOKEN. +""" + +from __future__ import annotations + +import json +import os + +import pytest + +from langchain_apify import ( + ApifyGetDatasetItemsTool, + ApifyRunActorTool, + ApifyRunTaskTool, +) + +pytestmark = pytest.mark.skipif(not os.getenv('APIFY_API_TOKEN'), reason='APIFY_API_TOKEN not set') + + +@pytest.mark.parametrize('tool_cls,invoke_args', [ + (ApifyRunActorTool, {'actor_id': 'definitely/not-a-real-actor'}), + (ApifyGetDatasetItemsTool, {'dataset_id': 'definitely-not-a-real-dataset-id'}), + (ApifyRunTaskTool, {'task_id': 'definitely/not-a-real-task'}), +]) +def test_invalid_id_returns_error_string(tool_cls, invoke_args) -> None: + """handle_tool_error=True ensures no exception propagates; returns a string.""" + result = tool_cls().invoke(invoke_args) + assert isinstance(result, str) and len(result) > 0 + with pytest.raises(json.JSONDecodeError): + json.loads(result) diff --git a/tests/integration_tests/test_e2e_stability.py b/tests/integration_tests/test_e2e_stability.py new file mode 100644 index 0000000..fe2d38f --- /dev/null +++ b/tests/integration_tests/test_e2e_stability.py @@ -0,0 +1,39 @@ +"""Stability tests: repeated invocations produce structurally consistent output. + +Requires APIFY_API_TOKEN. 
+""" + +from __future__ import annotations + +import json +import os + +import pytest + +from langchain_apify import ApifyGetDatasetItemsTool, ApifyRunActorTool + +_ACTOR_ID = 'apify/python-example' +_RUN_INPUT = {'first_number': 2, 'second_number': 3} + +pytestmark = pytest.mark.skipif(not os.getenv('APIFY_API_TOKEN'), reason='APIFY_API_TOKEN not set') + + +def test_repeated_runs_have_consistent_shape() -> None: + tool = ApifyRunActorTool() + results = [json.loads(tool.invoke({'actor_id': _ACTOR_ID, 'run_input': _RUN_INPUT})) for _ in range(3)] + + for r in results: + assert set(r.keys()) == {'run_id', 'status', 'dataset_id', 'started_at', 'finished_at'} + assert r['status'] == 'SUCCEEDED' + assert len({r['dataset_id'] for r in results}) == 3 # unique per run + + +def test_same_dataset_returns_identical_items() -> None: + ds_id = json.loads( + ApifyRunActorTool().invoke({'actor_id': _ACTOR_ID, 'run_input': _RUN_INPUT}) + )['dataset_id'] + + tool = ApifyGetDatasetItemsTool() + results = [json.loads(tool.invoke({'dataset_id': ds_id, 'limit': 10})) for _ in range(3)] + + assert all(r['items'] == results[0]['items'] for r in results) diff --git a/tests/integration_tests/test_e2e_timeout.py b/tests/integration_tests/test_e2e_timeout.py new file mode 100644 index 0000000..1e39039 --- /dev/null +++ b/tests/integration_tests/test_e2e_timeout.py @@ -0,0 +1,43 @@ +"""Timeout and threshold tests against the live Apify API. + +Requires APIFY_API_TOKEN. 
+""" + +from __future__ import annotations + +import json +import os + +import pytest + +from langchain_apify import ApifyGetDatasetItemsTool, ApifyRunActorTool + +_ACTOR_ID = 'apify/python-example' +_RUN_INPUT = {'first_number': 2, 'second_number': 3} + +pytestmark = pytest.mark.skipif(not os.getenv('APIFY_API_TOKEN'), reason='APIFY_API_TOKEN not set') + + +def test_offset_beyond_dataset_returns_empty() -> None: + ds_id = json.loads( + ApifyRunActorTool().invoke({'actor_id': _ACTOR_ID, 'run_input': _RUN_INPUT}) + )['dataset_id'] + + result = json.loads(ApifyGetDatasetItemsTool().invoke({'dataset_id': ds_id, 'limit': 10, 'offset': 999999})) + assert result['items'] == [] + + +def test_limit_larger_than_dataset_returns_available() -> None: + ds_id = json.loads( + ApifyRunActorTool().invoke({'actor_id': _ACTOR_ID, 'run_input': _RUN_INPUT}) + )['dataset_id'] + + result = json.loads(ApifyGetDatasetItemsTool().invoke({'dataset_id': ds_id, 'limit': 1000})) + assert isinstance(result['items'], list) + + +def test_memory_128mb_accepted() -> None: + parsed = json.loads(ApifyRunActorTool().invoke({ + 'actor_id': _ACTOR_ID, 'run_input': _RUN_INPUT, 'memory_mbytes': 128, + })) + assert parsed['status'] == 'SUCCEEDED' diff --git a/tests/unit_tests/test_clamping.py b/tests/unit_tests/test_clamping.py new file mode 100644 index 0000000..d7a569d --- /dev/null +++ b/tests/unit_tests/test_clamping.py @@ -0,0 +1,69 @@ +"""Unit tests for clamping boundaries NOT covered by the existing test_tools.py. 
+ +Focuses on: +- _clamp_depth (not tested elsewhere) +- Exact at-boundary values (existing only tests above-max) +- Configurable thresholds with relaxed limits +""" + +from __future__ import annotations + +from unittest.mock import patch + +import pytest + +from langchain_apify._client import ApifyToolsClient +from langchain_apify.tools import ApifyRunActorTool, _ApifyGenericTool + + +def _make_tool(**kwargs) -> _ApifyGenericTool: # type: ignore[type-arg] + with patch.object(ApifyToolsClient, '__init__', return_value=None): + return ApifyRunActorTool(apify_api_token='dummy', **kwargs) + + +class TestClampDepth: + """_clamp_depth is not covered by existing tests at all.""" + + @pytest.mark.parametrize('input_val,expected', [ + (-999, 0), + (-1, 0), + (0, 0), + (3, 3), + (5, 5), + (100, 5), + ]) + def test_boundaries(self, input_val: int, expected: int) -> None: + tool = _make_tool(max_crawl_depth=5) + assert tool._clamp_depth(input_val) == expected + + +class TestAtExactMax: + """Existing tests only check above-max. 
Verify at-max passes through.""" + + def test_timeout_at_max(self) -> None: + assert _make_tool(max_timeout_secs=600)._clamp_timeout(600) == 600 + + def test_memory_at_max(self) -> None: + assert _make_tool(max_memory_mbytes=32768)._clamp_memory(32768) == 32768 + + def test_items_at_max(self) -> None: + assert _make_tool(max_items=1000)._clamp_items(1000) == 1000 + + def test_depth_at_max(self) -> None: + assert _make_tool(max_crawl_depth=5)._clamp_depth(5) == 5 + + +class TestRelaxedLimits: + """Verify relaxed custom limits allow higher values.""" + + def test_high_limits_pass_through(self) -> None: + tool = _make_tool( + max_timeout_secs=9999, + max_items=50000, + max_memory_mbytes=65536, + max_crawl_depth=20, + ) + assert tool._clamp_timeout(5000) == 5000 + assert tool._clamp_items(30000) == 30000 + assert tool._clamp_memory(65536) == 65536 + assert tool._clamp_depth(15) == 15 diff --git a/tests/unit_tests/test_input_validation.py b/tests/unit_tests/test_input_validation.py new file mode 100644 index 0000000..34fd936 --- /dev/null +++ b/tests/unit_tests/test_input_validation.py @@ -0,0 +1,337 @@ +"""Unit tests for Pydantic input schema validation. + +Covers: missing required fields, wrong types, schema-vs-clamping boundary, +string edge cases, empty/invalid IDs, malformed dicts, boundaries. 
+""" + +from __future__ import annotations + +from unittest.mock import patch + +import pytest +from langchain_core.documents import Document +from pydantic import ValidationError + +from langchain_apify import ApifyDatasetLoader +from langchain_apify.tools import ( + ApifyActorsTool, + ApifyGetDatasetItemsInput, + ApifyRunActorInput, + ApifyRunTaskInput, + ApifyScrapeUrlInput, +) + + +# --------------------------------------------------------------------------- +# Required vs optional field validation +# --------------------------------------------------------------------------- + + +class TestMissingRequiredFields: + def test_actor_id_required(self) -> None: + with pytest.raises(ValidationError, match='actor_id'): + ApifyRunActorInput() + + def test_dataset_id_required(self) -> None: + with pytest.raises(ValidationError, match='dataset_id'): + ApifyGetDatasetItemsInput() + + def test_url_required(self) -> None: + with pytest.raises(ValidationError, match='url'): + ApifyScrapeUrlInput() + + def test_task_id_required(self) -> None: + with pytest.raises(ValidationError, match='task_id'): + ApifyRunTaskInput() + + +# --------------------------------------------------------------------------- +# Pydantic type validation (wrong types passed to fields) +# --------------------------------------------------------------------------- + + +class TestWrongTypes: + def test_actor_id_int_rejected(self) -> None: + with pytest.raises(ValidationError): + ApifyRunActorInput(actor_id=12345) + + def test_dataset_id_none_rejected(self) -> None: + with pytest.raises(ValidationError): + ApifyGetDatasetItemsInput(dataset_id=None) + + def test_limit_string_rejected(self) -> None: + with pytest.raises(ValidationError): + ApifyGetDatasetItemsInput(dataset_id='x', limit='ten') + + def test_run_input_string_rejected(self) -> None: + with pytest.raises(ValidationError): + ApifyRunActorInput(actor_id='x', run_input='not a dict') + + def test_task_input_list_rejected(self) -> None: + with 
pytest.raises(ValidationError): + ApifyRunTaskInput(task_id='t', task_input=[1, 2, 3]) + + +# --------------------------------------------------------------------------- +# Negative offset values +# --------------------------------------------------------------------------- + + +class TestOffsetBoundaries: + def test_negative_offset_accepted_by_schema(self) -> None: + """Pydantic does not reject negative offset; API behaviour is undefined.""" + model = ApifyGetDatasetItemsInput(dataset_id='x', offset=-1) + assert model.offset == -1 + + def test_zero_offset(self) -> None: + model = ApifyGetDatasetItemsInput(dataset_id='x', offset=0) + assert model.offset == 0 + + def test_huge_offset(self) -> None: + model = ApifyGetDatasetItemsInput(dataset_id='x', offset=999999) + assert model.offset == 999999 + + +# --------------------------------------------------------------------------- +# Empty/invalid ID strings for all fields +# --------------------------------------------------------------------------- + + +class TestEmptyStringBoundaries: + def test_empty_actor_id(self) -> None: + model = ApifyRunActorInput(actor_id='') + assert model.actor_id == '' + + def test_empty_dataset_id(self) -> None: + model = ApifyGetDatasetItemsInput(dataset_id='') + assert model.dataset_id == '' + + def test_empty_task_id(self) -> None: + model = ApifyRunTaskInput(task_id='') + assert model.task_id == '' + + def test_empty_url(self) -> None: + model = ApifyScrapeUrlInput(url='') + assert model.url == '' + + +# --------------------------------------------------------------------------- +# Empty run_input dict handling +# --------------------------------------------------------------------------- + + +class TestRunInputDictHandling: + def test_empty_dict_accepted(self) -> None: + model = ApifyRunActorInput(actor_id='x', run_input={}) + assert model.run_input == {} + + def test_none_run_input_accepted(self) -> None: + model = ApifyRunActorInput(actor_id='x', run_input=None) + assert 
model.run_input is None + + def test_none_task_input_accepted(self) -> None: + model = ApifyRunTaskInput(task_id='t', task_input=None) + assert model.task_input is None + + def test_empty_task_input_accepted(self) -> None: + model = ApifyRunTaskInput(task_id='t', task_input={}) + assert model.task_input == {} + + +# --------------------------------------------------------------------------- +# Malformed dict structure for run_input / task_input +# --------------------------------------------------------------------------- + + +class TestMalformedDictStructure: + def test_deeply_nested_run_input(self) -> None: + nested = {'a': {'b': {'c': {'d': [1, 2, {'e': True}]}}}} + model = ApifyRunActorInput(actor_id='x', run_input=nested) + assert model.run_input == nested + + def test_run_input_with_none_values(self) -> None: + model = ApifyRunActorInput(actor_id='x', run_input={'key': None, 'list': [None]}) + assert model.run_input['key'] is None + + def test_run_input_with_mixed_types(self) -> None: + mixed = {'str': 'a', 'int': 1, 'float': 1.5, 'bool': True, 'list': [], 'dict': {}} + model = ApifyRunActorInput(actor_id='x', run_input=mixed) + assert model.run_input == mixed + + +# --------------------------------------------------------------------------- +# Invalid JSON string input for ApifyActorsTool.run_input +# --------------------------------------------------------------------------- + + +class TestApifyActorsToolRunInput: + """The legacy ApifyActorsTool accepts run_input as str or dict.""" + + @pytest.fixture + def tool(self) -> ApifyActorsTool: + with ( + patch.object(ApifyActorsTool, '_create_description', return_value='desc'), + patch.object(ApifyActorsTool, '_build_tool_args_schema_model') as mock_schema, + ): + from pydantic import BaseModel + + class DummyModel(BaseModel): + run_input: str | dict + + mock_schema.return_value = DummyModel + return ApifyActorsTool(actor_id='a/b', apify_api_token='dummy') + + def test_valid_json_string_accepted(self, tool: 
ApifyActorsTool) -> None: + with patch.object(ApifyActorsTool, '_run_actor', return_value=[]): + tool._run('{"key": "value"}') + # Should not raise; parsed internally + + def test_dict_input_accepted(self, tool: ApifyActorsTool) -> None: + with patch.object(ApifyActorsTool, '_run_actor', return_value=[]): + tool._run({'key': 'value'}) + + def test_invalid_json_string_raises(self, tool: ApifyActorsTool) -> None: + with patch.object(ApifyActorsTool, '_run_actor', return_value=[]): + with pytest.raises(Exception): + tool._run('not valid json {{{') + + +# --------------------------------------------------------------------------- +# String length boundaries +# --------------------------------------------------------------------------- + + +class TestStringLengthBoundaries: + def test_very_long_actor_id(self) -> None: + long_id = 'a' * 5000 + model = ApifyRunActorInput(actor_id=long_id) + assert len(model.actor_id) == 5000 + + def test_very_long_dataset_id(self) -> None: + model = ApifyGetDatasetItemsInput(dataset_id='x' * 5000) + assert len(model.dataset_id) == 5000 + + def test_very_long_url(self) -> None: + model = ApifyScrapeUrlInput(url='https://x.com/' + 'a' * 5000) + assert len(model.url) > 5000 + + def test_very_long_task_id(self) -> None: + model = ApifyRunTaskInput(task_id='t' * 5000) + assert len(model.task_id) == 5000 + + +# --------------------------------------------------------------------------- +# run_input dict size boundaries +# --------------------------------------------------------------------------- + + +class TestRunInputSizeBoundaries: + def test_large_run_input_accepted(self) -> None: + large = {f'key_{i}': f'value_{i}' for i in range(1000)} + model = ApifyRunActorInput(actor_id='x', run_input=large) + assert len(model.run_input) == 1000 + + def test_large_nested_run_input(self) -> None: + large = {'items': [{'id': i, 'data': 'x' * 100} for i in range(500)]} + model = ApifyRunActorInput(actor_id='x', run_input=large) + assert 
len(model.run_input['items']) == 500 + + +# --------------------------------------------------------------------------- +# Unicode/special character boundaries +# --------------------------------------------------------------------------- + + +class TestUnicodeAndSpecialChars: + def test_unicode_actor_id(self) -> None: + model = ApifyRunActorInput(actor_id='user/actor-日本語-émojis') + assert '日本語' in model.actor_id + + def test_special_chars_in_dataset_id(self) -> None: + model = ApifyGetDatasetItemsInput(dataset_id='ds-with-dashes_and_underscores') + assert model.dataset_id == 'ds-with-dashes_and_underscores' + + def test_url_with_unicode_path(self) -> None: + model = ApifyScrapeUrlInput(url='https://example.com/café/日本語') + assert 'café' in model.url + + def test_special_chars_in_task_id(self) -> None: + model = ApifyRunTaskInput(task_id='user~my-task_v2') + assert model.task_id == 'user~my-task_v2' + + +# --------------------------------------------------------------------------- +# URL format validation boundaries +# --------------------------------------------------------------------------- + + +class TestUrlFormatBoundaries: + def test_malformed_url(self) -> None: + model = ApifyScrapeUrlInput(url='not-a-url') + assert model.url == 'not-a-url' + + def test_ftp_scheme(self) -> None: + model = ApifyScrapeUrlInput(url='ftp://files.example.com/data') + assert model.url.startswith('ftp://') + + def test_url_with_query_params(self) -> None: + model = ApifyScrapeUrlInput(url='https://x.com/page?q=hello&lang=en#section') + assert '?' 
in model.url and '#' in model.url + + def test_url_with_port(self) -> None: + model = ApifyScrapeUrlInput(url='http://localhost:8080/api') + assert ':8080' in model.url + + +# --------------------------------------------------------------------------- +# ApifyDatasetLoader input validation +# --------------------------------------------------------------------------- + + +class TestApifyDatasetLoaderValidation: + def _dummy_fn(self, item: dict) -> Document: + return Document(page_content=str(item)) + + def test_missing_dataset_id_raises(self, monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv('APIFY_API_TOKEN', 'dummy') + with pytest.raises((TypeError, ValidationError)): + ApifyDatasetLoader( + dataset_id=None, # type: ignore[arg-type] + dataset_mapping_function=self._dummy_fn, + ) + + def test_missing_mapping_function_raises(self, monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv('APIFY_API_TOKEN', 'dummy') + with pytest.raises((TypeError, ValidationError)): + ApifyDatasetLoader( + dataset_id='ds-1', + dataset_mapping_function=None, # type: ignore[arg-type] + ) + + def test_empty_dataset_id_accepted(self, monkeypatch: pytest.MonkeyPatch) -> None: + """Empty string passes validation; API will reject it.""" + monkeypatch.setenv('APIFY_API_TOKEN', 'dummy') + with patch('langchain_apify.document_loaders._create_apify_client'): + loader = ApifyDatasetLoader(dataset_id='', dataset_mapping_function=self._dummy_fn) + assert loader.dataset_id == '' + + def test_missing_token_raises(self, monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.delenv('APIFY_API_TOKEN', raising=False) + monkeypatch.delenv('APIFY_TOKEN', raising=False) + with pytest.raises(ValueError, match='APIFY_API_TOKEN'): + ApifyDatasetLoader(dataset_id='ds-1', dataset_mapping_function=self._dummy_fn) + + +# --------------------------------------------------------------------------- +# Schema does not enforce range (clamping handles it) +# 
--------------------------------------------------------------------------- + + +class TestSchemaDoesNotEnforceRange: + def test_negative_timeout_accepted(self) -> None: + model = ApifyRunActorInput(actor_id='x', timeout_secs=-1) + assert model.timeout_secs == -1 + + def test_negative_limit_accepted(self) -> None: + model = ApifyGetDatasetItemsInput(dataset_id='x', limit=-1) + assert model.limit == -1 diff --git a/tests/unit_tests/test_integration_mock.py b/tests/unit_tests/test_integration_mock.py new file mode 100644 index 0000000..df43996 --- /dev/null +++ b/tests/unit_tests/test_integration_mock.py @@ -0,0 +1,80 @@ +"""Tests for handle_tool_error via .invoke() and output schema contracts. + +Existing test_tools.py tests ._run() directly. These test the LangChain +.invoke() path where ToolException becomes a string response, and verify +the exact output key sets match the documented contract. +""" + +from __future__ import annotations + +import json +from unittest.mock import MagicMock + +from langchain_apify.tools import ( + ApifyGetDatasetItemsTool, + ApifyRunActorAndGetDatasetTool, + ApifyRunActorTool, + ApifyScrapeUrlTool, +) +from tests.unit_tests.conftest import SAMPLE_ITEMS, SUCCEEDED_RUN, make_tool + + +class TestHandleToolErrorViaInvoke: + """ToolException must become a string (not raise) when using .invoke().""" + + def test_run_actor_error_becomes_string(self, mock_tools_client: MagicMock) -> None: + mock_tools_client.run_actor.side_effect = RuntimeError('Actor not found.') + tool = make_tool(ApifyRunActorTool, mock_tools_client) + + result = tool.invoke({'actor_id': 'bad/actor'}) + + assert isinstance(result, str) + assert 'Actor not found' in result + + def test_get_dataset_error_becomes_string(self, mock_tools_client: MagicMock) -> None: + mock_tools_client.get_dataset_items.side_effect = RuntimeError('fetch failed') + tool = make_tool(ApifyGetDatasetItemsTool, mock_tools_client) + + result = tool.invoke({'dataset_id': 'bad-id'}) + + assert 
isinstance(result, str) + assert 'fetch failed' in result + + def test_scrape_url_error_becomes_string(self, mock_tools_client: MagicMock) -> None: + mock_tools_client.scrape_url.side_effect = RuntimeError('No content extracted.') + tool = make_tool(ApifyScrapeUrlTool, mock_tools_client) + + result = tool.invoke({'url': 'https://bad.example.com'}) + + assert isinstance(result, str) + assert 'No content extracted' in result + + +class TestOutputSchemaContract: + """Verify output key sets match the documented API contract.""" + + _RUN_KEYS = {'run_id', 'status', 'dataset_id', 'started_at', 'finished_at'} + + def test_run_actor_keys(self, mock_tools_client: MagicMock) -> None: + mock_tools_client.run_actor.return_value = SUCCEEDED_RUN + tool = make_tool(ApifyRunActorTool, mock_tools_client) + assert set(json.loads(tool._run(actor_id='a')).keys()) == self._RUN_KEYS + + def test_run_actor_and_get_dataset_keys(self, mock_tools_client: MagicMock) -> None: + mock_tools_client.run_actor_and_get_items.return_value = (SUCCEEDED_RUN, SAMPLE_ITEMS) + tool = make_tool(ApifyRunActorAndGetDatasetTool, mock_tools_client) + result = json.loads(tool._run(actor_id='a')) + assert set(result.keys()) == {'run', 'items'} + assert set(result['run'].keys()) == self._RUN_KEYS + + def test_get_dataset_items_keys_nonempty(self, mock_tools_client: MagicMock) -> None: + mock_tools_client.get_dataset_items.return_value = SAMPLE_ITEMS + tool = make_tool(ApifyGetDatasetItemsTool, mock_tools_client) + result = json.loads(tool._run(dataset_id='ds')) + assert set(result.keys()) == {'items'} + + def test_get_dataset_items_keys_empty(self, mock_tools_client: MagicMock) -> None: + mock_tools_client.get_dataset_items.return_value = [] + tool = make_tool(ApifyGetDatasetItemsTool, mock_tools_client) + result = json.loads(tool._run(dataset_id='ds')) + assert set(result.keys()) == {'items', 'message'} diff --git a/tests/unit_tests/test_run_meta.py b/tests/unit_tests/test_run_meta.py new file mode 100644 index 
0000000..5cd2935 --- /dev/null +++ b/tests/unit_tests/test_run_meta.py @@ -0,0 +1,42 @@ +"""Unit tests for _run_meta() edge cases NOT covered by test_tools.py. + +Existing tests cover: datetime/string/None conversion, JSON serialization. +These add: empty dict, extra keys filtered, different statuses. +""" + +from __future__ import annotations + +from langchain_apify._utils import _run_meta + + +def test_empty_dict_returns_all_nones() -> None: + meta = _run_meta({}) + assert meta == { + 'run_id': None, + 'status': None, + 'dataset_id': None, + 'started_at': None, + 'finished_at': None, + } + + +def test_extra_api_keys_are_filtered() -> None: + run = { + 'id': 'run-x', + 'status': 'SUCCEEDED', + 'defaultDatasetId': 'ds-x', + 'startedAt': '2025-01-01T00:00:00Z', + 'finishedAt': '2025-01-01T00:01:00Z', + 'actId': 'abc', + 'buildId': 'xyz', + 'exitCode': 0, + 'usageTotalUsd': 0.001, + } + meta = _run_meta(run) + assert set(meta.keys()) == {'run_id', 'status', 'dataset_id', 'started_at', 'finished_at'} + + +def test_non_succeeded_status_preserved() -> None: + for status in ('FAILED', 'TIMED-OUT', 'RUNNING', 'ABORTING'): + meta = _run_meta({'id': 'r', 'status': status, 'defaultDatasetId': None}) + assert meta['status'] == status diff --git a/tests/unit_tests/test_two_step_orchestration.py b/tests/unit_tests/test_two_step_orchestration.py new file mode 100644 index 0000000..5a43669 --- /dev/null +++ b/tests/unit_tests/test_two_step_orchestration.py @@ -0,0 +1,45 @@ +"""Tests for the two-step RunActor -> GetDatasetItems orchestration pattern.""" + +from __future__ import annotations + +import json +from unittest.mock import MagicMock + +import pytest + +from langchain_apify.tools import ApifyGetDatasetItemsTool, ApifyRunActorTool +from tests.unit_tests.conftest import SAMPLE_ITEMS, SUCCEEDED_RUN, make_tool + + +def test_dataset_id_flows_from_step1_to_step2(mock_tools_client: MagicMock) -> None: + mock_tools_client.run_actor.return_value = SUCCEEDED_RUN + 
mock_tools_client.get_dataset_items.return_value = SAMPLE_ITEMS + + run_tool = make_tool(ApifyRunActorTool, mock_tools_client) + items_tool = make_tool(ApifyGetDatasetItemsTool, mock_tools_client) + + dataset_id = json.loads(run_tool._run(actor_id='a/b', run_input={}))['dataset_id'] + parsed = json.loads(items_tool._run(dataset_id=dataset_id, limit=3)) + + assert parsed['items'] == SAMPLE_ITEMS + mock_tools_client.get_dataset_items.assert_called_once_with('dataset-xyz', 3, 0) + + +def test_failed_step1_is_not_valid_json(mock_tools_client: MagicMock) -> None: + mock_tools_client.run_actor.side_effect = RuntimeError('Actor not found.') + run_tool = make_tool(ApifyRunActorTool, mock_tools_client) + + result = run_tool.invoke({'actor_id': 'bad/actor'}) + + with pytest.raises(json.JSONDecodeError): + json.loads(result) + + +def test_each_run_produces_unique_dataset_id(mock_tools_client: MagicMock) -> None: + runs = [{**SUCCEEDED_RUN, 'id': f'run-{i}', 'defaultDatasetId': f'ds-{i}'} for i in range(3)] + mock_tools_client.run_actor.side_effect = runs + run_tool = make_tool(ApifyRunActorTool, mock_tools_client) + + ids = [json.loads(run_tool._run(actor_id='a/b'))['dataset_id'] for _ in range(3)] + + assert ids == ['ds-0', 'ds-1', 'ds-2']