diff --git a/.github/workflows/loongsuite_lint_0.yml b/.github/workflows/loongsuite_lint_0.yml index 8f17f4ed1..9d569e479 100644 --- a/.github/workflows/loongsuite_lint_0.yml +++ b/.github/workflows/loongsuite_lint_0.yml @@ -241,3 +241,22 @@ jobs: - name: Run tests run: tox -c tox-loongsuite.ini -e lint-loongsuite-instrumentation-copaw + lint-loongsuite-instrumentation-sweagent: + name: LoongSuite loongsuite-instrumentation-sweagent + runs-on: ubuntu-latest + timeout-minutes: 30 + steps: + - name: Checkout repo @ SHA - ${{ github.sha }} + uses: actions/checkout@v4 + + - name: Set up Python 3.13 + uses: actions/setup-python@v5 + with: + python-version: "3.13" + + - name: Install tox + run: pip install tox-uv + + - name: Run tests + run: tox -c tox-loongsuite.ini -e lint-loongsuite-instrumentation-sweagent + diff --git a/.github/workflows/loongsuite_test_0.yml b/.github/workflows/loongsuite_test_0.yml index 778dc6c0e..e98ee2e69 100644 --- a/.github/workflows/loongsuite_test_0.yml +++ b/.github/workflows/loongsuite_test_0.yml @@ -1609,3 +1609,117 @@ jobs: - name: Run tests run: tox -c tox-loongsuite.ini -e py313-test-loongsuite-instrumentation-copaw -- -ra + py311-test-loongsuite-instrumentation-sweagent-oldest_ubuntu-latest: + name: LoongSuite loongsuite-instrumentation-sweagent-oldest 3.11 Ubuntu + runs-on: ubuntu-latest + timeout-minutes: 30 + steps: + - name: Checkout repo @ SHA - ${{ github.sha }} + uses: actions/checkout@v4 + + - name: Set up Python 3.11 + uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - name: Install tox + run: pip install tox-uv + + - name: Run tests + run: tox -c tox-loongsuite.ini -e py311-test-loongsuite-instrumentation-sweagent-oldest -- -ra + + py311-test-loongsuite-instrumentation-sweagent-latest_ubuntu-latest: + name: LoongSuite loongsuite-instrumentation-sweagent-latest 3.11 Ubuntu + runs-on: ubuntu-latest + timeout-minutes: 30 + steps: + - name: Checkout repo @ SHA - ${{ github.sha }} + uses: actions/checkout@v4 + 
+ - name: Set up Python 3.11 + uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - name: Install tox + run: pip install tox-uv + + - name: Run tests + run: tox -c tox-loongsuite.ini -e py311-test-loongsuite-instrumentation-sweagent-latest -- -ra + + py312-test-loongsuite-instrumentation-sweagent-oldest_ubuntu-latest: + name: LoongSuite loongsuite-instrumentation-sweagent-oldest 3.12 Ubuntu + runs-on: ubuntu-latest + timeout-minutes: 30 + steps: + - name: Checkout repo @ SHA - ${{ github.sha }} + uses: actions/checkout@v4 + + - name: Set up Python 3.12 + uses: actions/setup-python@v5 + with: + python-version: "3.12" + + - name: Install tox + run: pip install tox-uv + + - name: Run tests + run: tox -c tox-loongsuite.ini -e py312-test-loongsuite-instrumentation-sweagent-oldest -- -ra + + py312-test-loongsuite-instrumentation-sweagent-latest_ubuntu-latest: + name: LoongSuite loongsuite-instrumentation-sweagent-latest 3.12 Ubuntu + runs-on: ubuntu-latest + timeout-minutes: 30 + steps: + - name: Checkout repo @ SHA - ${{ github.sha }} + uses: actions/checkout@v4 + + - name: Set up Python 3.12 + uses: actions/setup-python@v5 + with: + python-version: "3.12" + + - name: Install tox + run: pip install tox-uv + + - name: Run tests + run: tox -c tox-loongsuite.ini -e py312-test-loongsuite-instrumentation-sweagent-latest -- -ra + + py313-test-loongsuite-instrumentation-sweagent-oldest_ubuntu-latest: + name: LoongSuite loongsuite-instrumentation-sweagent-oldest 3.13 Ubuntu + runs-on: ubuntu-latest + timeout-minutes: 30 + steps: + - name: Checkout repo @ SHA - ${{ github.sha }} + uses: actions/checkout@v4 + + - name: Set up Python 3.13 + uses: actions/setup-python@v5 + with: + python-version: "3.13" + + - name: Install tox + run: pip install tox-uv + + - name: Run tests + run: tox -c tox-loongsuite.ini -e py313-test-loongsuite-instrumentation-sweagent-oldest -- -ra + + py313-test-loongsuite-instrumentation-sweagent-latest_ubuntu-latest: + name: LoongSuite 
loongsuite-instrumentation-sweagent-latest 3.13 Ubuntu + runs-on: ubuntu-latest + timeout-minutes: 30 + steps: + - name: Checkout repo @ SHA - ${{ github.sha }} + uses: actions/checkout@v4 + + - name: Set up Python 3.13 + uses: actions/setup-python@v5 + with: + python-version: "3.13" + + - name: Install tox + run: pip install tox-uv + + - name: Run tests + run: tox -c tox-loongsuite.ini -e py313-test-loongsuite-instrumentation-sweagent-latest -- -ra + diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-sweagent/CHANGELOG.md b/instrumentation-loongsuite/loongsuite-instrumentation-sweagent/CHANGELOG.md new file mode 100644 index 000000000..09c28f27b --- /dev/null +++ b/instrumentation-loongsuite/loongsuite-instrumentation-sweagent/CHANGELOG.md @@ -0,0 +1,16 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## Unreleased + +### Added + +- Initial SWE-agent instrumentation: `SweagentInstrumentor` emits GenAI spans + (entry, `invoke_agent`, react step, `execute_tool` with LLM `tool_calls` when + available) via `ExtendedTelemetryHandler`; includes tests, examples, and + tox/CI wiring. + ([#165](https://github.com/alibaba/loongsuite-python-agent/pull/165)) diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-sweagent/README.rst b/instrumentation-loongsuite/loongsuite-instrumentation-sweagent/README.rst new file mode 100644 index 000000000..89ce512f7 --- /dev/null +++ b/instrumentation-loongsuite/loongsuite-instrumentation-sweagent/README.rst @@ -0,0 +1,70 @@ +LoongSuite instrumentation for SWE-agent +======================================== + +This package instruments `SWE-agent <https://github.com/SWE-agent/SWE-agent>`_ +using ``opentelemetry-util-genai`` ``ExtendedTelemetryHandler`` so traces align +with other LoongSuite GenAI plugins. + +Spans +----- + +.. 
list-table:: + :header-rows: 1 + + * - SWE-agent locus + - Span name (typical) + - ``gen_ai.operation.name`` / ``gen_ai.span.kind`` + + * - ``CombinedRunHooks.on_instance_start`` → ``on_instance_completed`` + - ``enter_ai_application_system`` + - ``enter`` / ``ENTRY`` + + * - ``CombinedAgentHook.on_run_start`` → ``on_run_done`` + - ``invoke_agent swe-agent`` (``gen_ai.agent.name``); ``input_messages`` / ``output_messages`` match the entry span (same problem-statement user text via thread-local from ``on_instance_start``, same run summary as ``AgentRunResult`` via ``info`` + ``trajectory``); ``gen_ai.conversation.id`` = problem id when set; token usage from ``info.model_stats`` when present + - ``invoke_agent`` / ``AGENT`` + + * - ``CombinedAgentHook.on_step_start`` → ``on_step_done`` + - ``react step`` + - ``react`` / ``STEP`` + + * - ``DefaultAgent.handle_action`` (bash / ``communicate``) + - ``execute_tool <name>`` where ``name`` is the first LLM ``tool_calls[*].function.name`` when ``StepOutput.tool_calls`` is set (function-calling mode); otherwise ``execute_tool sweagent_bash`` (thought/action parsing). Tool arguments recorded as ``function.arguments`` (JSON decoded when possible), else ``step.action``. + - ``execute_tool`` / ``TOOL`` + +Remote LLM calls (LiteLLM) are **not** duplicated here; enable +``loongsuite-instrumentation-litellm`` (or equivalent) for model spans. + +Requirements +------------ + +- Python **3.11+** (matches upstream SWE-agent). + +Installation +------------ + +From the LoongSuite repo root (after installing ``sweagent`` and ``opentelemetry-util-genai``): + +:: + + pip install -e ./util/opentelemetry-util-genai + pip install -e ./instrumentation-loongsuite/loongsuite-instrumentation-sweagent + +Usage +----- + +:: + + from opentelemetry.instrumentation.sweagent import SweagentInstrumentor + from opentelemetry.sdk.trace import TracerProvider + + provider = TracerProvider() + # add_span_processor(...) # e.g. 
OTLP or console + + SweagentInstrumentor().instrument(tracer_provider=provider) + # ... run sweagent ... + SweagentInstrumentor().uninstrument() + +Entry span input is derived from ``problem_statement.id`` and a truncated +``get_problem_statement()`` body. Output summarizes ``AgentRunResult.info`` and +trajectory length. Tool span arguments/results follow GenAI content-capture +environment variables when experimental semconv is enabled. diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-sweagent/examples/__init__.py b/instrumentation-loongsuite/loongsuite-instrumentation-sweagent/examples/__init__.py new file mode 100644 index 000000000..b0a6f4284 --- /dev/null +++ b/instrumentation-loongsuite/loongsuite-instrumentation-sweagent/examples/__init__.py @@ -0,0 +1,13 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-sweagent/examples/basic_example.py b/instrumentation-loongsuite/loongsuite-instrumentation-sweagent/examples/basic_example.py new file mode 100644 index 000000000..8b231a172 --- /dev/null +++ b/instrumentation-loongsuite/loongsuite-instrumentation-sweagent/examples/basic_example.py @@ -0,0 +1,57 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Minimal example: instrument run hooks and export spans to the console."""
+
+from __future__ import annotations
+
+from unittest.mock import MagicMock
+
+from sweagent.run.hooks.abstract import CombinedRunHooks
+from sweagent.types import AgentInfo, AgentRunResult
+
+from opentelemetry.instrumentation.sweagent import SweagentInstrumentor
+from opentelemetry.sdk.trace import TracerProvider
+from opentelemetry.sdk.trace.export import (
+    BatchSpanProcessor,
+    ConsoleSpanExporter,
+)
+
+
+def main() -> None:
+    """Drive one mocked instance through the instrumented run hooks.
+
+    Installs a console exporter, instruments SWE-agent, calls
+    ``on_instance_start`` and ``on_instance_completed`` on an empty
+    ``CombinedRunHooks`` with a mocked problem statement, then uninstruments.
+    """
+    tracer_provider = TracerProvider()
+    tracer_provider.add_span_processor(
+        BatchSpanProcessor(ConsoleSpanExporter())
+    )
+    SweagentInstrumentor().instrument(tracer_provider=tracer_provider)
+
+    # Stand-in problem statement: only ``id`` and ``get_problem_statement()``
+    # are configured on the mock.
+    problem = MagicMock()
+    problem.id = "demo-instance"
+    problem.get_problem_statement.return_value = "Example task description"
+
+    run_hooks = CombinedRunHooks()
+    run_hooks.on_instance_start(
+        index=0,
+        env=MagicMock(),
+        problem_statement=problem,
+    )
+    finished_run = AgentRunResult(
+        info=AgentInfo(exit_status="done"),
+        trajectory=[],
+    )
+    run_hooks.on_instance_completed(result=finished_run)
+    SweagentInstrumentor().uninstrument()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-sweagent/pyproject.toml b/instrumentation-loongsuite/loongsuite-instrumentation-sweagent/pyproject.toml
new file mode 100644
index 000000000..e0dcd2d60
--- /dev/null
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-sweagent/pyproject.toml
@@ -0,0 +1,55 @@
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project]
+name = 
"loongsuite-instrumentation-sweagent" +dynamic = ["version"] +description = "LoongSuite instrumentation for SWE-agent (GenAI spans via util-genai)" +readme = "README.rst" +license = "Apache-2.0" +requires-python = ">=3.11" +authors = [ + { name = "LoongSuite Python Agent Authors", email = "qp467389@alibaba-inc.com" }, +] +classifiers = [ + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", +] +dependencies = [ + "opentelemetry-api ~= 1.37", + "opentelemetry-instrumentation >= 0.58b0", + "opentelemetry-semantic-conventions >= 0.58b0", + "opentelemetry-util-genai >= 0.2b0", + "wrapt >= 1.17.3", +] + +[project.optional-dependencies] +instruments = [ + "sweagent >= 1.1.0", +] + +[project.entry-points.opentelemetry_instrumentor] +sweagent = "opentelemetry.instrumentation.sweagent:SweagentInstrumentor" + +[project.urls] +Homepage = "https://github.com/alibaba/loongsuite-python-agent" +Repository = "https://github.com/alibaba/loongsuite-python-agent" + +[tool.hatch.version] +path = "src/opentelemetry/instrumentation/sweagent/version.py" + +[tool.hatch.build.targets.sdist] +include = [ + "src", + "tests", +] + +[tool.hatch.build.targets.wheel] +packages = ["src/opentelemetry"] diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-sweagent/src/opentelemetry/instrumentation/sweagent/__init__.py b/instrumentation-loongsuite/loongsuite-instrumentation-sweagent/src/opentelemetry/instrumentation/sweagent/__init__.py new file mode 100644 index 000000000..5006dbf5a --- /dev/null +++ b/instrumentation-loongsuite/loongsuite-instrumentation-sweagent/src/opentelemetry/instrumentation/sweagent/__init__.py @@ -0,0 +1,160 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache 
License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""LoongSuite instrumentation for SWE-agent using ExtendedTelemetryHandler.""" + +from __future__ import annotations + +import logging +from typing import Any, Collection + +from wrapt import wrap_function_wrapper + +from opentelemetry.instrumentation.instrumentor import BaseInstrumentor +from opentelemetry.instrumentation.sweagent.package import _instruments +from opentelemetry.instrumentation.sweagent.patch import ( + _AGENT_HOOKS_MODULE, + _AGENTS_MODULE, + _RUN_HOOKS_MODULE, + bind_extended_handler, + wrap_combined_agent_hook_on_run_done, + wrap_combined_agent_hook_on_run_start, + wrap_combined_agent_hook_on_step_done, + wrap_combined_agent_hook_on_step_start, + wrap_combined_run_hooks_on_instance_completed, + wrap_combined_run_hooks_on_instance_start, + wrap_default_agent_handle_action, +) +from opentelemetry.instrumentation.utils import unwrap +from opentelemetry.util.genai.extended_handler import ExtendedTelemetryHandler + +logger = logging.getLogger(__name__) + +_COMBINED_RUN_HOOK_PATCHES: tuple[tuple[str, Any], ...] = ( + ("on_instance_start", wrap_combined_run_hooks_on_instance_start), + ("on_instance_completed", wrap_combined_run_hooks_on_instance_completed), +) + +_COMBINED_AGENT_HOOK_PATCHES: tuple[tuple[str, Any], ...] 
= (
+    ("on_run_start", wrap_combined_agent_hook_on_run_start),
+    ("on_run_done", wrap_combined_agent_hook_on_run_done),
+    ("on_step_start", wrap_combined_agent_hook_on_step_start),
+    ("on_step_done", wrap_combined_agent_hook_on_step_done),
+)
+
+__all__ = ["SweagentInstrumentor"]
+
+
+class SweagentInstrumentor(BaseInstrumentor):
+    """Instrument SWE-agent run and agent hooks with GenAI semantic spans.
+
+    ``_instrument`` wraps the ``CombinedRunHooks`` and ``CombinedAgentHook``
+    methods named in the module-level patch tables, plus
+    ``DefaultAgent.handle_action``. ``_uninstrument`` unwraps the same
+    targets.
+    """
+
+    def __init__(self) -> None:
+        super().__init__()
+        # Set in ``_instrument`` and reset to ``None`` in ``_uninstrument``.
+        self._handler: ExtendedTelemetryHandler | None = None
+
+    def instrumentation_dependencies(self) -> Collection[str]:
+        """Return the ``_instruments`` requirement specifiers."""
+        return _instruments
+
+    def _instrument(self, **kwargs: Any) -> None:
+        """Create the telemetry handler and wrap every patch target.
+
+        Reads the optional ``tracer_provider``, ``meter_provider`` and
+        ``logger_provider`` keyword arguments. A target that cannot be
+        wrapped is logged as a warning; the remaining targets are still
+        attempted.
+        """
+        tracer_provider = kwargs.get("tracer_provider")
+        meter_provider = kwargs.get("meter_provider")
+        logger_provider = kwargs.get("logger_provider")
+
+        self._handler = ExtendedTelemetryHandler(
+            tracer_provider=tracer_provider,
+            meter_provider=meter_provider,
+            logger_provider=logger_provider,
+        )
+        handler = self._handler
+
+        # Run-level hooks (entry span).
+        for name, fn in _COMBINED_RUN_HOOK_PATCHES:
+            try:
+                wrap_function_wrapper(
+                    _RUN_HOOKS_MODULE,
+                    f"CombinedRunHooks.{name}",
+                    bind_extended_handler(handler, fn),
+                )
+                logger.debug("Wrapped CombinedRunHooks.%s", name)
+            except Exception as e:
+                logger.warning(
+                    "Failed to wrap CombinedRunHooks.%s: %s", name, e
+                )
+
+        # Agent-level hooks (invoke_agent and react step spans).
+        for name, fn in _COMBINED_AGENT_HOOK_PATCHES:
+            try:
+                wrap_function_wrapper(
+                    _AGENT_HOOKS_MODULE,
+                    f"CombinedAgentHook.{name}",
+                    bind_extended_handler(handler, fn),
+                )
+                logger.debug("Wrapped CombinedAgentHook.%s", name)
+            except Exception as e:
+                logger.warning(
+                    "Failed to wrap CombinedAgentHook.%s: %s", name, e
+                )
+
+        # Tool execution (execute_tool span).
+        try:
+            wrap_function_wrapper(
+                _AGENTS_MODULE,
+                "DefaultAgent.handle_action",
+                bind_extended_handler(
+                    handler, wrap_default_agent_handle_action
+                ),
+            )
+            logger.debug("Wrapped DefaultAgent.handle_action")
+        except Exception as e:
+            logger.warning("Failed to wrap DefaultAgent.handle_action: %s", e)
+
+    def _uninstrument(self, **kwargs: Any) -> None:
+        """Unwrap every patch target and drop the handler.
+
+        Import and unwrap failures are logged as warnings and never raised,
+        so one failing target does not prevent the others from being
+        restored.
+        """
+        # No keyword arguments are used here.
+        del kwargs
+        try:
+            import sweagent.agent.agents as agent_mod  # noqa: PLC0415
+
+            unwrap(agent_mod.DefaultAgent, "handle_action")
+        except Exception as e:
+            logger.warning(
+                "Failed to unwrap DefaultAgent.handle_action: %s", e
+            )
+
+        try:
+            import sweagent.agent.hooks.abstract as agent_hooks  # noqa: PLC0415
+
+            for name, _ in _COMBINED_AGENT_HOOK_PATCHES:
+                try:
+                    unwrap(agent_hooks.CombinedAgentHook, name)
+                except Exception as e:
+                    logger.warning(
+                        "Failed to unwrap CombinedAgentHook.%s: %s", name, e
+                    )
+        except Exception as e:
+            logger.warning(
+                "Failed to import agent hooks for uninstrument: %s", e
+            )
+
+        try:
+            import sweagent.run.hooks.abstract as run_hooks  # noqa: PLC0415
+
+            for name, _ in _COMBINED_RUN_HOOK_PATCHES:
+                try:
+                    unwrap(run_hooks.CombinedRunHooks, name)
+                except Exception as e:
+                    logger.warning(
+                        "Failed to unwrap CombinedRunHooks.%s: %s", name, e
+                    )
+        except Exception as e:
+            logger.warning(
+                "Failed to import run hooks for uninstrument: %s", e
+            )
+
+        self._handler = None
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-sweagent/src/opentelemetry/instrumentation/sweagent/package.py b/instrumentation-loongsuite/loongsuite-instrumentation-sweagent/src/opentelemetry/instrumentation/sweagent/package.py
new file mode 100644
index 000000000..cada5789c
--- /dev/null
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-sweagent/src/opentelemetry/instrumentation/sweagent/package.py
@@ -0,0 +1,17 @@
+# Copyright The OpenTelemetry Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and +# limitations under the License. + +_instruments = ("sweagent >= 1.1.0",) + +_supports_metrics = False diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-sweagent/src/opentelemetry/instrumentation/sweagent/patch.py b/instrumentation-loongsuite/loongsuite-instrumentation-sweagent/src/opentelemetry/instrumentation/sweagent/patch.py new file mode 100644 index 000000000..9ba54340f --- /dev/null +++ b/instrumentation-loongsuite/loongsuite-instrumentation-sweagent/src/opentelemetry/instrumentation/sweagent/patch.py @@ -0,0 +1,415 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+"""SWE-agent hook and agent method wrappers using ExtendedTelemetryHandler."""
+
+from __future__ import annotations
+
+import json
+import logging
+import threading
+from types import SimpleNamespace
+from typing import Any
+
+from opentelemetry.util.genai.extended_handler import ExtendedTelemetryHandler
+from opentelemetry.util.genai.extended_types import (
+    EntryInvocation,
+    ExecuteToolInvocation,
+    InvokeAgentInvocation,
+    ReactStepInvocation,
+)
+from opentelemetry.util.genai.types import (
+    Error,
+    InputMessage,
+    OutputMessage,
+    Text,
+)
+
+logger = logging.getLogger(__name__)
+
+SWEAGENT_PROVIDER = "sweagent"
+SWEAGENT_AGENT_NAME = "swe-agent"
+SWEAGENT_AGENT_DESCRIPTION = (
+    "SWE-agent autonomous software engineering agent (bash/tools loop)"
+)
+SWEAGENT_BASH_TOOL_NAME = "sweagent_bash"
+_PROBLEM_TEXT_MAX_LEN = 4096
+
+# Links CombinedRunHooks.on_instance_start(problem_statement=...) to
+# CombinedAgentHook.on_run_start() on the same thread (no PS in hook kwargs).
+_instance_tls = threading.local()
+
+
+def _tool_name_from_tool_call_item(call: Any) -> str | None:
+    """Extract function/tool name from one OpenAI-style tool_calls entry (dict or object).
+
+    ``function.name`` is preferred; a top-level ``name`` is the fallback.
+    Names are stripped of surrounding whitespace. Returns ``None`` when
+    ``call`` is ``None`` or no non-blank string name is found.
+    """
+    if call is None:
+        return None
+    if isinstance(call, dict):
+        fn = call.get("function")
+        if isinstance(fn, dict):
+            name = fn.get("name")
+            if isinstance(name, str) and name.strip():
+                return name.strip()
+        name = call.get("name")
+        if isinstance(name, str) and name.strip():
+            return name.strip()
+        return None
+    # Object-style entry: same lookup order via attribute access.
+    fn = getattr(call, "function", None)
+    if fn is not None:
+        name = getattr(fn, "name", None)
+        if isinstance(name, str) and name.strip():
+            return name.strip()
+    name = getattr(call, "name", None)
+    if isinstance(name, str) and name.strip():
+        return name.strip()
+    return None
+
+
+def tool_name_from_sweagent_step(step: Any) -> str:
+    """Tool name for telemetry from the model-issued tool call list.
+
+    In ``DefaultAgent.forward``, the dict from ``model.query()`` has separate
+    fields: ``message`` (assistant text / ``content``) and, when the API uses
+    function calling, ``tool_calls``. SWE-agent assigns
+    ``step.tool_calls = output["tool_calls"]`` — that **is** the LLM response's
+    tool call payload, not a recomputation. The registered tool name lives in
+    ``tool_calls[*].function.name``, not in the free-text ``message`` string.
+
+    With ``FunctionCallingParser``, SWE-agent allows **exactly one** tool call per
+    model response; ``len(tool_calls) != 1`` raises before ``handle_action``.
+    A successful ``handle_action`` therefore normally sees a single entry; the
+    loop below only picks the first resolvable name for robustness.
+
+    Without native ``tool_calls`` (e.g. thought/action parsing only), fall back
+    to ``sweagent_bash`` because execution still uses bash ``communicate``.
+    """
+    tool_calls = (
+        getattr(step, "tool_calls", None) if step is not None else None
+    )
+    if not tool_calls:
+        return SWEAGENT_BASH_TOOL_NAME
+    for call in tool_calls:
+        name = _tool_name_from_tool_call_item(call)
+        if name:
+            return name
+    return SWEAGENT_BASH_TOOL_NAME
+
+
+def _select_tool_call_for_step(step: Any) -> Any | None:
+    """Same entry as :func:`tool_name_from_sweagent_step` when possible, else first call.
+
+    Returns ``None`` when ``step`` is ``None`` or carries no ``tool_calls``.
+    """
+    tool_calls = (
+        getattr(step, "tool_calls", None) if step is not None else None
+    )
+    if not tool_calls:
+        return None
+    for call in tool_calls:
+        if _tool_name_from_tool_call_item(call):
+            return call
+    return tool_calls[0]
+
+
+def _normalize_function_arguments_from_tool_call(call: Any) -> Any | None:
+    """Return ``function.arguments`` parsed as JSON when a string; dict passthrough; else raw.
+
+    A blank string yields ``{}``; a string that is not valid JSON is returned
+    unchanged. Returns ``None`` when ``call``, its ``function`` or the
+    ``arguments`` value is missing.
+    """
+    if call is None:
+        return None
+    if isinstance(call, dict):
+        fn = call.get("function")
+    else:
+        fn = getattr(call, "function", None)
+    if fn is None:
+        return None
+    if isinstance(fn, dict):
+        raw = fn.get("arguments")
+    else:
+        raw = getattr(fn, "arguments", None)
+    if raw is None:
+        return None
+    if isinstance(raw, dict):
+        return raw
+    if isinstance(raw, str):
+        stripped = raw.strip()
+        if not stripped:
+            return {}
+        try:
+            return json.loads(stripped)
+        except json.JSONDecodeError:
+            return raw
+    return raw
+
+
+def tool_call_arguments_from_sweagent_step(step: Any) -> Any:
+    """Tool call arguments for telemetry: LLM ``function.arguments`` when native ``tool_calls`` exist.
+
+    Otherwise the parsed shell line(s) in ``step.action`` (thought/action and similar paths).
+    Structured arguments are preferred when the model used function calling, since ``action`` is
+    the command line already expanded by SWE-agent's parser.
+    """
+    if step is None:
+        return None
+    fallback = getattr(step, "action", None)
+    selected = _select_tool_call_for_step(step)
+    if selected is None:
+        return fallback
+    normalized = _normalize_function_arguments_from_tool_call(selected)
+    if normalized is not None:
+        return normalized
+    return fallback
+
+
+_RUN_HOOKS_MODULE = "sweagent.run.hooks.abstract"
+_AGENT_HOOKS_MODULE = "sweagent.agent.hooks.abstract"
+_AGENTS_MODULE = "sweagent.agent.agents"
+
+
+def _truncate(text: str, max_len: int = _PROBLEM_TEXT_MAX_LEN) -> str:
+    """Return ``text`` unchanged when it fits, else its first ``max_len`` chars plus ``...``.
+
+    The ellipsis is appended after the cut, so a truncated result is
+    ``max_len + 3`` characters long.
+    """
+    if len(text) <= max_len:
+        return text
+    # Plain literal: the former ``f"..."`` had no placeholders (F541).
+    return text[:max_len] + "..."
+
+
+def _problem_statement_id_and_text(
+    problem_statement: Any,
+) -> tuple[str | None, str]:
+    """Return ``(id, text)`` for a problem statement, tolerating bad input.
+
+    The id is the object's ``id`` attribute (``None`` when absent). The text
+    comes from ``get_problem_statement()`` and is cut by ``_truncate``; a
+    failing call is logged at debug level and yields an empty string.
+    """
+    problem_id = getattr(problem_statement, "id", None)
+    try:
+        raw_text = problem_statement.get_problem_statement()  # type: ignore[attr-defined]
+    except Exception:  # pragma: no cover
+        logger.debug("Could not read problem statement text", exc_info=True)
+        raw_text = ""
+    return problem_id, _truncate(raw_text or "")
+
+
+def _build_agent_run_summary(info: Any, trajectory: Any) -> str:
+    """Summarize a run as newline-separated ``key=value`` lines.
+
+    Used for both entry and invoke_agent output. A non-dict ``info`` is
+    treated as empty; ``trajectory_len`` is always present, the other lines
+    only when their value is set.
+    """
+    details = info if isinstance(info, dict) else {}
+    steps = trajectory or []
+    status = details.get("exit_status")
+    lines: list[str] = [] if status is None else [f"exit_status={status!r}"]
+    lines.append(f"trajectory_len={len(steps)}")
+    submission = details.get("submission")
+    if submission:
+        preview = _truncate(str(submission), 512)
+        lines.append(f"submission_preview={preview!r}")
+    stats = details.get("model_stats")
+    if stats:
+        lines.append(f"model_stats={stats!r}")
+    return "\n".join(lines)
+
+
+def _build_entry_output_summary(result: Any) -> str:
+    """Summarize an ``AgentRunResult``-like object via its ``info`` and ``trajectory``."""
+    return _build_agent_run_summary(
+        getattr(result, "info", None),
+        getattr(result, "trajectory", None),
+    )
+
+
+def _apply_agent_info_to_invocation(
+    inv: InvokeAgentInvocation, info: Any
+) -> None:
+    """Copy exit status and token counts from SWE-agent ``info`` onto ``inv``.
+
+    Does nothing for a non-dict ``info``. Token counts are read from
+    ``info["model_stats"]`` when that is a dict; values that cannot be
+    converted to ``int`` are skipped.
+    """
+    if not isinstance(info, dict):
+        return
+    status = info.get("exit_status")
+    if status is not None:
+        inv.finish_reasons = [str(status)]
+    stats = info.get("model_stats")
+    if not isinstance(stats, dict):
+        return
+    sent, received = stats.get("tokens_sent"), stats.get("tokens_received")
+    for attr_name, raw_count in (
+        ("input_tokens", sent),
+        ("output_tokens", received),
+    ):
+        if raw_count is None:
+            continue
+        try:
+            setattr(inv, attr_name, int(raw_count))
+        except (TypeError, ValueError):
+            pass
+
+
+def wrap_combined_run_hooks_on_instance_start(
+    handler: ExtendedTelemetryHandler, wrapped, instance, args, kwargs
+):
+    """Start the entry span, then run the real ``on_instance_start`` hook.
+
+    The invocation is stored on ``instance`` and the problem statement in
+    thread-local storage for the later hooks. If the wrapped hook raises,
+    the span is failed, both are cleared, and the exception is re-raised.
+    """
+    instance_id, body = _problem_statement_id_and_text(
+        kwargs.get("problem_statement")
+    )
+    inv = EntryInvocation(
+        session_id=str(instance_id) if instance_id is not None else None,
+        input_messages=[
+            InputMessage(role="user", parts=[Text(content=body or "(empty)")])
+        ],
+    )
+    handler.start_entry(inv)
+    setattr(instance, "_loongsuite_entry_invocation", inv)
+    _instance_tls.problem_statement = kwargs.get("problem_statement")
+    try:
+        return wrapped(*args, **kwargs)
+    except Exception as exc:
+        handler.fail_entry(inv, Error(message=str(exc), type=type(exc)))
+        delattr(instance, "_loongsuite_entry_invocation")
+        _instance_tls.problem_statement = None
+        raise
+
+
+def wrap_combined_run_hooks_on_instance_completed(
+    handler: ExtendedTelemetryHandler, wrapped, instance, args, kwargs
+):
+    """Run the real ``on_instance_completed`` hook, then stop the entry span.
+
+    The span output is a summary of ``kwargs["result"]``. When no entry
+    invocation is stored on ``instance`` nothing is recorded, and the wrapped
+    hook's return value or exception passes through untouched.
+    """
+    try:
+        return wrapped(*args, **kwargs)
+    finally:
+        inv = getattr(instance, "_loongsuite_entry_invocation", None)
+        # No ``return`` inside ``finally``: it would discard an exception
+        # raised by ``wrapped`` and replace its return value.
+        if inv is not None:
+            result = kwargs.get("result")
+            summary = (
+                _build_entry_output_summary(result)
+                if result is not None
+                else "(no result)"
+            )
+            inv.output_messages = [
+                OutputMessage(
+                    role="assistant",
+                    parts=[Text(content=summary)],
+                    finish_reason="stop",
+                )
+            ]
+            try:
+                handler.stop_entry(inv)
+            finally:
+                delattr(instance, "_loongsuite_entry_invocation")
+                _instance_tls.problem_statement = None
+
+
+def wrap_combined_agent_hook_on_run_start(
+    handler: ExtendedTelemetryHandler, wrapped, instance, args, kwargs
+):
+    """Start the invoke_agent span, then run the real ``on_run_start`` hook.
+
+    The problem statement is read from thread-local storage. If the wrapped
+    hook raises, the span is failed and the exception is re-raised.
+    """
+    # Same user message shape as ``EntryInvocation`` (``on_instance_start``).
+    ps = getattr(_instance_tls, "problem_statement", None)
+    instance_id, body = _problem_statement_id_and_text(ps)
+    inv = InvokeAgentInvocation(
+        provider=SWEAGENT_PROVIDER,
+        agent_name=SWEAGENT_AGENT_NAME,
+        agent_description=SWEAGENT_AGENT_DESCRIPTION,
+        conversation_id=str(instance_id) if instance_id is not None else None,
+        input_messages=[
+            InputMessage(role="user", parts=[Text(content=body or "(empty)")])
+        ],
+    )
+    handler.start_invoke_agent(inv)
+    setattr(instance, "_loongsuite_invoke_invocation", inv)
+    try:
+        return wrapped(*args, **kwargs)
+    except Exception as exc:
+        handler.fail_invoke_agent(inv, Error(message=str(exc), type=type(exc)))
+        delattr(instance, "_loongsuite_invoke_invocation")
+        raise
+
+
+def wrap_combined_agent_hook_on_run_done(
+    handler: ExtendedTelemetryHandler, wrapped, instance, args, kwargs
+):
+    """Run the real ``on_run_done`` hook, then stop the invoke_agent span.
+
+    Output summary, finish reason and token counts come from
+    ``kwargs["info"]`` and ``kwargs["trajectory"]``. When no invocation is
+    stored on ``instance`` nothing is recorded, and the wrapped hook's return
+    value or exception passes through untouched.
+    """
+    try:
+        return wrapped(*args, **kwargs)
+    finally:
+        inv = getattr(instance, "_loongsuite_invoke_invocation", None)
+        # No ``return`` inside ``finally``: it would discard an exception
+        # raised by ``wrapped`` and replace its return value.
+        if inv is not None:
+            # Same summary text as entry ``on_instance_completed`` (``AgentRunResult``-like).
+            result_like = SimpleNamespace(
+                info=kwargs.get("info"),
+                trajectory=kwargs.get("trajectory"),
+            )
+            summary = _build_entry_output_summary(result_like)
+            inv.output_messages = [
+                OutputMessage(
+                    role="assistant",
+                    parts=[Text(content=summary)],
+                    finish_reason="stop",
+                )
+            ]
+            _apply_agent_info_to_invocation(inv, result_like.info)
+            try:
+                handler.stop_invoke_agent(inv)
+            finally:
+                delattr(instance, "_loongsuite_invoke_invocation")
+
+
+def wrap_combined_agent_hook_on_step_start(
+    handler: ExtendedTelemetryHandler, wrapped, instance, args, kwargs
+):
+    """Start a react-step span, then run the real ``on_step_start`` hook.
+
+    The round number is a counter kept on ``instance`` and incremented on
+    every call. If the wrapped hook raises, the span is failed and the
+    exception is re-raised.
+    """
+    round_no = getattr(instance, "_loongsuite_react_round", 0) + 1
+    setattr(instance, "_loongsuite_react_round", round_no)
+    inv = ReactStepInvocation(round=round_no)
+    handler.start_react_step(inv)
+    setattr(instance, "_loongsuite_react_invocation", inv)
+    try:
+        return wrapped(*args, **kwargs)
+    except Exception as exc:
+        handler.fail_react_step(inv, Error(message=str(exc), type=type(exc)))
+        delattr(instance, "_loongsuite_react_invocation")
+        raise
+
+
+def wrap_combined_agent_hook_on_step_done(
+    handler: ExtendedTelemetryHandler, wrapped, instance, args, kwargs
+):
+    """Run the real ``on_step_done`` hook, then stop the react-step span.
+
+    The finish reason is ``kwargs["step"].exit_status`` as a string when
+    set. When no invocation is stored on ``instance`` nothing is recorded,
+    and the wrapped hook's return value or exception passes through
+    untouched.
+    """
+    try:
+        return wrapped(*args, **kwargs)
+    finally:
+        inv = getattr(instance, "_loongsuite_react_invocation", None)
+        # No ``return`` inside ``finally``: it would discard an exception
+        # raised by ``wrapped`` and replace its return value.
+        if inv is not None:
+            step = kwargs.get("step")
+            if step is not None:
+                fr = getattr(step, "exit_status", None)
+                inv.finish_reason = str(fr) if fr is not None else None
+            try:
+                handler.stop_react_step(inv)
+            finally:
+                delattr(instance, "_loongsuite_react_invocation")
+
+
+def wrap_default_agent_handle_action(
+    handler: ExtendedTelemetryHandler, wrapped, instance, args, kwargs
+):
+    """Wrap ``handle_action`` so tool spans end on error paths (not always paired hooks).
+
+    The step is the first positional argument or ``kwargs["step"]``. The
+    tool name and arguments are resolved from it before the call, and
+    ``step.observation`` is recorded as the result afterwards. An exception
+    fails the span and is re-raised.
+    """
+    step = args[0] if args else kwargs.get("step")
+    resolved_tool = tool_name_from_sweagent_step(step)
+    inv = ExecuteToolInvocation(
+        tool_name=resolved_tool,
+        provider=SWEAGENT_PROVIDER,
+        tool_type="function",
+    )
+    if step is not None:
+        inv.tool_call_arguments = tool_call_arguments_from_sweagent_step(step)
+    handler.start_execute_tool(inv)
+    try:
+        result = wrapped(*args, **kwargs)
+    except Exception as exc:
+        handler.fail_execute_tool(inv, Error(message=str(exc), type=type(exc)))
+        raise
+    if step is not None:
+        inv.tool_call_result = getattr(step, "observation", None)
+    handler.stop_execute_tool(inv)
+    return result
+
+
+def bind_extended_handler(handler: ExtendedTelemetryHandler, fn):
+    """Adapt ``fn(handler, wrapped, instance, args, kwargs)`` to a four-argument wrapper.
+
+    The returned callable takes ``(wrapped, instance, args, kwargs)`` and
+    passes ``handler`` as the first argument to ``fn``.
+    """
+    return lambda wrapped, instance, args, kwargs: fn(
+        handler, wrapped, instance, args, kwargs
+    )
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-sweagent/src/opentelemetry/instrumentation/sweagent/version.py b/instrumentation-loongsuite/loongsuite-instrumentation-sweagent/src/opentelemetry/instrumentation/sweagent/version.py
new file mode 100644
index 000000000..7bee975f0
--- /dev/null
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-sweagent/src/opentelemetry/instrumentation/sweagent/version.py
@@ -0,0 +1,15 @@
+# Copyright The OpenTelemetry Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+__version__ = "0.5.0.dev"
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-sweagent/tests/conftest.py b/instrumentation-loongsuite/loongsuite-instrumentation-sweagent/tests/conftest.py
new file mode 100644
index 000000000..8adca274e
--- /dev/null
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-sweagent/tests/conftest.py
@@ -0,0 +1,91 @@
+# Copyright The OpenTelemetry Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+import atexit
+import os
+import shutil
+import tempfile
+from pathlib import Path
+
+import pytest
+
+from opentelemetry.instrumentation.sweagent import SweagentInstrumentor
+from opentelemetry.sdk.trace import TracerProvider
+from opentelemetry.sdk.trace.export import SimpleSpanProcessor
+from opentelemetry.sdk.trace.export.in_memory_span_exporter import (
+    InMemorySpanExporter,
+)
+
+
+def _ensure_sweagent_packaged_layout() -> None:
+    """Provide placeholder ``sweagent`` layout directories when none are configured.
+
+    Editable/git installs ship ``config``/``tools``/``trajectories``; pip git often does not.
+    ``sweagent`` asserts these directories exist at import time.
+
+    Does nothing when ``SWE_AGENT_CONFIG_DIR`` is already set. Otherwise a
+    temporary tree is created, its paths are exported through the three
+    ``SWE_AGENT_*_DIR`` variables, and the tree is removed at interpreter exit.
+    """
+    if os.environ.get("SWE_AGENT_CONFIG_DIR"):
+        return
+    root = Path(tempfile.mkdtemp(prefix="sweagent-instr-test-"))
+    # ``mkdtemp`` leaves cleanup to the caller; remove the tree when the test
+    # process exits so repeated runs do not accumulate directories.
+    atexit.register(shutil.rmtree, root, ignore_errors=True)
+    for name in ("config", "tools", "trajectories"):
+        (root / name).mkdir(parents=True, exist_ok=True)
+    os.environ["SWE_AGENT_CONFIG_DIR"] = str(root / "config")
+    os.environ["SWE_AGENT_TOOLS_DIR"] = str(root / "tools")
+    os.environ["SWE_AGENT_TRAJECTORY_DIR"] = str(root / "trajectories")
+
+
+def pytest_configure(config):
+    """Pytest hook: prepare the sweagent layout and default the OTel GenAI env vars.
+
+    ``setdefault`` is used so values already present in the environment win.
+    """
+    del config
+    _ensure_sweagent_packaged_layout()
+    os.environ.setdefault(
+        "OTEL_SEMCONV_STABILITY_OPT_IN", "gen_ai_latest_experimental"
+    )
+    os.environ.setdefault(
+        "OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT", "SPAN_ONLY"
+    )
+
+
+@pytest.fixture(name="span_exporter")
+def fixture_span_exporter():
+    """Return a fresh in-memory exporter that tests read finished spans from."""
+    return InMemorySpanExporter()
+
+
+@pytest.fixture(name="tracer_provider")
+def fixture_tracer_provider(span_exporter):
+    """Return a ``TracerProvider`` that feeds ``span_exporter`` through a ``SimpleSpanProcessor``."""
+    provider = TracerProvider()
+    provider.add_span_processor(SimpleSpanProcessor(span_exporter))
+    return provider
+
+
+@pytest.fixture
+def instrumented_sweagent(tracer_provider):
+    """Instrument sweagent with ``tracer_provider`` for one test and uninstrument afterwards."""
+    inst = SweagentInstrumentor()
+    inst.instrument(tracer_provider=tracer_provider)
+    yield inst
+    inst.uninstrument()
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-sweagent/tests/requirements.latest.txt b/instrumentation-loongsuite/loongsuite-instrumentation-sweagent/tests/requirements.latest.txt
new file mode 100644
index 000000000..6072f179c
--- /dev/null
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-sweagent/tests/requirements.latest.txt
@@ -0,0 +1,12 @@
+# Test requirements (PyPI OpenTelemetry 1.37 / 0.58b0; no git test_deps in tox).
+ +sweagent @ git+https://github.com/SWE-agent/SWE-agent.git@v1.1.0 +pytest +wrapt +opentelemetry-api==1.37 +opentelemetry-sdk==1.37 +opentelemetry-instrumentation==0.58b0 +opentelemetry-semantic-conventions==0.58b0 + +-e instrumentation-loongsuite/loongsuite-instrumentation-sweagent +-e util/opentelemetry-util-genai diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-sweagent/tests/requirements.oldest.txt b/instrumentation-loongsuite/loongsuite-instrumentation-sweagent/tests/requirements.oldest.txt new file mode 100644 index 000000000..9765b8ac4 --- /dev/null +++ b/instrumentation-loongsuite/loongsuite-instrumentation-sweagent/tests/requirements.oldest.txt @@ -0,0 +1,12 @@ +# Oldest supported test pins (align with other loongsuite instrumentations). + +sweagent @ git+https://github.com/SWE-agent/SWE-agent.git@v1.1.0 +pytest==7.4.4 +wrapt==1.17.3 +opentelemetry-api==1.37 +opentelemetry-sdk==1.37 +opentelemetry-instrumentation==0.58b0 +opentelemetry-semantic-conventions==0.58b0 + +-e instrumentation-loongsuite/loongsuite-instrumentation-sweagent +-e util/opentelemetry-util-genai diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-sweagent/tests/test_spans.py b/instrumentation-loongsuite/loongsuite-instrumentation-sweagent/tests/test_spans.py new file mode 100644 index 000000000..16147078a --- /dev/null +++ b/instrumentation-loongsuite/loongsuite-instrumentation-sweagent/tests/test_spans.py @@ -0,0 +1,270 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+import json
+from unittest.mock import MagicMock
+
+from sweagent.agent.hooks.abstract import CombinedAgentHook
+from sweagent.run.hooks.abstract import CombinedRunHooks
+from sweagent.types import AgentInfo, AgentRunResult, StepOutput
+
+from opentelemetry.instrumentation.sweagent.patch import (
+    SWEAGENT_AGENT_NAME,
+    SWEAGENT_BASH_TOOL_NAME,
+    tool_call_arguments_from_sweagent_step,
+    tool_name_from_sweagent_step,
+    wrap_default_agent_handle_action,
+)
+from opentelemetry.semconv._incubating.attributes import (
+    gen_ai_attributes as GenAI,
+)
+from opentelemetry.util.genai.extended_handler import ExtendedTelemetryHandler
+from opentelemetry.util.genai.extended_semconv.gen_ai_extended_attributes import (
+    GEN_AI_SESSION_ID,
+    GEN_AI_SPAN_KIND,
+    GenAiSpanKindValues,
+)
+
+# Span names the assertions below expect.
+ENTRY_SPAN_NAME = "enter_ai_application_system"
+REACT_SPAN_NAME = "react step"
+TOOL_SPAN_PREFIX = "execute_tool "
+INVOKE_AGENT_SPAN_NAME = f"{GenAI.GenAiOperationNameValues.INVOKE_AGENT.value} {SWEAGENT_AGENT_NAME}"
+
+
+def test_tool_name_from_step_llm_tool_calls():
+    """The tool name is taken from the ``function.name`` of the step's tool call."""
+    openai_style = {
+        "type": "function",
+        "id": "call_1",
+        "function": {"name": "bash", "arguments": '{"command": "ls"}'},
+    }
+    step = StepOutput(
+        action="ls",
+        tool_calls=[openai_style],
+    )
+    assert tool_name_from_sweagent_step(step) == "bash"
+
+    step2 = StepOutput(
+        action="submit",
+        tool_calls=[
+            {
+                "type": "function",
+                "id": "call_s",
+                "function": {"name": "submit", "arguments": "{}"},
+            }
+        ],
+    )
+    assert tool_name_from_sweagent_step(step2) == "submit"
+
+
+def test_tool_name_from_step_fallback_without_tool_calls():
+    """``None`` and a step without tool calls both resolve to the bash tool name."""
+    assert tool_name_from_sweagent_step(None) == SWEAGENT_BASH_TOOL_NAME
+    assert (
+        tool_name_from_sweagent_step(StepOutput(action="ls -la"))
+        == SWEAGENT_BASH_TOOL_NAME
+    )
+
+
+def test_tool_call_arguments_from_step_llm_json():
+    """JSON argument strings are decoded; the step's ``action`` text is not used."""
+    step = StepOutput(
+        action="ls # rendered for bash",
+        tool_calls=[
+            {
+                "function": {
+                    "name": "bash",
+                    "arguments": '{"command": "ls -la"}',
+                }
+            }
+        ],
+    )
+    assert tool_call_arguments_from_sweagent_step(step) == {
+        "command": "ls -la"
+    }
+
+    # An empty JSON object decodes to an empty dict.
+    empty_args = StepOutput(
+        action="touch x",
+        tool_calls=[{"function": {"name": "bash", "arguments": "{}"}}],
+    )
+    assert tool_call_arguments_from_sweagent_step(empty_args) == {}
+
+
+def test_tool_call_arguments_from_step_non_json_string_kept():
+    """An argument string that is not valid JSON is returned unchanged."""
+    step = StepOutput(
+        action="fallback_action",
+        tool_calls=[
+            {"function": {"name": "bash", "arguments": "not valid json {"}}
+        ],
+    )
+    assert tool_call_arguments_from_sweagent_step(step) == "not valid json {"
+
+
+def test_tool_call_arguments_fallback_without_tool_calls():
+    """``None`` yields ``None``; a step without tool calls yields its ``action`` string."""
+    assert tool_call_arguments_from_sweagent_step(None) is None
+    assert (
+        tool_call_arguments_from_sweagent_step(StepOutput(action="ls -la"))
+        == "ls -la"
+    )
+
+
+def _get_attrs(span):
+    """Return the span's attributes as a plain dict (empty when there are none)."""
+    return dict(span.attributes or {})
+
+
+def test_entry_run_hooks_span(instrumented_sweagent, span_exporter):
+    """Instance start/completed run hooks produce exactly one entry span."""
+    hooks = CombinedRunHooks()
+    prob = MagicMock()
+    prob.id = "issue-42"
+    prob.get_problem_statement.return_value = "Fix the crash"
+
+    hooks.on_instance_start(index=0, env=MagicMock(), problem_statement=prob)
+    result = AgentRunResult(
+        info=AgentInfo(exit_status="Submitted"), trajectory=[]
+    )
+    hooks.on_instance_completed(result=result)
+
+    spans = span_exporter.get_finished_spans()
+    assert len(spans) == 1
+    span = spans[0]
+    assert span.name == ENTRY_SPAN_NAME
+    attrs = _get_attrs(span)
+    assert attrs.get(GenAI.GEN_AI_OPERATION_NAME) == "enter"
+    assert attrs.get(GEN_AI_SPAN_KIND) == GenAiSpanKindValues.ENTRY.value
+    # The session id is expected to equal the problem statement's ``id``.
+    assert attrs.get(GEN_AI_SESSION_ID) == "issue-42"
+
+
+def test_react_step_span(instrumented_sweagent, span_exporter):
+    """A step start/done pair produces exactly one react-step span with round 1."""
+    hooks = CombinedAgentHook()
+    hooks.on_step_start()
+    step = StepOutput(done=False, exit_status=None)
+    hooks.on_step_done(step=step, info=AgentInfo())
+
+    spans = span_exporter.get_finished_spans()
+    assert len(spans) == 1
+    span = spans[0]
+    assert span.name == REACT_SPAN_NAME
+    attrs = _get_attrs(span)
+    assert attrs.get(GenAI.GEN_AI_OPERATION_NAME) == "react"
+    assert attrs.get(GEN_AI_SPAN_KIND) == GenAiSpanKindValues.STEP.value
+    assert attrs.get("gen_ai.react.round") == 1
+
+
+def test_invoke_agent_span(instrumented_sweagent, span_exporter):
+    """A run start/done pair produces exactly one invoke-agent span with messages."""
+    hooks = CombinedAgentHook()
+    hooks.on_run_start()
+    hooks.on_run_done(trajectory=[], info=AgentInfo())
+
+    spans = span_exporter.get_finished_spans()
+    assert len(spans) == 1
+    span = spans[0]
+    assert span.name == INVOKE_AGENT_SPAN_NAME
+    attrs = _get_attrs(span)
+    assert attrs.get(GenAI.GEN_AI_OPERATION_NAME) == "invoke_agent"
+    assert attrs.get(GenAI.GEN_AI_AGENT_NAME) == SWEAGENT_AGENT_NAME
+    assert attrs.get(GEN_AI_SPAN_KIND) == GenAiSpanKindValues.AGENT.value
+    in_raw = attrs.get(GenAI.GEN_AI_INPUT_MESSAGES)
+    out_raw = attrs.get(GenAI.GEN_AI_OUTPUT_MESSAGES)
+    assert in_raw is not None and out_raw is not None
+    # No instance-start hook ran in this test, so the placeholder input text is expected.
+    in_msgs = json.loads(in_raw)
+    assert in_msgs[0]["parts"][0]["content"] == "(empty)"
+    out_msgs = json.loads(out_raw)
+    assert out_msgs[0]["role"] == "assistant"
+
+
+def test_handle_action_execute_tool_span(tracer_provider, span_exporter):
+    """Calling the ``handle_action`` wrapper directly produces one execute-tool span."""
+    handler = ExtendedTelemetryHandler(tracer_provider=tracer_provider)
+    step = StepOutput(
+        action="ls -la",
+        observation="",
+        tool_calls=[
+            {
+                "type": "function",
+                "id": "call_abc",
+                "function": {"name": "bash", "arguments": "{}"},
+            }
+        ],
+    )
+
+    # Stand-in for the real ``handle_action``: it fills in the observation on the step.
+    def fake_handle_action(*args, **kwargs):
+        step.observation = "file.txt"
+        return step
+
+    wrap_default_agent_handle_action(
+        handler, fake_handle_action, MagicMock(), (step,), {}
+    )
+
+    spans = span_exporter.get_finished_spans()
+    assert len(spans) == 1
+    span = spans[0]
+    assert span.name == f"{TOOL_SPAN_PREFIX}bash"
+    attrs = _get_attrs(span)
+    assert attrs.get(GenAI.GEN_AI_OPERATION_NAME) == "execute_tool"
+    assert attrs.get(GenAI.GEN_AI_TOOL_NAME) == "bash"
+    assert attrs.get(GEN_AI_SPAN_KIND) == GenAiSpanKindValues.TOOL.value
+
+
+def test_nested_hook_hierarchy(instrumented_sweagent, span_exporter):
+    """Spans nest as entry -> invoke_agent -> react step -> execute_tool."""
+    handler = instrumented_sweagent._handler
+    run_hooks = CombinedRunHooks()
+    agent_hooks = CombinedAgentHook()
+    prob = MagicMock()
+    prob.id = "nested-1"
+    prob.get_problem_statement.return_value = "task"
+
+    run_hooks.on_instance_start(
+        index=0, env=MagicMock(), problem_statement=prob
+    )
+    agent_hooks.on_run_start()
+    agent_hooks.on_step_start()
+    # No ``tool_calls`` on this step, so the bash fallback tool name is expected below.
+    step = StepOutput(action="true", observation="")
+
+    def fake_handle_action(*args, **kwargs):
+        step.observation = "ok"
+        return step
+
+    wrap_default_agent_handle_action(
+        handler, fake_handle_action, MagicMock(), (step,), {}
+    )
+
+    agent_hooks.on_step_done(step=step, info=AgentInfo())
+    agent_hooks.on_run_done(trajectory=[], info=AgentInfo())
+    result = AgentRunResult(info=AgentInfo(exit_status="ok"), trajectory=[])
+    run_hooks.on_instance_completed(result=result)
+
+    spans = span_exporter.get_finished_spans()
+    assert len(spans) == 4
+    # Group the finished spans by name so each level can be looked up.
+    by_name = {}
+    for s in spans:
+        by_name.setdefault(s.name, []).append(s)
+
+    assert ENTRY_SPAN_NAME in by_name
+    assert INVOKE_AGENT_SPAN_NAME in by_name
+    assert REACT_SPAN_NAME in by_name
+    tool_name = f"execute_tool {SWEAGENT_BASH_TOOL_NAME}"
+    assert tool_name in by_name
+
+    entry_span = by_name[ENTRY_SPAN_NAME][0]
+    invoke_span = by_name[INVOKE_AGENT_SPAN_NAME][0]
+    react_span = by_name[REACT_SPAN_NAME][0]
+    tool_span = by_name[tool_name][0]
+
+    # Each span's parent must be the span one level up.
+    assert invoke_span.parent.span_id == entry_span.context.span_id
+    assert react_span.parent.span_id == invoke_span.context.span_id
+    assert tool_span.parent.span_id == react_span.context.span_id
+
+    # Conversation id and input text are expected to come from the problem statement.
+    assert (
+        invoke_span.attributes.get(GenAI.GEN_AI_CONVERSATION_ID) == "nested-1"
+    )
+    in_msgs = json.loads(invoke_span.attributes[GenAI.GEN_AI_INPUT_MESSAGES])
+    assert in_msgs[0]["parts"][0]["content"] == "task"
diff --git a/tox-loongsuite.ini b/tox-loongsuite.ini index 345ffbac8..ed88633b3 100644 ---
a/tox-loongsuite.ini +++ b/tox-loongsuite.ini @@ -74,6 +74,10 @@ envlist = ; loongsuite-instrumentation-copaw py3{10,11,12,13}-test-loongsuite-instrumentation-copaw lint-loongsuite-instrumentation-copaw + + ; loongsuite-instrumentation-sweagent (SWE-agent requires Python >= 3.11) + py3{11,12,13}-test-loongsuite-instrumentation-sweagent-{oldest,latest} + lint-loongsuite-instrumentation-sweagent [testenv] test_deps = @@ -141,6 +145,11 @@ deps = copaw: -r {toxinidir}/instrumentation-loongsuite/loongsuite-instrumentation-copaw/tests/requirements.txt + sweagent-oldest: -r {toxinidir}/instrumentation-loongsuite/loongsuite-instrumentation-sweagent/tests/requirements.oldest.txt + ; sweagent-latest: use PyPI-pinned OTEL only (no {[testenv]test_deps}: git main conflicts with instrumentation==0.58b0) + sweagent-latest: -r {toxinidir}/instrumentation-loongsuite/loongsuite-instrumentation-sweagent/tests/requirements.latest.txt + lint-loongsuite-instrumentation-sweagent: -r {toxinidir}/instrumentation-loongsuite/loongsuite-instrumentation-sweagent/tests/requirements.oldest.txt + ; FIXME: add coverage testing allowlist_externals = sh @@ -200,6 +209,9 @@ commands = test-loongsuite-instrumentation-copaw: pytest {toxinidir}/instrumentation-loongsuite/loongsuite-instrumentation-copaw/tests {posargs} lint-loongsuite-instrumentation-copaw: python -m ruff check {toxinidir}/instrumentation-loongsuite/loongsuite-instrumentation-copaw + + test-loongsuite-instrumentation-sweagent: pytest {toxinidir}/instrumentation-loongsuite/loongsuite-instrumentation-sweagent/tests {posargs} + lint-loongsuite-instrumentation-sweagent: python -m ruff check {toxinidir}/instrumentation-loongsuite/loongsuite-instrumentation-sweagent ; TODO: add coverage commands ; coverage: {toxinidir}/scripts/coverage.sh