diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/README.md b/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/README.md new file mode 100644 index 000000000..4d4f4d7b1 --- /dev/null +++ b/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/README.md @@ -0,0 +1,32 @@ +# LoongSuite slop-code-bench Instrumentation + +OpenTelemetry instrumentation for the [slop-code-bench](https://github.com/SprocketLab/slop-code-bench) benchmark orchestrator. + +## Span Tree + +``` +ENTRY "slop-code.enter" +└── CHAIN "workflow.{problem_name}" + ├── TASK "task.{checkpoint_name}" + │ └── AGENT "agent.{agent_type}" + │ ├── STEP "react.step.{N}" [MiniSWE only] + │ └── ... + ├── TASK "task.{checkpoint_name}" + │ └── AGENT "agent.{agent_type}" + └── ... +LLM "chat {model_name}" [Rubric Judge] +``` + +## Installation + +```bash +pip install loongsuite-instrumentation-slop-code +``` + +## Usage + +```python +from opentelemetry.instrumentation.slop_code import SlopCodeInstrumentor + +SlopCodeInstrumentor().instrument() +``` diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/pyproject.toml b/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/pyproject.toml new file mode 100644 index 000000000..b443381c2 --- /dev/null +++ b/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/pyproject.toml @@ -0,0 +1,61 @@ +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project] +name = "loongsuite-instrumentation-slop-code" +dynamic = ["version"] +description = "LoongSuite slop-code-bench instrumentation" +readme = "README.md" +license = "Apache-2.0" +requires-python = ">=3.10,<4" +authors = [ + { name = "Zhiyong Liu", email = "liuzhiyong.lzy@alibaba-inc.com" }, + { name = "OpenTelemetry Authors", email = "cncf-opentelemetry-contributors@lists.cncf.io" }, +] +classifiers = [ + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "License :: OSI Approved 
:: Apache Software License", + "Programming Language :: Python", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", +] +dependencies = [ + "opentelemetry-api >= 1.37.0", + "opentelemetry-instrumentation >= 0.58b0", + "opentelemetry-semantic-conventions >= 0.58b0", + "wrapt >= 1.14.0, < 2.0.0", + "opentelemetry-util-genai >= 0.3b0.dev0", +] + +[project.optional-dependencies] +instruments = [ + "slop-code-bench >= 0.1", +] +test = [ + "pytest", + "pytest-asyncio", + "pytest-forked", + "opentelemetry-sdk", +] + +[project.entry-points.opentelemetry_instrumentor] +slop_code = "opentelemetry.instrumentation.slop_code:SlopCodeInstrumentor" + +[project.urls] +Homepage = "https://github.com/alibaba/loongsuite-python-agent/tree/main/instrumentation-loongsuite/loongsuite-instrumentation-slop-code" +Repository = "https://github.com/alibaba/loongsuite-python-agent" + +[tool.hatch.version] +path = "src/opentelemetry/instrumentation/slop_code/version.py" + +[tool.hatch.build.targets.sdist] +include = [ + "/src", + "/tests", +] + +[tool.hatch.build.targets.wheel] +packages = ["src/opentelemetry"] diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/src/opentelemetry/instrumentation/slop_code/__init__.py b/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/src/opentelemetry/instrumentation/slop_code/__init__.py new file mode 100644 index 000000000..973cd969e --- /dev/null +++ b/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/src/opentelemetry/instrumentation/slop_code/__init__.py @@ -0,0 +1,211 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +OpenTelemetry slop-code-bench Instrumentation + +Instruments the slop-code benchmark orchestrator lifecycle: +- ENTRY: run_agent (CLI entrypoint) +- CHAIN/workflow: run_agent_on_problem (per-problem) +- TASK: AgentRunner._run_checkpoint (per-checkpoint) +- AGENT: Agent.run_checkpoint (concrete agent invocation) +- STEP: MiniSWEAgent.agent_step (ReAct iteration) +- LLM: grade_file_async (Rubric Judge) +""" + +import logging +from typing import Any, Collection + +from wrapt import wrap_function_wrapper + +from opentelemetry import trace as trace_api +from opentelemetry.instrumentation.instrumentor import BaseInstrumentor +from opentelemetry.instrumentation.slop_code.package import _instruments +from opentelemetry.instrumentation.slop_code.version import __version__ +from opentelemetry.instrumentation.slop_code.wrappers.agent import ( + _AgentRunCheckpointWrapper, +) +from opentelemetry.instrumentation.slop_code.wrappers.entry import ( + _EntryWrapper, +) +from opentelemetry.instrumentation.slop_code.wrappers.llm import ( + _RubricGradeWrapper, +) +from opentelemetry.instrumentation.slop_code.wrappers.step import ( + _MiniSWEStepWrapper, +) +from opentelemetry.instrumentation.slop_code.wrappers.task import ( + _TaskRunCheckpointWrapper, +) +from opentelemetry.instrumentation.slop_code.wrappers.workflow import ( + _WorkflowWrapper, +) +from opentelemetry.instrumentation.utils import unwrap + +logger = logging.getLogger(__name__) + +__all__ = ["SlopCodeInstrumentor", "__version__"] + +_MODULE_ENTRY = "slop_code.entrypoints.commands.run_agent" 
_MODULE_WORKER = "slop_code.entrypoints.problem_runner.worker"
# slop_code.entrypoints.problem_runner.driver re-imports
# `run_agent_on_problem` via `from .worker import run_agent_on_problem`
# at package-load time, capturing the original function reference. Because
# our wrap happens after that bind, we must additionally replace the local
# binding inside `driver` itself, otherwise the worker subprocess still
# calls the un-wrapped original and the CHAIN span never fires.
_MODULE_DRIVER = "slop_code.entrypoints.problem_runner.driver"
_MODULE_RUNNER = "slop_code.agent_runner.runner"
_MODULE_AGENT = "slop_code.agent_runner.agent"
_MODULE_MINISWE = "slop_code.agent_runner.agents.miniswe"
_MODULE_RUBRIC = "slop_code.metrics.rubric.router"

# One row per patch point, in the order they are applied:
#   (module, dotted attribute name, label used in log messages,
#    wrapper class, log level used when wrapping fails)
# The MiniSWE step and rubric judge targets log at DEBUG on failure; every
# other target logs at WARNING.
_PATCH_POINTS = (
    # ENTRY span
    (_MODULE_ENTRY, "run_agent", "run_agent", _EntryWrapper, logging.WARNING),
    # CHAIN span
    (
        _MODULE_WORKER,
        "run_agent_on_problem",
        "run_agent_on_problem",
        _WorkflowWrapper,
        logging.WARNING,
    ),
    # CHAIN span, re-bound name inside driver (see the comment on
    # _MODULE_DRIVER above).
    # NOTE(review): if `driver` were first imported only after `worker` has
    # been patched, this name would already be the wrapped function and
    # wrapping it again would presumably nest two CHAIN spans - confirm the
    # import order against slop_code.
    (
        _MODULE_DRIVER,
        "run_agent_on_problem",
        "driver.run_agent_on_problem",
        _WorkflowWrapper,
        logging.WARNING,
    ),
    # TASK span
    (
        _MODULE_RUNNER,
        "AgentRunner._run_checkpoint",
        "AgentRunner._run_checkpoint",
        _TaskRunCheckpointWrapper,
        logging.WARNING,
    ),
    # AGENT span
    (
        _MODULE_AGENT,
        "Agent.run_checkpoint",
        "Agent.run_checkpoint",
        _AgentRunCheckpointWrapper,
        logging.WARNING,
    ),
    # STEP span
    (
        _MODULE_MINISWE,
        "MiniSWEAgent.agent_step",
        "MiniSWEAgent.agent_step",
        _MiniSWEStepWrapper,
        logging.DEBUG,
    ),
    # LLM span
    (
        _MODULE_RUBRIC,
        "grade_file_async",
        "grade_file_async",
        _RubricGradeWrapper,
        logging.DEBUG,
    ),
)


class SlopCodeInstrumentor(BaseInstrumentor):
    """OpenTelemetry instrumentor for slop-code-bench framework.

    ``_instrument`` wraps every entry in ``_PATCH_POINTS`` with its span
    wrapper; ``_uninstrument`` removes those wrappers again. Both are
    best-effort: a target that cannot be patched or restored is logged and
    skipped so the remaining targets are still processed.
    """

    def instrumentation_dependencies(self) -> Collection[str]:
        """Return the package requirements this instrumentor applies to."""
        return _instruments

    def _instrument(self, **kwargs: Any) -> None:
        """Wrap all patch points with span-producing wrappers.

        Args:
            **kwargs: Only ``tracer_provider`` is read; it is forwarded to
                ``get_tracer``.
        """
        tracer = trace_api.get_tracer(
            __name__,
            __version__,
            tracer_provider=kwargs.get("tracer_provider"),
        )

        # One wrapper instance per wrapper class, so the worker and driver
        # bindings of run_agent_on_problem share the same workflow wrapper.
        wrappers = {}
        for module, name, label, wrapper_cls, level in _PATCH_POINTS:
            try:
                wrapper = wrappers.get(wrapper_cls)
                if wrapper is None:
                    wrapper = wrappers[wrapper_cls] = wrapper_cls(tracer)
                wrap_function_wrapper(
                    module=module,
                    name=name,
                    wrapper=wrapper,
                )
            except Exception as exc:
                # A missing module/attribute must not abort the remaining
                # patch points.
                logger.log(level, "Could not wrap %s: %s", label, exc)

    def _uninstrument(self, **kwargs: Any) -> None:
        """Remove the wrappers installed by ``_instrument``.

        Failures are logged at DEBUG instead of being silently discarded.
        """
        import importlib

        for module_name, name, label, _wrapper_cls, _level in _PATCH_POINTS:
            owner_path, _, attr = name.rpartition(".")
            try:
                owner = importlib.import_module(module_name)
                # "Class.method" targets are unwrapped on the class object.
                if owner_path:
                    for part in owner_path.split("."):
                        owner = getattr(owner, part)
                unwrap(owner, attr)
            except Exception as exc:
                logger.debug("Could not unwrap %s: %s", label, exc)
+ +_instruments = ("slop-code-bench >= 0.1",) + +_supports_metrics = True diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/src/opentelemetry/instrumentation/slop_code/utils.py b/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/src/opentelemetry/instrumentation/slop_code/utils.py new file mode 100644 index 000000000..ee7fce73f --- /dev/null +++ b/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/src/opentelemetry/instrumentation/slop_code/utils.py @@ -0,0 +1,51 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
"""Utility functions for slop-code instrumentation."""

from typing import TYPE_CHECKING, Any, Optional

if TYPE_CHECKING:
    # Only needed for the annotation on ``set_optional_attr``.
    from opentelemetry.trace import Span

SYSTEM_NAME = "slop-code"
# String attribute values longer than this are truncated before being set.
MAX_ATTR_LEN = 1024


def safe_get(obj: Any, attr: str, default: Any = None) -> Any:
    """Return ``obj.attr``, or ``default`` if it is missing or raises.

    Args:
        obj: Object to read from (may be ``None``).
        attr: Attribute name.
        default: Value returned when the attribute is absent or the lookup
            raises any ``Exception`` (for example from a property).
    """
    try:
        return getattr(obj, attr, default)
    except Exception:
        return default


def safe_get_nested(obj: Any, *attrs: str, default: Any = None) -> Any:
    """Follow a chain of attributes, returning ``default`` on any failure.

    Args:
        obj: Starting object.
        *attrs: Attribute names to follow in order.
        default: Value returned when a link is missing, is ``None``, or its
            lookup raises.

    Returns:
        The final attribute value; ``obj`` itself when ``attrs`` is empty.
    """
    current = obj
    for attr in attrs:
        try:
            current = getattr(current, attr)
        except Exception:
            # Same tolerance as ``safe_get``: a raising property must not
            # escape into the instrumented call path.
            return default
        if current is None:
            return default
    return current


def set_optional_attr(span: "Span", key: str, value: Optional[Any]) -> None:
    """Set ``key`` on ``span`` unless ``value`` is ``None``.

    Falsy values such as ``0`` or ``False`` are still recorded. String
    values are truncated to ``MAX_ATTR_LEN`` characters.
    """
    if value is None:
        return
    if isinstance(value, str) and len(value) > MAX_ATTR_LEN:
        value = value[:MAX_ATTR_LEN]
    span.set_attribute(key, value)
+# See the License for the specific language governing permissions and +# limitations under the License. + +__version__ = "0.5.0.dev" diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/src/opentelemetry/instrumentation/slop_code/wrappers/__init__.py b/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/src/opentelemetry/instrumentation/slop_code/wrappers/__init__.py new file mode 100644 index 000000000..b0a6f4284 --- /dev/null +++ b/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/src/opentelemetry/instrumentation/slop_code/wrappers/__init__.py @@ -0,0 +1,13 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/src/opentelemetry/instrumentation/slop_code/wrappers/agent.py b/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/src/opentelemetry/instrumentation/slop_code/wrappers/agent.py new file mode 100644 index 000000000..94cb2b88a --- /dev/null +++ b/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/src/opentelemetry/instrumentation/slop_code/wrappers/agent.py @@ -0,0 +1,91 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
class _AgentRunCheckpointWrapper:
    """Wrapper for Agent.run_checkpoint to create AGENT span.

    The span is named ``agent.<agent class name>``. After a successful call
    it is annotated with token usage, cost, step count and elapsed time read
    from the returned object, where those fields are present.
    """

    def __init__(self, tracer: trace_api.Tracer):
        self._tracer = tracer

    def __call__(self, wrapped, instance, args, kwargs):
        """Run ``wrapped`` inside an AGENT span.

        Args:
            wrapped: The original ``run_checkpoint`` callable.
            instance: The object the method is bound to.
            args: Positional arguments of the call.
            kwargs: Keyword arguments of the call.

        Returns:
            Whatever ``wrapped`` returns, unchanged.

        Raises:
            Exception: Any exception from ``wrapped`` is recorded on the
                span and re-raised.
        """
        agent_name = type(instance).__name__
        problem_name = safe_get(instance, "problem_name", "unknown")

        attrs = {
            gen_ai_attributes.GEN_AI_OPERATION_NAME: "invoke_agent",
            gen_ai_attributes.GEN_AI_SYSTEM: SYSTEM_NAME,
            gen_ai_extended_attributes.GEN_AI_SPAN_KIND: gen_ai_extended_attributes.GenAiSpanKindValues.AGENT.value,
            "gen_ai.agent.name": agent_name,
            "slop_code.problem.name": str(problem_name),
        }

        # The exception is recorded explicitly below, so the context
        # manager's own recording is switched off; otherwise the same
        # exception event would be added to the span twice.
        with self._tracer.start_as_current_span(
            name=f"agent.{agent_name}",
            kind=SpanKind.INTERNAL,
            attributes=attrs,
            record_exception=False,
            set_status_on_exception=False,
        ) as span:
            try:
                result = wrapped(*args, **kwargs)
            except Exception as exc:
                span.record_exception(exc)
                span.set_status(Status(StatusCode.ERROR, str(exc)))
                span.set_attribute("error.type", type(exc).__name__)
                raise

            # Telemetry extraction is best-effort: a failure here must not
            # turn a successful agent run into an error or lose its result.
            try:
                self._record_result(span, result)
            except Exception:
                logger.debug(
                    "Failed to record AGENT span attributes", exc_info=True
                )

            span.set_status(Status(StatusCode.OK))
            return result

    @staticmethod
    def _record_result(span, result) -> None:
        """Copy usage and timing fields from ``result`` onto ``span``."""
        if result is None:
            return

        usage = safe_get(result, "usage")
        if usage is not None:
            net_tokens = safe_get(usage, "net_tokens")
            if net_tokens is not None:
                set_optional_attr(
                    span,
                    gen_ai_attributes.GEN_AI_USAGE_INPUT_TOKENS,
                    safe_get(net_tokens, "input"),
                )
                set_optional_attr(
                    span,
                    gen_ai_attributes.GEN_AI_USAGE_OUTPUT_TOKENS,
                    safe_get(net_tokens, "output"),
                )
            set_optional_attr(
                span, "slop_code.usage.cost", safe_get(usage, "cost")
            )
            set_optional_attr(
                span, "slop_code.usage.steps", safe_get(usage, "steps")
            )

        set_optional_attr(
            span, "slop_code.elapsed_seconds", safe_get(result, "elapsed")
        )
class _EntryWrapper:
    """Wrapper for run_agent to create ENTRY span."""

    def __init__(self, tracer: trace_api.Tracer):
        self._tracer = tracer

    def __call__(self, wrapped, instance, args, kwargs):
        """Run ``wrapped`` inside the ``slop-code.enter`` span.

        Returns:
            Whatever ``wrapped`` returns, unchanged.

        Raises:
            Exception: Any exception from ``wrapped`` is recorded on the
                span and re-raised.
        """
        # The exception is recorded explicitly below, so the context
        # manager's own recording is switched off; otherwise the same
        # exception event would be added to the span twice.
        with self._tracer.start_as_current_span(
            name="slop-code.enter",
            kind=SpanKind.INTERNAL,
            attributes={
                gen_ai_attributes.GEN_AI_OPERATION_NAME: "enter",
                gen_ai_attributes.GEN_AI_SYSTEM: SYSTEM_NAME,
                gen_ai_extended_attributes.GEN_AI_SPAN_KIND: gen_ai_extended_attributes.GenAiSpanKindValues.ENTRY.value,
            },
            record_exception=False,
            set_status_on_exception=False,
        ) as span:
            try:
                result = wrapped(*args, **kwargs)
            except Exception as exc:
                span.record_exception(exc)
                span.set_status(Status(StatusCode.ERROR, str(exc)))
                # Same attribute the AGENT wrapper sets on failure.
                span.set_attribute("error.type", type(exc).__name__)
                raise

            span.set_status(Status(StatusCode.OK))
            return result
def _call_arg(args, kwargs, name, position, default=None):
    """Return a call argument by keyword, else by position, else ``default``.

    Presence is tested explicitly, so falsy values such as ``0`` or ``0.0``
    are returned as given instead of being treated as missing.
    """
    if name in kwargs:
        return kwargs[name]
    if len(args) > position:
        return args[position]
    return default


def _first_present(mapping, *keys):
    """Return the first ``mapping[key]`` that is not ``None``, else ``None``."""
    for key in keys:
        value = mapping.get(key)
        if value is not None:
            return value
    return None


class _RubricGradeWrapper:
    """Wrapper for grade_file_async to create LLM span."""

    def __init__(self, tracer: trace_api.Tracer):
        self._tracer = tracer

    async def __call__(self, wrapped, instance, args, kwargs):
        """Await ``wrapped`` inside a ``chat <model>`` CLIENT span.

        Returns:
            Whatever ``wrapped`` returns, unchanged.

        Raises:
            Exception: Any exception from ``wrapped`` is recorded on the
                span and re-raised.
        """
        # grade_file_async(prompt_prefix, criteria_text, file_name, model, provider, temperature, ...)
        model = _call_arg(args, kwargs, "model", 3) or "unknown"
        provider = _call_arg(args, kwargs, "provider", 4)
        # A temperature of 0 is a legitimate setting and must be recorded.
        temperature = _call_arg(args, kwargs, "temperature", 5)

        # Determine system name from provider; enum-like providers expose
        # ``.value``, anything else is stringified.
        system_name = SYSTEM_NAME
        if provider is not None:
            system_name = str(getattr(provider, "value", provider)).lower()

        attrs = {
            gen_ai_attributes.GEN_AI_OPERATION_NAME: "chat",
            gen_ai_attributes.GEN_AI_SYSTEM: system_name,
            gen_ai_extended_attributes.GEN_AI_SPAN_KIND: gen_ai_extended_attributes.GenAiSpanKindValues.LLM.value,
            gen_ai_attributes.GEN_AI_REQUEST_MODEL: str(model),
        }

        if temperature is not None:
            try:
                attrs[gen_ai_attributes.GEN_AI_REQUEST_TEMPERATURE] = float(
                    temperature
                )
            except (TypeError, ValueError):
                # An unexpected temperature type must not stop the grading
                # call from running.
                logger.debug(
                    "Ignoring non-numeric temperature: %r", temperature
                )

        # The exception is recorded explicitly below, so the context
        # manager's own recording is switched off; otherwise the same
        # exception event would be added to the span twice.
        with self._tracer.start_as_current_span(
            name=f"chat {model}",
            kind=SpanKind.CLIENT,
            attributes=attrs,
            record_exception=False,
            set_status_on_exception=False,
        ) as span:
            try:
                result = await wrapped(*args, **kwargs)
            except Exception as exc:
                span.record_exception(exc)
                span.set_status(Status(StatusCode.ERROR, str(exc)))
                span.set_attribute("error.type", type(exc).__name__)
                raise

            # Telemetry extraction is best-effort and must not affect the
            # value returned to the caller.
            try:
                # result is tuple[list[dict], dict[str, Any]]
                if isinstance(result, tuple) and len(result) >= 2:
                    response_data = result[1]
                    if isinstance(response_data, dict):
                        _set_usage_from_response(span, response_data)
                        set_optional_attr(
                            span,
                            "gen_ai.response.id",
                            response_data.get("id"),
                        )
            except Exception:
                logger.debug(
                    "Failed to record LLM span attributes", exc_info=True
                )

            span.set_status(Status(StatusCode.OK))
            return result


def _set_usage_from_response(span, response_data: dict) -> None:
    """Extract and set token usage attributes from response_data.

    Does nothing unless ``response_data["usage"]`` is a dict. A token count
    of ``0`` is recorded as ``0``; it is not mistaken for a missing value.
    """
    usage = response_data.get("usage")
    if not isinstance(usage, dict):
        return

    # OpenRouter format: prompt_tokens / completion_tokens
    # Bedrock format (normalized): input_tokens / output_tokens
    input_tokens = _first_present(usage, "prompt_tokens", "input_tokens")
    output_tokens = _first_present(
        usage, "completion_tokens", "output_tokens"
    )

    set_optional_attr(
        span, gen_ai_attributes.GEN_AI_USAGE_INPUT_TOKENS, input_tokens
    )
    set_optional_attr(
        span, gen_ai_attributes.GEN_AI_USAGE_OUTPUT_TOKENS, output_tokens
    )

    # Cache tokens (OpenRouter specific)
    set_optional_attr(
        span,
        gen_ai_extended_attributes.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
        usage.get("cache_read_input_tokens"),
    )
    set_optional_attr(
        span,
        gen_ai_extended_attributes.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS,
        usage.get("cache_creation_input_tokens"),
    )
class _MiniSWEStepWrapper:
    """Wrapper for MiniSWEAgent.agent_step to create STEP span."""

    def __init__(self, tracer: trace_api.Tracer):
        self._tracer = tracer

    def __call__(self, wrapped, instance, args, kwargs):
        """Run ``wrapped`` inside a ``react.step.<N>`` span.

        Returns:
            Whatever ``wrapped`` returns, unchanged.

        Raises:
            Exception: Any exception from ``wrapped`` is recorded on the
                span and re-raised.
        """
        step_num = self._next_step_number(instance)

        attrs = {
            gen_ai_attributes.GEN_AI_OPERATION_NAME: "react",
            gen_ai_attributes.GEN_AI_SYSTEM: SYSTEM_NAME,
            gen_ai_extended_attributes.GEN_AI_SPAN_KIND: gen_ai_extended_attributes.GenAiSpanKindValues.STEP.value,
            gen_ai_extended_attributes.GEN_AI_REACT_ROUND: step_num,
        }

        # The exception is recorded explicitly below, so the context
        # manager's own recording is switched off; otherwise the same
        # exception event would be added to the span twice.
        with self._tracer.start_as_current_span(
            name=f"react.step.{step_num}",
            kind=SpanKind.INTERNAL,
            attributes=attrs,
            record_exception=False,
            set_status_on_exception=False,
        ) as span:
            try:
                result = wrapped(*args, **kwargs)
            except Exception as exc:
                span.record_exception(exc)
                span.set_status(Status(StatusCode.ERROR, str(exc)))
                span.set_attribute("error.type", type(exc).__name__)
                span.set_attribute(
                    gen_ai_extended_attributes.GEN_AI_REACT_FINISH_REASON,
                    "error",
                )
                raise

            # Telemetry extraction is best-effort and must not affect the
            # value returned to the agent loop.
            try:
                self._record_result(span, result)
            except Exception:
                logger.debug(
                    "Failed to record STEP span attributes", exc_info=True
                )

            span.set_status(Status(StatusCode.OK))
            span.set_attribute(
                gen_ai_extended_attributes.GEN_AI_REACT_FINISH_REASON, "stop"
            )
            return result

    @staticmethod
    def _next_step_number(instance) -> int:
        """Return the 1-based number of the step about to run.

        Derived from ``instance.usage.steps`` plus one. Falls back to ``1``
        when the counter is missing, ``None`` or not numeric, so a malformed
        counter cannot raise before the wrapped step is called.
        """
        usage = safe_get(instance, "usage")
        steps = safe_get(usage, "steps", 0) if usage is not None else 0
        try:
            return int(steps) + 1
        except (TypeError, ValueError):
            return 1

    @staticmethod
    def _set_token_usage(span, token_usage, include_cache: bool) -> None:
        """Set input/output (and optionally cache) token counts on ``span``."""
        set_optional_attr(
            span,
            gen_ai_attributes.GEN_AI_USAGE_INPUT_TOKENS,
            safe_get(token_usage, "input"),
        )
        set_optional_attr(
            span,
            gen_ai_attributes.GEN_AI_USAGE_OUTPUT_TOKENS,
            safe_get(token_usage, "output"),
        )
        if include_cache:
            set_optional_attr(
                span,
                gen_ai_extended_attributes.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
                safe_get(token_usage, "cache_read"),
            )
            set_optional_attr(
                span,
                gen_ai_extended_attributes.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS,
                safe_get(token_usage, "cache_write"),
            )

    @classmethod
    def _record_result(cls, span, result) -> None:
        """Copy token usage (and step cost for dict results) onto ``span``."""
        if isinstance(result, dict):
            token_usage = result.get("token_usage")
            if token_usage is not None:
                cls._set_token_usage(span, token_usage, include_cache=True)
            set_optional_attr(
                span, "slop_code.step.cost", result.get("step_cost")
            )
        elif result is not None:
            # Result might be a tuple or object; try attribute access
            token_usage = safe_get(result, "token_usage")
            if token_usage is not None:
                cls._set_token_usage(span, token_usage, include_cache=False)
class _TaskRunCheckpointWrapper:
    """Wrapper for AgentRunner._run_checkpoint to create TASK span."""

    def __init__(self, tracer: trace_api.Tracer):
        self._tracer = tracer

    def __call__(self, wrapped, instance, args, kwargs):
        """Run ``wrapped`` inside a ``task.<checkpoint name>`` span.

        Returns:
            Whatever ``wrapped`` returns, unchanged.

        Raises:
            Exception: Any exception from ``wrapped`` is recorded on the
                span and re-raised.
        """
        # _run_checkpoint(self, checkpoint, checkpoint_save_dir, is_first_checkpoint)
        checkpoint = args[0] if args else kwargs.get("checkpoint")
        if len(args) > 2:
            is_first_checkpoint = args[2]
        else:
            is_first_checkpoint = kwargs.get("is_first_checkpoint", False)

        checkpoint_name = safe_get(checkpoint, "name", "unknown")
        checkpoint_order = safe_get(checkpoint, "order")

        attrs = {
            gen_ai_attributes.GEN_AI_OPERATION_NAME: "run_task",
            gen_ai_attributes.GEN_AI_SYSTEM: SYSTEM_NAME,
            gen_ai_extended_attributes.GEN_AI_SPAN_KIND: "TASK",
            "slop_code.checkpoint.name": str(checkpoint_name),
        }
        if checkpoint_order is not None:
            attrs["slop_code.checkpoint.order"] = checkpoint_order
        attrs["slop_code.is_first_checkpoint"] = bool(is_first_checkpoint)

        # The exception is recorded explicitly below, so the context
        # manager's own recording is switched off; otherwise the same
        # exception event would be added to the span twice.
        with self._tracer.start_as_current_span(
            name=f"task.{checkpoint_name}",
            kind=SpanKind.INTERNAL,
            attributes=attrs,
            record_exception=False,
            set_status_on_exception=False,
        ) as span:
            try:
                result = wrapped(*args, **kwargs)
            except Exception as exc:
                span.record_exception(exc)
                span.set_status(Status(StatusCode.ERROR, str(exc)))
                span.set_attribute("error.type", type(exc).__name__)
                raise

            # Telemetry extraction is best-effort: a failure here must not
            # turn a finished checkpoint into an error or lose its result.
            try:
                self._record_outcome(span, instance, result)
            except Exception:
                logger.debug(
                    "Failed to record TASK span attributes", exc_info=True
                )

            span.set_status(Status(StatusCode.OK))
            return result

    @staticmethod
    def _record_outcome(span, instance, result) -> None:
        """Annotate ``span`` with the checkpoint outcome and token usage."""
        # Outcome flags come from the returned summary object.
        if result is not None:
            set_optional_attr(
                span, "slop_code.had_error", safe_get(result, "had_error")
            )
            set_optional_attr(
                span,
                "slop_code.passed_policy",
                safe_get(result, "passed_policy"),
            )

        # Token usage from agent
        agent = safe_get(instance, "agent")
        if agent is not None:
            net_tokens = safe_get_nested(agent, "usage", "net_tokens")
            if net_tokens is not None:
                set_optional_attr(
                    span,
                    gen_ai_attributes.GEN_AI_USAGE_INPUT_TOKENS,
                    safe_get(net_tokens, "input"),
                )
                set_optional_attr(
                    span,
                    gen_ai_attributes.GEN_AI_USAGE_OUTPUT_TOKENS,
                    safe_get(net_tokens, "output"),
                )
"""CHAIN/workflow span wrapper for run_agent_on_problem."""

import logging

from opentelemetry import trace as trace_api
from opentelemetry.instrumentation.slop_code.utils import (
    SYSTEM_NAME,
    safe_get,
    safe_get_nested,
    set_optional_attr,
)
from opentelemetry.semconv._incubating.attributes import gen_ai_attributes
from opentelemetry.trace import SpanKind, Status, StatusCode
from opentelemetry.util.genai.extended_semconv import gen_ai_extended_attributes

logger = logging.getLogger(__name__)


class _WorkflowWrapper:
    """Wrapper for run_agent_on_problem to create workflow (CHAIN) span.

    Instances are wrapt-style wrappers: they are called with
    ``(wrapped, instance, args, kwargs)`` and return whatever the wrapped
    callable returns, re-raising any exception it raises.
    """

    def __init__(self, tracer: trace_api.Tracer):
        """Store the tracer used to create workflow spans."""
        self._tracer = tracer

    def __call__(self, wrapped, instance, args, kwargs):
        """Run the wrapped call inside a ``workflow.<problem_name>`` span.

        Args:
            wrapped: The original ``run_agent_on_problem`` callable.
            instance: Bound instance supplied by the wrapping machinery
                (unused).
            args: Positional arguments of the call.
            kwargs: Keyword arguments of the call.

        Returns:
            The unmodified result of ``wrapped(*args, **kwargs)``.

        Raises:
            Exception: Whatever the wrapped call raises, after the span has
                been marked as failed.
        """
        # run_agent_on_problem(problem_config, problem_name, config, progress_queue, output_path)
        problem_name = args[1] if len(args) > 1 else kwargs.get("problem_name", "unknown")
        config = args[2] if len(args) > 2 else kwargs.get("config")

        attrs = {
            gen_ai_attributes.GEN_AI_OPERATION_NAME: "workflow",
            gen_ai_attributes.GEN_AI_SYSTEM: SYSTEM_NAME,
            gen_ai_extended_attributes.GEN_AI_SPAN_KIND: "CHAIN",
            "slop_code.problem.name": str(problem_name),
        }

        # Extract optional attributes from config
        if config is not None:
            set_optional_attr_dict(
                attrs,
                gen_ai_attributes.GEN_AI_REQUEST_MODEL,
                safe_get_nested(config, "model_def", "name"),
            )
            set_optional_attr_dict(
                attrs,
                "slop_code.agent.type",
                safe_get_nested(config, "agent_config", "type"),
            )

            pass_policy = safe_get_nested(config, "pass_policy", "value")
            if pass_policy is None:
                pass_policy_obj = safe_get(config, "pass_policy")
                if pass_policy_obj is not None and hasattr(pass_policy_obj, "value"):
                    pass_policy = pass_policy_obj.value
            set_optional_attr_dict(attrs, "slop_code.pass_policy", pass_policy)

        try:
            with self._tracer.start_as_current_span(
                name=f"workflow.{problem_name}",
                kind=SpanKind.INTERNAL,
                attributes={k: v for k, v in attrs.items() if v is not None},
            ) as span:
                try:
                    result = wrapped(*args, **kwargs)

                    if isinstance(result, dict):
                        summary = result.get("summary")
                        if isinstance(summary, dict):
                            set_optional_attr(
                                span, "slop_code.state", summary.get("state")
                            )
                            set_optional_attr(
                                span,
                                "slop_code.passed_policy",
                                summary.get("passed_policy"),
                            )

                    span.set_status(Status(StatusCode.OK))
                    return result
                except Exception as e:
                    # The exception event itself is recorded by the
                    # start_as_current_span context manager when the
                    # exception propagates out of the `with` block
                    # (record_exception defaults to True). Recording it here
                    # as well produced two identical exception events.
                    span.set_attribute("error.type", type(e).__qualname__)
                    span.set_status(Status(StatusCode.ERROR, str(e)))
                    raise
        finally:
            # Flush AFTER the `with` block so the workflow span itself
            # is `on_end`-delivered to the SpanProcessor before we ask it
            # to drain. run_agent_on_problem is the last meaningful work
            # item inside the per-problem worker subprocess; once it
            # returns, the process is reaped by ProcessPoolExecutor's
            # shutdown which can short-circuit BatchSpanProcessor's
            # atexit handler. Without this explicit flush the CHAIN span
            # (and the tail batch of TASK/AGENT/STEP spans) gets dropped.
            self._flush_global_provider()

    @staticmethod
    def _flush_global_provider():
        """Best-effort ``force_flush`` of the global tracer provider.

        Failures are logged at debug level and never propagate, so a flush
        problem cannot change the outcome of the wrapped call.
        """
        # NOTE(review): this flushes the *global* provider; if the
        # instrumentor was given a different tracer_provider, that provider
        # is not the one drained here -- confirm against the instrumentor.
        try:
            provider = trace_api.get_tracer_provider()
            flush = getattr(provider, "force_flush", None)
            if callable(flush):
                flush(timeout_millis=5000)
        except Exception as flush_err:  # noqa: BLE001
            logger.debug(
                "force_flush after workflow span failed: %s", flush_err
            )


def set_optional_attr_dict(attrs: dict, key: str, value) -> None:
    """Add to attrs dict only if value is not None.

    String values longer than 1024 characters are truncated to their first
    1024 characters before being stored. ``attrs`` is mutated in place.
    """
    if value is not None:
        if isinstance(value, str) and len(value) > 1024:
            value = value[:1024]
        attrs[key] = value
"""Test configuration for slop-code instrumentation tests."""

import os
import sys
import types
from unittest.mock import MagicMock

import pytest

os.environ["OTEL_SEMCONV_STABILITY_OPT_IN"] = "gen_ai_latest_experimental"


def _make_module(name):
    """Build a real (empty) module object named *name*.

    ``__package__`` is set to the dotted parent of *name*, or to *name*
    itself when it contains no dot.
    """
    module = types.ModuleType(name)
    parent, dot, _leaf = name.rpartition(".")
    module.__package__ = parent if dot else name
    return module


def _create_mock_slop_code_modules():
    """Install stand-in ``slop_code`` modules and return them keyed by name.

    Each stand-in exposes the callable or class that the tests in this
    package invoke through the instrumentation. Every module is attached to
    its parent as an attribute and registered in ``sys.modules``.
    """
    module_names = (
        "slop_code",
        "slop_code.entrypoints",
        "slop_code.entrypoints.commands",
        "slop_code.entrypoints.commands.run_agent",
        "slop_code.entrypoints.problem_runner",
        "slop_code.entrypoints.problem_runner.worker",
        "slop_code.entrypoints.problem_runner.driver",
        "slop_code.agent_runner",
        "slop_code.agent_runner.runner",
        "slop_code.agent_runner.agent",
        "slop_code.agent_runner.agents",
        "slop_code.agent_runner.agents.miniswe",
        "slop_code.metrics",
        "slop_code.metrics.rubric",
        "slop_code.metrics.rubric.router",
    )
    modules = {name: _make_module(name) for name in module_names}

    # --- ENTRY: run_agent ---
    def run_agent(*args, **kwargs):
        return {"status": "completed"}

    modules["slop_code.entrypoints.commands.run_agent"].run_agent = run_agent

    # --- WORKFLOW: run_agent_on_problem ---
    def run_agent_on_problem(*args, **kwargs):
        return {"summary": {"state": "completed", "passed_policy": True}}

    # The same function object is exposed on both worker and driver: driver
    # re-imports the worker name at module load time, and the instrumentor's
    # driver-side patch needs a target.
    for holder in ("worker", "driver"):
        target = modules[f"slop_code.entrypoints.problem_runner.{holder}"]
        target.run_agent_on_problem = run_agent_on_problem

    # --- TASK: AgentRunner._run_checkpoint ---
    class AgentRunner:
        def __init__(self):
            net_tokens = MagicMock()
            net_tokens.input = 100
            net_tokens.output = 50
            usage = MagicMock()
            usage.net_tokens = net_tokens
            agent = MagicMock()
            agent.usage = usage
            self.agent = agent

        def _run_checkpoint(self, checkpoint, checkpoint_save_dir, is_first_checkpoint=False):
            summary = MagicMock()
            summary.had_error = False
            summary.passed_policy = True
            return summary

    modules["slop_code.agent_runner.runner"].AgentRunner = AgentRunner

    # --- AGENT: Agent.run_checkpoint ---
    class Agent:
        def __init__(self, problem_name="test_problem"):
            self.problem_name = problem_name
            net_tokens = MagicMock()
            net_tokens.input = 100
            net_tokens.output = 50
            usage = MagicMock()
            usage.net_tokens = net_tokens
            usage.steps = 0
            usage.cost = 0.05
            self.usage = usage

        def run_checkpoint(self, task):
            outcome = MagicMock()
            outcome.usage = self.usage
            outcome.elapsed = 10.5
            return outcome

    modules["slop_code.agent_runner.agent"].Agent = Agent

    # --- STEP: MiniSWEAgent.agent_step ---
    class MiniSWEAgent(Agent):
        def __init__(self, problem_name="test_problem"):
            super().__init__(problem_name)

        def agent_step(self):
            usage = MagicMock(input=200, output=80, cache_read=50, cache_write=10)
            return {"token_usage": usage, "step_cost": 0.01}

    modules["slop_code.agent_runner.agents.miniswe"].MiniSWEAgent = MiniSWEAgent

    # --- LLM: grade_file_async ---
    async def grade_file_async(*args, **kwargs):
        usage = {
            "prompt_tokens": 500,
            "completion_tokens": 200,
            "cache_read_input_tokens": 100,
            "cache_creation_input_tokens": 50,
        }
        grades = [{"score": 8, "reasoning": "Good code"}]
        return grades, {"id": "resp-123", "usage": usage}

    modules["slop_code.metrics.rubric.router"].grade_file_async = grade_file_async

    # Wire parent-child relationships: every dotted module becomes an
    # attribute of its parent, named after its last path component.
    for name, module in modules.items():
        parent_name, _, leaf = name.rpartition(".")
        if parent_name:
            setattr(modules[parent_name], leaf, module)

    # Register all modules in sys.modules
    sys.modules.update(modules)

    return modules


# Install mock modules before any instrumentation imports
_mock_modules = _create_mock_slop_code_modules()


@pytest.fixture(scope="function")
def span_exporter():
    """Yield a fresh in-memory span exporter and clear it afterwards."""
    from opentelemetry.sdk.trace.export.in_memory_span_exporter import (
        InMemorySpanExporter,
    )

    in_memory = InMemorySpanExporter()
    yield in_memory
    in_memory.clear()


@pytest.fixture(scope="function")
def tracer_provider(span_exporter):
    """Return a TracerProvider that exports synchronously to *span_exporter*."""
    from opentelemetry.sdk.trace import TracerProvider
    from opentelemetry.sdk.trace.export import SimpleSpanProcessor

    sdk_provider = TracerProvider()
    sdk_provider.add_span_processor(SimpleSpanProcessor(span_exporter))
    return sdk_provider


@pytest.fixture(scope="function")
def instrument(tracer_provider):
    """Instrument slop_code for one test, then uninstrument on teardown."""
    from opentelemetry.instrumentation.slop_code import SlopCodeInstrumentor

    active = SlopCodeInstrumentor()
    active.instrument(
        tracer_provider=tracer_provider,
        skip_dep_check=True,
    )
    yield active
    active.uninstrument()
"""Tests for AGENT span (Agent.run_checkpoint)."""

from unittest.mock import MagicMock

import pytest

from opentelemetry.trace import StatusCode


def _agent_spans(span_exporter):
    """Return the finished spans whose operation name is ``invoke_agent``."""
    return [
        span
        for span in span_exporter.get_finished_spans()
        if span.attributes.get("gen_ai.operation.name") == "invoke_agent"
    ]


class TestAgentSpan:
    """Verify that Agent.run_checkpoint produces an AGENT span."""

    def test_agent_span_created(self, span_exporter, instrument):
        """Agent.run_checkpoint should create an AGENT span."""
        import slop_code.agent_runner.agent as mod

        agent = mod.Agent(problem_name="file_backup")
        agent.run_checkpoint("solve the bug")

        agent_spans = _agent_spans(span_exporter)
        assert len(agent_spans) == 1

        span = agent_spans[0]
        assert span.name == "agent.Agent"
        assert span.attributes["gen_ai.system"] == "slop-code"
        assert span.attributes["gen_ai.span.kind"] == "AGENT"
        assert span.attributes["gen_ai.agent.name"] == "Agent"
        assert span.attributes["slop_code.problem.name"] == "file_backup"
        assert span.status.status_code == StatusCode.OK

    def test_agent_span_captures_usage(self, span_exporter, instrument):
        """AGENT span should capture token usage from result."""
        import slop_code.agent_runner.agent as mod

        agent = mod.Agent(problem_name="test_prob")
        agent.run_checkpoint("task")

        agent_spans = _agent_spans(span_exporter)
        assert len(agent_spans) == 1
        span = agent_spans[0]

        assert "gen_ai.usage.input_tokens" in span.attributes
        assert "gen_ai.usage.output_tokens" in span.attributes
        assert span.attributes["gen_ai.usage.input_tokens"] == 100
        assert span.attributes["gen_ai.usage.output_tokens"] == 50

    def test_agent_span_error(self, span_exporter, tracer_provider):
        """Exception in Agent.run_checkpoint should produce error span."""
        import slop_code.agent_runner.agent as mod

        from opentelemetry.instrumentation.slop_code import SlopCodeInstrumentor

        class FailingAgent(mod.Agent):
            def run_checkpoint(self, task):
                raise TimeoutError("Agent timeout")

        original_agent = mod.Agent
        instrumentor = SlopCodeInstrumentor()

        # The mock module is shared by every test in the process, so the
        # original class must be restored even if instrument() itself fails.
        mod.Agent = FailingAgent
        try:
            instrumentor.instrument(
                tracer_provider=tracer_provider, skip_dep_check=True
            )
            try:
                agent = mod.Agent(problem_name="test_prob")

                with pytest.raises(TimeoutError, match="Agent timeout"):
                    agent.run_checkpoint("task")

                agent_spans = _agent_spans(span_exporter)
                assert len(agent_spans) == 1
                span = agent_spans[0]
                assert span.status.status_code == StatusCode.ERROR
                assert span.attributes.get("error.type") == "TimeoutError"
            finally:
                instrumentor.uninstrument()
        finally:
            mod.Agent = original_agent
"""Tests for ENTRY span (run_agent)."""

import pytest

from opentelemetry.trace import StatusCode


def _entry_spans(span_exporter):
    """Return the finished spans whose span kind attribute is ``ENTRY``."""
    return [
        span
        for span in span_exporter.get_finished_spans()
        if span.attributes.get("gen_ai.span.kind") == "ENTRY"
    ]


class TestEntrySpan:
    """Verify that run_agent produces an ENTRY span."""

    def test_entry_span_created(self, span_exporter, instrument):
        """run_agent should create an ENTRY span with correct attributes."""
        import slop_code.entrypoints.commands.run_agent as mod

        mod.run_agent()

        entry_spans = _entry_spans(span_exporter)
        assert len(entry_spans) == 1

        span = entry_spans[0]
        assert span.name == "slop-code.enter"
        assert span.attributes["gen_ai.system"] == "slop-code"
        assert span.attributes["gen_ai.operation.name"] == "enter"
        assert span.status.status_code == StatusCode.OK

    def test_entry_span_error(self, span_exporter, tracer_provider):
        """run_agent raising an exception should produce an error ENTRY span."""
        import slop_code.entrypoints.commands.run_agent as mod

        from opentelemetry.instrumentation.slop_code import SlopCodeInstrumentor

        def failing_run_agent(*args, **kwargs):
            raise RuntimeError("Config error")

        original = mod.run_agent
        instrumentor = SlopCodeInstrumentor()

        # The mock module is shared by every test in the process, so the
        # original function must be restored even if instrument() fails.
        mod.run_agent = failing_run_agent
        try:
            instrumentor.instrument(
                tracer_provider=tracer_provider, skip_dep_check=True
            )
            try:
                with pytest.raises(RuntimeError, match="Config error"):
                    mod.run_agent()

                entry_spans = _entry_spans(span_exporter)
                assert len(entry_spans) == 1
                assert entry_spans[0].status.status_code == StatusCode.ERROR
            finally:
                instrumentor.uninstrument()
        finally:
            mod.run_agent = original
"""Tests for span hierarchy and parent-child relationships."""

from unittest.mock import MagicMock

import pytest

from opentelemetry.trace import StatusCode


def _bare_config():
    """Return a mock config whose model, agent and policy sections are None."""
    config = MagicMock()
    config.model_def = None
    config.agent_config = None
    config.pass_policy = None
    return config


def _finished_spans_with(span_exporter, attribute, expected):
    """Return finished spans whose *attribute* equals *expected*."""
    return [
        span
        for span in span_exporter.get_finished_spans()
        if span.attributes.get(attribute) == expected
    ]


def _assert_child_of(child, parent):
    """Assert that *child* shares *parent*'s trace and points at it."""
    assert child.context.trace_id == parent.context.trace_id
    assert child.parent is not None
    assert child.parent.span_id == parent.context.span_id


class TestSpanHierarchy:
    """Verify parent-child relationships between spans."""

    def test_entry_is_parent_of_workflow(self, span_exporter, instrument):
        """ENTRY span should be parent of workflow span when called inline."""
        import slop_code.entrypoints.commands.run_agent as entry_mod
        import slop_code.entrypoints.problem_runner.worker as worker_mod

        # Patch run_agent to call run_agent_on_problem internally
        saved_inner = entry_mod.run_agent.__wrapped__

        def run_with_workflow(*args, **kwargs):
            return worker_mod.run_agent_on_problem(
                MagicMock(), "test_problem", _bare_config(), MagicMock(), "/tmp"
            )

        entry_mod.run_agent.__wrapped__ = run_with_workflow

        try:
            entry_mod.run_agent()

            entry_spans = _finished_spans_with(
                span_exporter, "gen_ai.span.kind", "ENTRY"
            )
            workflow_spans = _finished_spans_with(
                span_exporter, "gen_ai.operation.name", "workflow"
            )

            assert len(entry_spans) == 1
            assert len(workflow_spans) == 1

            # workflow should be child of entry
            _assert_child_of(workflow_spans[0], entry_spans[0])
        finally:
            entry_mod.run_agent.__wrapped__ = saved_inner

    def test_workflow_is_parent_of_task(self, span_exporter, instrument):
        """Workflow span should be parent of task span when called inline."""
        import slop_code.agent_runner.runner as runner_mod
        import slop_code.entrypoints.problem_runner.worker as worker_mod

        saved_inner = worker_mod.run_agent_on_problem.__wrapped__

        def workflow_with_task(*args, **kwargs):
            checkpoint = MagicMock()
            checkpoint.name = "cp1"
            checkpoint.order = 1
            runner_mod.AgentRunner()._run_checkpoint(checkpoint, "/tmp", True)
            return {"summary": {"state": "completed", "passed_policy": True}}

        worker_mod.run_agent_on_problem.__wrapped__ = workflow_with_task

        try:
            worker_mod.run_agent_on_problem(
                MagicMock(), "prob1", _bare_config(), MagicMock(), "/tmp"
            )

            workflow_spans = _finished_spans_with(
                span_exporter, "gen_ai.operation.name", "workflow"
            )
            task_spans = _finished_spans_with(
                span_exporter, "gen_ai.operation.name", "run_task"
            )

            assert len(workflow_spans) == 1
            assert len(task_spans) == 1

            _assert_child_of(task_spans[0], workflow_spans[0])
        finally:
            worker_mod.run_agent_on_problem.__wrapped__ = saved_inner
"""Tests for LLM span (grade_file_async - Rubric Judge)."""

from unittest.mock import MagicMock

import pytest

from opentelemetry.trace import SpanKind, StatusCode


def _llm_spans(span_exporter):
    """Return the finished spans whose span kind attribute is ``LLM``."""
    return [
        span
        for span in span_exporter.get_finished_spans()
        if span.attributes.get("gen_ai.span.kind") == "LLM"
    ]


def _provider(value):
    """Return a mock provider object exposing *value* as ``.value``."""
    provider = MagicMock()
    provider.value = value
    return provider


@pytest.mark.asyncio
class TestLLMSpan:
    """Verify that grade_file_async produces an LLM span."""

    async def test_llm_span_created(self, span_exporter, instrument):
        """grade_file_async should create an LLM span."""
        import slop_code.metrics.rubric.router as mod

        await mod.grade_file_async(
            "prompt_prefix",
            "criteria_text",
            "test.py",
            "anthropic/claude-3.5-sonnet",
            _provider("openrouter"),
            0.7,
        )

        llm_spans = _llm_spans(span_exporter)
        assert len(llm_spans) == 1

        span = llm_spans[0]
        assert span.name == "chat anthropic/claude-3.5-sonnet"
        assert span.attributes["gen_ai.system"] == "openrouter"
        assert span.attributes["gen_ai.operation.name"] == "chat"
        assert span.attributes["gen_ai.request.model"] == "anthropic/claude-3.5-sonnet"
        assert span.attributes["gen_ai.request.temperature"] == 0.7
        assert span.kind == SpanKind.CLIENT
        assert span.status.status_code == StatusCode.OK

    async def test_llm_span_captures_usage(self, span_exporter, instrument):
        """LLM span should capture token usage from response."""
        import slop_code.metrics.rubric.router as mod

        await mod.grade_file_async(
            "prefix", "criteria", "file.py",
            "anthropic/claude-3.5-sonnet", _provider("openrouter"), 0.5,
        )

        llm_spans = _llm_spans(span_exporter)
        assert len(llm_spans) == 1
        span = llm_spans[0]

        assert span.attributes["gen_ai.usage.input_tokens"] == 500
        assert span.attributes["gen_ai.usage.output_tokens"] == 200
        assert span.attributes["gen_ai.usage.cache_read.input_tokens"] == 100
        assert span.attributes["gen_ai.usage.cache_creation.input_tokens"] == 50
        assert span.attributes["gen_ai.response.id"] == "resp-123"

    async def test_llm_span_error(self, span_exporter, tracer_provider):
        """Exception in grade_file_async should produce an error LLM span."""
        import slop_code.metrics.rubric.router as mod

        from opentelemetry.instrumentation.slop_code import SlopCodeInstrumentor

        async def failing_grade(*args, **kwargs):
            raise ConnectionError("API unreachable")

        original = mod.grade_file_async
        instrumentor = SlopCodeInstrumentor()

        # The mock module is shared by every test in the process, so the
        # original coroutine must be restored even if instrument() fails.
        mod.grade_file_async = failing_grade
        try:
            instrumentor.instrument(
                tracer_provider=tracer_provider, skip_dep_check=True
            )
            try:
                with pytest.raises(ConnectionError, match="API unreachable"):
                    await mod.grade_file_async(
                        "prefix", "criteria", "file.py",
                        "us.anthropic.claude-3-5-sonnet-20241022-v2:0",
                        _provider("bedrock"), 0.3,
                    )

                llm_spans = _llm_spans(span_exporter)
                assert len(llm_spans) == 1
                assert llm_spans[0].status.status_code == StatusCode.ERROR
                assert llm_spans[0].attributes["gen_ai.system"] == "bedrock"
            finally:
                instrumentor.uninstrument()
        finally:
            mod.grade_file_async = original

    async def test_llm_span_bedrock_provider(self, span_exporter, instrument):
        """LLM span with bedrock provider should use 'bedrock' as system."""
        import slop_code.metrics.rubric.router as mod

        await mod.grade_file_async(
            "prefix", "criteria", "file.py",
            "us.anthropic.claude-3-5-sonnet-20241022-v2:0",
            _provider("bedrock"), 0.5,
        )

        llm_spans = _llm_spans(span_exporter)
        assert len(llm_spans) == 1
        assert llm_spans[0].attributes["gen_ai.system"] == "bedrock"
"""Tests for STEP span (MiniSWEAgent.agent_step)."""

from unittest.mock import MagicMock

import pytest

from opentelemetry.trace import StatusCode


def _step_spans(span_exporter):
    """Return the finished spans whose span kind attribute is ``STEP``."""
    return [
        span
        for span in span_exporter.get_finished_spans()
        if span.attributes.get("gen_ai.span.kind") == "STEP"
    ]


class TestStepSpan:
    """Verify that MiniSWEAgent.agent_step produces a STEP span."""

    def test_step_span_created(self, span_exporter, instrument):
        """agent_step should create a STEP span with token attributes."""
        import slop_code.agent_runner.agents.miniswe as mod

        agent = mod.MiniSWEAgent(problem_name="test_prob")
        agent.agent_step()

        step_spans = _step_spans(span_exporter)
        assert len(step_spans) == 1

        span = step_spans[0]
        assert span.name == "react.step.1"
        assert span.attributes["gen_ai.system"] == "slop-code"
        assert span.attributes["gen_ai.operation.name"] == "react"
        assert span.attributes["gen_ai.react.round"] == 1
        assert span.status.status_code == StatusCode.OK

    def test_step_span_has_token_usage(self, span_exporter, instrument):
        """STEP span should capture token usage from result."""
        import slop_code.agent_runner.agents.miniswe as mod

        agent = mod.MiniSWEAgent(problem_name="test_prob")
        agent.agent_step()

        step_spans = _step_spans(span_exporter)
        assert len(step_spans) == 1
        span = step_spans[0]

        assert span.attributes["gen_ai.usage.input_tokens"] == 200
        assert span.attributes["gen_ai.usage.output_tokens"] == 80
        assert span.attributes["gen_ai.usage.cache_read.input_tokens"] == 50
        assert span.attributes["gen_ai.usage.cache_creation.input_tokens"] == 10

    def test_step_span_increments_round(self, span_exporter, instrument):
        """Multiple agent_step calls should increment the round number."""
        import slop_code.agent_runner.agents.miniswe as mod

        agent = mod.MiniSWEAgent(problem_name="test_prob")
        # Simulate steps=2 already completed
        agent.usage.steps = 2
        agent.agent_step()

        step_spans = _step_spans(span_exporter)
        assert len(step_spans) == 1
        assert step_spans[0].name == "react.step.3"
        assert step_spans[0].attributes["gen_ai.react.round"] == 3

    def test_step_span_error(self, span_exporter, tracer_provider):
        """Exception in agent_step should produce an error STEP span."""
        import slop_code.agent_runner.agents.miniswe as mod

        from opentelemetry.instrumentation.slop_code import SlopCodeInstrumentor

        class FailingMiniSWE(mod.MiniSWEAgent):
            def agent_step(self):
                raise RuntimeError("LimitsExceeded")

        original_class = mod.MiniSWEAgent
        instrumentor = SlopCodeInstrumentor()

        # The mock module is shared by every test in the process, so the
        # original class must be restored even if instrument() itself fails.
        mod.MiniSWEAgent = FailingMiniSWE
        try:
            instrumentor.instrument(
                tracer_provider=tracer_provider, skip_dep_check=True
            )
            try:
                agent = mod.MiniSWEAgent(problem_name="test_prob")

                with pytest.raises(RuntimeError, match="LimitsExceeded"):
                    agent.agent_step()

                step_spans = _step_spans(span_exporter)
                assert len(step_spans) == 1
                span = step_spans[0]
                assert span.status.status_code == StatusCode.ERROR
                assert span.attributes["gen_ai.react.finish_reason"] == "error"
            finally:
                instrumentor.uninstrument()
        finally:
            mod.MiniSWEAgent = original_class

    def test_step_span_finish_reason_stop(self, span_exporter, instrument):
        """Successful step should have finish_reason='stop'."""
        import slop_code.agent_runner.agents.miniswe as mod

        agent = mod.MiniSWEAgent(problem_name="test_prob")
        agent.agent_step()

        step_spans = _step_spans(span_exporter)
        # Check the count first so a missing span fails as a clear assertion
        # rather than an IndexError on the lookup below.
        assert len(step_spans) == 1
        assert step_spans[0].attributes["gen_ai.react.finish_reason"] == "stop"
# Copyright The OpenTelemetry Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Tests for TASK span (AgentRunner._run_checkpoint)."""

from unittest.mock import MagicMock

import pytest

from opentelemetry.trace import StatusCode


class TestTaskSpan:
    """Verify that AgentRunner._run_checkpoint produces a TASK span."""

    @staticmethod
    def _finished_task_spans(span_exporter):
        """Return the finished spans whose operation name is ``run_task``."""
        return [
            span
            for span in span_exporter.get_finished_spans()
            if span.attributes.get("gen_ai.operation.name") == "run_task"
        ]

    @staticmethod
    def _make_checkpoint(name, order):
        """Build a mock checkpoint exposing ``name`` and ``order``."""
        checkpoint = MagicMock()
        # ``name`` is special in the MagicMock constructor, so assign it
        # after creation.
        checkpoint.name = name
        checkpoint.order = order
        return checkpoint

    def test_task_span_created(self, span_exporter, instrument):
        """_run_checkpoint should create a task span."""
        import slop_code.agent_runner.runner as mod

        runner = mod.AgentRunner()
        checkpoint = self._make_checkpoint("checkpoint_1", 1)

        runner._run_checkpoint(checkpoint, "/tmp/save", True)

        task_spans = self._finished_task_spans(span_exporter)
        assert len(task_spans) == 1

        span = task_spans[0]
        assert span.name == "task.checkpoint_1"
        assert span.attributes["gen_ai.system"] == "slop-code"
        assert span.attributes["gen_ai.span.kind"] == "TASK"
        assert span.attributes["slop_code.checkpoint.name"] == "checkpoint_1"
        assert span.attributes["slop_code.checkpoint.order"] == 1
        assert span.attributes["slop_code.is_first_checkpoint"] is True
        assert span.status.status_code == StatusCode.OK

    def test_task_span_error(self, span_exporter, tracer_provider):
        """Exception in _run_checkpoint should produce an error task span.

        This test instruments manually (hence ``tracer_provider`` rather than
        the ``instrument`` fixture) so that the failing subclass is the class
        that gets wrapped.
        """
        import slop_code.agent_runner.runner as mod

        from opentelemetry.instrumentation.slop_code import SlopCodeInstrumentor

        class FailingRunner(mod.AgentRunner):
            def _run_checkpoint(
                self, checkpoint, checkpoint_save_dir, is_first_checkpoint=False
            ):
                raise RuntimeError("Checkpoint failed")

        original_runner = mod.AgentRunner
        instrumentor = SlopCodeInstrumentor()

        # Patch and instrument inside the try so the module attribute is
        # restored even when instrument() itself raises.
        try:
            mod.AgentRunner = FailingRunner
            instrumentor.instrument(
                tracer_provider=tracer_provider, skip_dep_check=True
            )

            runner = mod.AgentRunner()
            checkpoint = self._make_checkpoint("bad_checkpoint", 2)

            with pytest.raises(RuntimeError, match="Checkpoint failed"):
                runner._run_checkpoint(checkpoint, "/tmp/save", False)

            task_spans = self._finished_task_spans(span_exporter)
            assert len(task_spans) == 1
            assert task_spans[0].status.status_code == StatusCode.ERROR
        finally:
            # Unwrap while the failing class is still installed, then restore.
            instrumentor.uninstrument()
            mod.AgentRunner = original_runner

    def test_task_span_not_first_checkpoint(self, span_exporter, instrument):
        """Subsequent checkpoint should have is_first_checkpoint=False."""
        import slop_code.agent_runner.runner as mod

        runner = mod.AgentRunner()
        checkpoint = self._make_checkpoint("checkpoint_2", 2)

        runner._run_checkpoint(checkpoint, "/tmp/save", False)

        task_spans = self._finished_task_spans(span_exporter)
        assert len(task_spans) == 1
        assert task_spans[0].attributes["slop_code.is_first_checkpoint"] is False
# Copyright The OpenTelemetry Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Tests for CHAIN/workflow span (run_agent_on_problem)."""

from unittest.mock import MagicMock

import pytest

from opentelemetry.trace import StatusCode


class TestWorkflowSpan:
    """Verify that run_agent_on_problem produces a workflow span."""

    @staticmethod
    def _finished_workflow_spans(span_exporter):
        """Return the finished spans whose operation name is ``workflow``."""
        return [
            span
            for span in span_exporter.get_finished_spans()
            if span.attributes.get("gen_ai.operation.name") == "workflow"
        ]

    def test_workflow_span_created(self, span_exporter, instrument):
        """run_agent_on_problem should create a workflow span."""
        import slop_code.entrypoints.problem_runner.worker as mod

        config = MagicMock()
        config.model_def = MagicMock()
        config.model_def.name = "anthropic/claude-3.5-sonnet"
        config.agent_config = MagicMock()
        config.agent_config.type = "claude_code"
        config.pass_policy = MagicMock()
        config.pass_policy.value = "any"

        mod.run_agent_on_problem(
            MagicMock(),  # problem_config
            "file_backup",  # problem_name
            config,  # config
            MagicMock(),  # progress_queue
            "/tmp/output",  # output_path
        )

        workflow_spans = self._finished_workflow_spans(span_exporter)
        assert len(workflow_spans) == 1

        span = workflow_spans[0]
        assert span.name == "workflow.file_backup"
        assert span.attributes["gen_ai.system"] == "slop-code"
        assert span.attributes["gen_ai.span.kind"] == "CHAIN"
        assert span.attributes["slop_code.problem.name"] == "file_backup"
        assert (
            span.attributes["gen_ai.request.model"]
            == "anthropic/claude-3.5-sonnet"
        )
        assert span.attributes["slop_code.agent.type"] == "claude_code"
        assert span.status.status_code == StatusCode.OK

    def test_workflow_span_error(self, span_exporter, tracer_provider):
        """Exception in run_agent_on_problem should produce error workflow span.

        This test instruments manually (hence ``tracer_provider`` rather than
        the ``instrument`` fixture) so that the failing function is the one
        that gets wrapped.
        """
        import slop_code.entrypoints.problem_runner.worker as mod

        from opentelemetry.instrumentation.slop_code import SlopCodeInstrumentor

        def failing_worker(*args, **kwargs):
            raise ValueError("Problem not found")

        original = mod.run_agent_on_problem
        instrumentor = SlopCodeInstrumentor()

        # Patch and instrument inside the try so the module attribute is
        # restored even when instrument() itself raises.
        try:
            mod.run_agent_on_problem = failing_worker
            instrumentor.instrument(
                tracer_provider=tracer_provider, skip_dep_check=True
            )

            with pytest.raises(ValueError, match="Problem not found"):
                mod.run_agent_on_problem(
                    MagicMock(),
                    "missing_problem",
                    MagicMock(),
                    MagicMock(),
                    "/tmp",
                )

            workflow_spans = self._finished_workflow_spans(span_exporter)
            assert len(workflow_spans) == 1
            assert workflow_spans[0].status.status_code == StatusCode.ERROR
        finally:
            # Unwrap while the failing function is still installed, then
            # restore the real one.
            instrumentor.uninstrument()
            mod.run_agent_on_problem = original

    def test_workflow_span_with_none_config_fields(
        self, span_exporter, instrument
    ):
        """Workflow span should handle None config fields gracefully."""
        import slop_code.entrypoints.problem_runner.worker as mod

        config = MagicMock()
        config.model_def = None
        config.agent_config = None
        config.pass_policy = None

        mod.run_agent_on_problem(
            MagicMock(), "test_problem", config, MagicMock(), "/tmp"
        )

        workflow_spans = self._finished_workflow_spans(span_exporter)
        assert len(workflow_spans) == 1
        span = workflow_spans[0]
        assert span.attributes["slop_code.problem.name"] == "test_problem"
        assert "gen_ai.request.model" not in span.attributes