Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# LoongSuite slop-code-bench Instrumentation

OpenTelemetry instrumentation for the [slop-code-bench](https://github.com/SprocketLab/slop-code-bench) benchmark orchestrator.

## Span Tree

```
ENTRY "slop-code.enter"
└── CHAIN "workflow.{problem_name}"
    ├── TASK "task.{checkpoint_name}"
    │   └── AGENT "agent.{agent_type}"
    │       ├── STEP "react.step.{N}" [MiniSWE only]
    │       └── ...
    ├── TASK "task.{checkpoint_name}"
    │   └── AGENT "agent.{agent_type}"
    └── ...
LLM "chat {model_name}" [Rubric Judge]
```

## Installation

```bash
pip install loongsuite-instrumentation-slop-code
```

## Usage

```python
from opentelemetry.instrumentation.slop_code import SlopCodeInstrumentor

SlopCodeInstrumentor().instrument()
```
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[project]
name = "loongsuite-instrumentation-slop-code"
dynamic = ["version"]
description = "LoongSuite slop-code-bench instrumentation"
readme = "README.md"
license = "Apache-2.0"
requires-python = ">=3.10,<4"
authors = [
{ name = "Zhiyong Liu", email = "liuzhiyong.lzy@alibaba-inc.com" },
{ name = "OpenTelemetry Authors", email = "cncf-opentelemetry-contributors@lists.cncf.io" },
]
classifiers = [
"Development Status :: 4 - Beta",
"Intended Audience :: Developers",
"License :: OSI Approved :: Apache Software License",
"Programming Language :: Python",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
]
dependencies = [
"opentelemetry-api >= 1.37.0",
"opentelemetry-instrumentation >= 0.58b0",
"opentelemetry-semantic-conventions >= 0.58b0",
"wrapt >= 1.14.0, < 2.0.0",
"opentelemetry-util-genai >= 0.3b0.dev0",
]

[project.optional-dependencies]
instruments = [
"slop-code-bench >= 0.1",
]
test = [
"pytest",
"pytest-asyncio",
"pytest-forked",
"opentelemetry-sdk",
]

[project.entry-points.opentelemetry_instrumentor]
slop_code = "opentelemetry.instrumentation.slop_code:SlopCodeInstrumentor"

[project.urls]
Homepage = "https://github.com/alibaba/loongsuite-python-agent/tree/main/instrumentation-loongsuite/loongsuite-instrumentation-slop-code"
Repository = "https://github.com/alibaba/loongsuite-python-agent"

[tool.hatch.version]
path = "src/opentelemetry/instrumentation/slop_code/version.py"

[tool.hatch.build.targets.sdist]
include = [
"/src",
"/tests",
]

[tool.hatch.build.targets.wheel]
packages = ["src/opentelemetry"]
Original file line number Diff line number Diff line change
@@ -0,0 +1,211 @@
# Copyright The OpenTelemetry Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
OpenTelemetry slop-code-bench Instrumentation

Instruments the slop-code benchmark orchestrator lifecycle:
- ENTRY: run_agent (CLI entrypoint)
- CHAIN/workflow: run_agent_on_problem (per-problem)
- TASK: AgentRunner._run_checkpoint (per-checkpoint)
- AGENT: Agent.run_checkpoint (concrete agent invocation)
- STEP: MiniSWEAgent.agent_step (ReAct iteration)
- LLM: grade_file_async (Rubric Judge)
"""

import logging
from typing import Any, Collection

from wrapt import wrap_function_wrapper

from opentelemetry import trace as trace_api
from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
from opentelemetry.instrumentation.slop_code.package import _instruments
from opentelemetry.instrumentation.slop_code.version import __version__
from opentelemetry.instrumentation.slop_code.wrappers.agent import (
_AgentRunCheckpointWrapper,
)
from opentelemetry.instrumentation.slop_code.wrappers.entry import (
_EntryWrapper,
)
from opentelemetry.instrumentation.slop_code.wrappers.llm import (
_RubricGradeWrapper,
)
from opentelemetry.instrumentation.slop_code.wrappers.step import (
_MiniSWEStepWrapper,
)
from opentelemetry.instrumentation.slop_code.wrappers.task import (
_TaskRunCheckpointWrapper,
)
from opentelemetry.instrumentation.slop_code.wrappers.workflow import (
_WorkflowWrapper,
)
from opentelemetry.instrumentation.utils import unwrap

# Module-level logger; wrap failures in SlopCodeInstrumentor are reported here.
logger = logging.getLogger(__name__)

# Public names exported by this package.
__all__ = ["SlopCodeInstrumentor", "__version__"]

# Dotted import paths of the slop_code modules whose callables are patched.
_MODULE_ENTRY = "slop_code.entrypoints.commands.run_agent"
_MODULE_WORKER = "slop_code.entrypoints.problem_runner.worker"
# slop_code.entrypoints.problem_runner.driver re-imports
# `run_agent_on_problem` via `from .worker import run_agent_on_problem`
# at package-load time, capturing the original function reference. Because
# our wrap happens after that bind, we must additionally replace the local
# binding inside `driver` itself, otherwise the worker subprocess still
# calls the un-wrapped original and the CHAIN span never fires.
_MODULE_DRIVER = "slop_code.entrypoints.problem_runner.driver"
_MODULE_RUNNER = "slop_code.agent_runner.runner"
_MODULE_AGENT = "slop_code.agent_runner.agent"
_MODULE_MINISWE = "slop_code.agent_runner.agents.miniswe"
_MODULE_RUBRIC = "slop_code.metrics.rubric.router"


class SlopCodeInstrumentor(BaseInstrumentor):
    """OpenTelemetry instrumentor for the slop-code-bench framework.

    ``_instrument`` patches every callable listed in ``_TARGETS`` with its
    span wrapper and ``_uninstrument`` removes those patches again. Both
    directions are best-effort: a target that cannot be imported or patched
    is logged and skipped so the remaining targets are still processed.
    """

    # One row per patched callable:
    #   (module path, dotted attribute name, label used in log messages,
    #    wrapper class, log level used when wrapping fails)
    _TARGETS = (
        # ENTRY span: run_agent
        (
            _MODULE_ENTRY,
            "run_agent",
            "run_agent",
            _EntryWrapper,
            logging.WARNING,
        ),
        # CHAIN span: run_agent_on_problem
        (
            _MODULE_WORKER,
            "run_agent_on_problem",
            "run_agent_on_problem",
            _WorkflowWrapper,
            logging.WARNING,
        ),
        # driver.py binds run_agent_on_problem at module-load time via
        # `from .worker import ...`, so that local name escapes the
        # worker-module patch and has to be patched as well; otherwise the
        # CHAIN span never fires for calls made through the driver binding.
        # NOTE(review): if driver were first imported only after the worker
        # patch, this binding would already be wrapped and would be wrapped
        # a second time — confirm the import order in slop_code.
        (
            _MODULE_DRIVER,
            "run_agent_on_problem",
            "driver.run_agent_on_problem",
            _WorkflowWrapper,
            logging.WARNING,
        ),
        # TASK span: AgentRunner._run_checkpoint
        (
            _MODULE_RUNNER,
            "AgentRunner._run_checkpoint",
            "AgentRunner._run_checkpoint",
            _TaskRunCheckpointWrapper,
            logging.WARNING,
        ),
        # AGENT span: Agent.run_checkpoint
        (
            _MODULE_AGENT,
            "Agent.run_checkpoint",
            "Agent.run_checkpoint",
            _AgentRunCheckpointWrapper,
            logging.WARNING,
        ),
        # STEP span: MiniSWEAgent.agent_step (failure logged at DEBUG only)
        (
            _MODULE_MINISWE,
            "MiniSWEAgent.agent_step",
            "MiniSWEAgent.agent_step",
            _MiniSWEStepWrapper,
            logging.DEBUG,
        ),
        # LLM span: grade_file_async (failure logged at DEBUG only)
        (
            _MODULE_RUBRIC,
            "grade_file_async",
            "grade_file_async",
            _RubricGradeWrapper,
            logging.DEBUG,
        ),
    )

    def instrumentation_dependencies(self) -> Collection[str]:
        """Return the requirement specifiers of the instrumented package."""
        return _instruments

    def _instrument(self, **kwargs: Any) -> None:
        """Patch every target in ``_TARGETS`` with its span wrapper.

        Args:
            **kwargs: Instrumentation options; only ``tracer_provider`` is
                read and forwarded to ``get_tracer``.
        """
        tracer = trace_api.get_tracer(
            __name__,
            __version__,
            tracer_provider=kwargs.get("tracer_provider"),
        )

        # One wrapper instance per wrapper class, so the worker and driver
        # bindings of run_agent_on_problem share a single _WorkflowWrapper.
        wrappers: dict = {}
        for module, name, label, wrapper_cls, level in self._TARGETS:
            try:
                if wrapper_cls not in wrappers:
                    wrappers[wrapper_cls] = wrapper_cls(tracer)
                wrap_function_wrapper(
                    module=module,
                    name=name,
                    wrapper=wrappers[wrapper_cls],
                )
            except Exception as exc:
                # Best-effort: a target that fails to import or patch must
                # not prevent the remaining targets from being wrapped.
                logger.log(level, "Could not wrap %s: %s", label, exc)

    def _uninstrument(self, **kwargs: Any) -> None:
        """Remove the patches installed by ``_instrument``.

        Each target is handled independently; a failure is logged at DEBUG
        level and the remaining targets are still unwrapped.
        """
        from importlib import import_module

        for module, name, label, _wrapper_cls, _level in self._TARGETS:
            # "AgentRunner._run_checkpoint" -> owner path ["AgentRunner"]
            # and attribute "_run_checkpoint"; plain function names have an
            # empty owner path and are unwrapped on the module itself.
            *owner_path, attr = name.split(".")
            try:
                owner = import_module(module)
                for part in owner_path:
                    owner = getattr(owner, part)
                unwrap(owner, attr)
            except Exception as exc:
                logger.debug("Could not unwrap %s: %s", label, exc)
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Copyright The OpenTelemetry Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Requirement specifiers of the instrumented library; returned by
# SlopCodeInstrumentor.instrumentation_dependencies().
_instruments = ("slop-code-bench >= 0.1",)

# NOTE(review): the consumer of this flag is not visible in this package —
# confirm that the instrumentation actually emits metrics.
_supports_metrics = True
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# Copyright The OpenTelemetry Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Utility functions for slop-code instrumentation."""

from typing import Any, Optional

from opentelemetry.trace import Span

# NOTE(review): presumably the system name recorded on spans — its usage is
# not visible in this module; confirm against the wrappers.
SYSTEM_NAME = "slop-code"
# Maximum length of a string span-attribute value; set_optional_attr
# truncates longer strings to this many characters.
MAX_ATTR_LEN = 1024


def safe_get(obj: Any, attr: str, default: Any = None) -> Any:
    """Look up ``attr`` on ``obj`` without ever raising.

    Args:
        obj: Object to read from.
        attr: Name of the attribute to read.
        default: Value returned when the attribute is missing or when the
            lookup itself raises.

    Returns:
        The attribute value, or ``default`` on any failure.
    """
    try:
        value = getattr(obj, attr, default)
    except Exception:
        # A property or __getattr__ hook may raise something other than
        # AttributeError; that is treated the same as a missing attribute.
        value = default
    return value


def safe_get_nested(obj: Any, *attrs: str, default: Any = None) -> Any:
    """Follow a chain of attribute names starting at ``obj`` without raising.

    Args:
        obj: Object the traversal starts from.
        *attrs: Attribute names to follow, in order.
        default: Value returned when the chain cannot be followed.

    Returns:
        The value at the end of the chain. ``default`` is returned as soon
        as any lookup raises or yields ``None``. With no ``attrs`` the
        original ``obj`` is returned unchanged.
    """
    current = obj
    for attr in attrs:
        try:
            current = getattr(current, attr)
        except Exception:
            # Properties and __getattr__ hooks can raise arbitrary errors;
            # like safe_get, this helper must never propagate them.
            return default
        if current is None:
            return default
    return current


def set_optional_attr(span: Span, key: str, value: Optional[Any]) -> None:
    """Record ``value`` on ``span`` under ``key`` unless it is ``None``.

    String values longer than ``MAX_ATTR_LEN`` characters are cut down to
    their first ``MAX_ATTR_LEN`` characters; every other value is passed
    through unchanged.
    """
    if value is None:
        return
    too_long = isinstance(value, str) and len(value) > MAX_ATTR_LEN
    span.set_attribute(key, value[:MAX_ATTR_LEN] if too_long else value)
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Copyright The OpenTelemetry Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Package version; read by hatch through [tool.hatch.version] in
# pyproject.toml.
__version__ = "0.5.0.dev"
Loading
Loading