alibaba · 123liuziming · May 7, 2026
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/README.md b/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/README.md
@@ -0,0 +1,32 @@
+# LoongSuite slop-code-bench Instrumentation
+
+OpenTelemetry instrumentation for the [slop-code-bench](https://github.com/SprocketLab/slop-code-bench) benchmark orchestrator.
+
+## Span Tree
+
+```
+ENTRY  "slop-code.enter"
+└── CHAIN  "workflow.{problem_name}"
+    ├── TASK  "task.{checkpoint_name}"
+    │   └── AGENT  "agent.{agent_type}"
+    │       ├── STEP  "react.step.{N}"          [MiniSWE only]
+    │       └── ...
+    ├── TASK  "task.{checkpoint_name}"
+    │   └── AGENT  "agent.{agent_type}"
+    └── ...
+LLM  "chat {model_name}"                       [Rubric Judge]
+```
+
+## Installation
+
+```bash
+pip install loongsuite-instrumentation-slop-code
+```
+
+## Usage
+
+```python
+from opentelemetry.instrumentation.slop_code import SlopCodeInstrumentor
+
+SlopCodeInstrumentor().instrument()
+```
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/pyproject.toml b/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/pyproject.toml
@@ -0,0 +1,61 @@
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project]
+name = "loongsuite-instrumentation-slop-code"
+dynamic = ["version"]
+description = "LoongSuite slop-code-bench instrumentation"
+readme = "README.md"
+license = "Apache-2.0"
+requires-python = ">=3.10,<4"
+authors = [
+  { name = "Zhiyong Liu", email = "liuzhiyong.lzy@alibaba-inc.com" },
+  { name = "OpenTelemetry Authors", email = "cncf-opentelemetry-contributors@lists.cncf.io" },
+]
+classifiers = [
+  "Development Status :: 4 - Beta",
+  "Intended Audience :: Developers",
+  "License :: OSI Approved :: Apache Software License",
+  "Programming Language :: Python",
+  "Programming Language :: Python :: 3.10",
+  "Programming Language :: Python :: 3.11",
+  "Programming Language :: Python :: 3.12",
+]
+dependencies = [
+  "opentelemetry-api >= 1.37.0",
+  "opentelemetry-instrumentation >= 0.58b0",
+  "opentelemetry-semantic-conventions >= 0.58b0",
+  "wrapt >= 1.14.0, < 2.0.0",
+  "opentelemetry-util-genai >= 0.3b0.dev0",
+]
+
+[project.optional-dependencies]
+instruments = [
+  "slop-code-bench >= 0.1",
+]
+test = [
+  "pytest",
+  "pytest-asyncio",
+  "pytest-forked",
+  "opentelemetry-sdk",
+]
+
+[project.entry-points.opentelemetry_instrumentor]
+slop_code = "opentelemetry.instrumentation.slop_code:SlopCodeInstrumentor"
+
+[project.urls]
+Homepage = "https://github.com/alibaba/loongsuite-python-agent/tree/main/instrumentation-loongsuite/loongsuite-instrumentation-slop-code"
+Repository = "https://github.com/alibaba/loongsuite-python-agent"
+
+[tool.hatch.version]
+path = "src/opentelemetry/instrumentation/slop_code/version.py"
+
+[tool.hatch.build.targets.sdist]
+include = [
+  "/src",
+  "/tests",
+]
+
+[tool.hatch.build.targets.wheel]
+packages = ["src/opentelemetry"]
diff --git a/...ngsuite-instrumentation-slop-code/src/opentelemetry/instrumentation/slop_code/__init__.py b/...ngsuite-instrumentation-slop-code/src/opentelemetry/instrumentation/slop_code/__init__.py
@@ -0,0 +1,211 @@
+# Copyright The OpenTelemetry Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+OpenTelemetry slop-code-bench Instrumentation
+
+Instruments the slop-code benchmark orchestrator lifecycle:
+- ENTRY: run_agent (CLI entrypoint)
+- CHAIN/workflow: run_agent_on_problem (per-problem)
+- TASK: AgentRunner._run_checkpoint (per-checkpoint)
+- AGENT: Agent.run_checkpoint (concrete agent invocation)
+- STEP: MiniSWEAgent.agent_step (ReAct iteration)
+- LLM: grade_file_async (Rubric Judge)
+"""
+
+import logging
+from typing import Any, Collection
+
+from wrapt import wrap_function_wrapper
+
+from opentelemetry import trace as trace_api
+from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
+from opentelemetry.instrumentation.slop_code.package import _instruments
+from opentelemetry.instrumentation.slop_code.version import __version__
+from opentelemetry.instrumentation.slop_code.wrappers.agent import (
+    _AgentRunCheckpointWrapper,
+)
+from opentelemetry.instrumentation.slop_code.wrappers.entry import (
+    _EntryWrapper,
+)
+from opentelemetry.instrumentation.slop_code.wrappers.llm import (
+    _RubricGradeWrapper,
+)
+from opentelemetry.instrumentation.slop_code.wrappers.step import (
+    _MiniSWEStepWrapper,
+)
+from opentelemetry.instrumentation.slop_code.wrappers.task import (
+    _TaskRunCheckpointWrapper,
+)
+from opentelemetry.instrumentation.slop_code.wrappers.workflow import (
+    _WorkflowWrapper,
+)
+from opentelemetry.instrumentation.utils import unwrap
+
+logger = logging.getLogger(__name__)
+
+__all__ = ["SlopCodeInstrumentor", "__version__"]
+
+_MODULE_ENTRY = "slop_code.entrypoints.commands.run_agent"  # ENTRY span
+_MODULE_WORKER = "slop_code.entrypoints.problem_runner.worker"  # CHAIN span
+# slop_code.entrypoints.problem_runner.driver re-imports
+# `run_agent_on_problem` via `from .worker import run_agent_on_problem`
+# at package-load time, capturing the original function reference. Because
+# our wrap happens after that bind, we must additionally replace the local
+# binding inside `driver` itself, otherwise the worker subprocess still
+# calls the un-wrapped original and the CHAIN span never fires.
+_MODULE_DRIVER = "slop_code.entrypoints.problem_runner.driver"  # CHAIN too
+_MODULE_RUNNER = "slop_code.agent_runner.runner"  # TASK span
+_MODULE_AGENT = "slop_code.agent_runner.agent"  # AGENT span
+_MODULE_MINISWE = "slop_code.agent_runner.agents.miniswe"  # STEP span
+_MODULE_RUBRIC = "slop_code.metrics.rubric.router"  # LLM span
+
+
+class SlopCodeInstrumentor(BaseInstrumentor):
+    """Instrumentor that wraps slop-code-bench orchestrator calls in spans."""
+
+    def instrumentation_dependencies(self) -> Collection[str]:
+        """Return the package requirement specs this instrumentor targets."""
+        return _instruments
+
+    def _instrument(self, **kwargs: Any) -> None:
+        """Wrap each target; a failed wrap is logged and the rest still run."""
+        tracer_provider = kwargs.get("tracer_provider")
+        tracer = trace_api.get_tracer(
+            __name__,
+            __version__,
+            tracer_provider=tracer_provider,
+        )
+
+        # 3.1 ENTRY span: run_agent
+        try:
+            wrap_function_wrapper(
+                module=_MODULE_ENTRY,
+                name="run_agent",
+                wrapper=_EntryWrapper(tracer),
+            )
+        except Exception as e:
+            logger.warning("Could not wrap run_agent: %s", e)
+
+        # 3.2 CHAIN span: run_agent_on_problem (one wrapper, two bindings)
+        workflow_wrapper = _WorkflowWrapper(tracer)
+        try:
+            wrap_function_wrapper(
+                module=_MODULE_WORKER,
+                name="run_agent_on_problem",
+                wrapper=workflow_wrapper,
+            )
+        except Exception as e:
+            logger.warning("Could not wrap run_agent_on_problem: %s", e)
+        # driver.py binds this name via `from .worker import ...` at load time
+        # and the forked worker inherits that stale reference: wrap it too.
+        try:
+            wrap_function_wrapper(
+                module=_MODULE_DRIVER,
+                name="run_agent_on_problem",
+                wrapper=workflow_wrapper,
+            )
+        except Exception as e:
+            logger.warning("Could not wrap driver.run_agent_on_problem: %s", e)
+
+        # 3.3 TASK span: AgentRunner._run_checkpoint
+        try:
+            wrap_function_wrapper(
+                module=_MODULE_RUNNER,
+                name="AgentRunner._run_checkpoint",
+                wrapper=_TaskRunCheckpointWrapper(tracer),
+            )
+        except Exception as e:
+            logger.warning("Could not wrap AgentRunner._run_checkpoint: %s", e)
+
+        # 3.4 AGENT span: Agent.run_checkpoint
+        try:
+            wrap_function_wrapper(
+                module=_MODULE_AGENT,
+                name="Agent.run_checkpoint",
+                wrapper=_AgentRunCheckpointWrapper(tracer),
+            )
+        except Exception as e:
+            logger.warning("Could not wrap Agent.run_checkpoint: %s", e)
+
+        # 3.5 STEP span: MiniSWEAgent.agent_step (failure is debug-level only)
+        try:
+            wrap_function_wrapper(
+                module=_MODULE_MINISWE,
+                name="MiniSWEAgent.agent_step",
+                wrapper=_MiniSWEStepWrapper(tracer),
+            )
+        except Exception as e:
+            logger.debug("Could not wrap MiniSWEAgent.agent_step: %s", e)
+
+        # 3.6 LLM span: grade_file_async (failure is debug-level only)
+        try:
+            wrap_function_wrapper(
+                module=_MODULE_RUBRIC,
+                name="grade_file_async",
+                wrapper=_RubricGradeWrapper(tracer),
+            )
+        except Exception as e:
+            logger.debug("Could not wrap grade_file_async: %s", e)
+
+    def _uninstrument(self, **kwargs: Any) -> None:
+        """Unwrap every target; failures are logged at debug and skipped."""
+        try:
+            import slop_code.entrypoints.commands.run_agent as mod_entry
+
+            unwrap(mod_entry, "run_agent")
+        except Exception as e:
+            logger.debug("Could not unwrap run_agent: %s", e)
+
+        try:
+            import slop_code.entrypoints.problem_runner.worker as mod_worker
+
+            unwrap(mod_worker, "run_agent_on_problem")
+        except Exception as e:
+            logger.debug("Could not unwrap run_agent_on_problem: %s", e)
+
+        try:
+            import slop_code.entrypoints.problem_runner.driver as mod_driver
+
+            unwrap(mod_driver, "run_agent_on_problem")
+        except Exception as e:
+            logger.debug("Could not unwrap driver.run_agent_on_problem: %s", e)
+
+        try:
+            import slop_code.agent_runner.runner as mod_runner
+
+            unwrap(mod_runner.AgentRunner, "_run_checkpoint")
+        except Exception as e:
+            logger.debug("Could not unwrap AgentRunner._run_checkpoint: %s", e)
+
+        try:
+            import slop_code.agent_runner.agent as mod_agent
+
+            unwrap(mod_agent.Agent, "run_checkpoint")
+        except Exception as e:
+            logger.debug("Could not unwrap Agent.run_checkpoint: %s", e)
+
+        try:
+            import slop_code.agent_runner.agents.miniswe as mod_miniswe
+
+            unwrap(mod_miniswe.MiniSWEAgent, "agent_step")
+        except Exception as e:
+            logger.debug("Could not unwrap MiniSWEAgent.agent_step: %s", e)
+
+        try:
+            import slop_code.metrics.rubric.router as mod_rubric
+
+            unwrap(mod_rubric, "grade_file_async")
+        except Exception as e:
+            logger.debug("Could not unwrap grade_file_async: %s", e)
diff --git a/...ongsuite-instrumentation-slop-code/src/opentelemetry/instrumentation/slop_code/package.py b/...ongsuite-instrumentation-slop-code/src/opentelemetry/instrumentation/slop_code/package.py
@@ -0,0 +1,17 @@
+# Copyright The OpenTelemetry Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+_instruments = ("slop-code-bench >= 0.1",)  # target package requirement spec
+
+_supports_metrics = True  # NOTE(review): consumer not visible here - confirm
diff --git a/...loongsuite-instrumentation-slop-code/src/opentelemetry/instrumentation/slop_code/utils.py b/...loongsuite-instrumentation-slop-code/src/opentelemetry/instrumentation/slop_code/utils.py
@@ -0,0 +1,51 @@
+# Copyright The OpenTelemetry Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Utility functions for slop-code instrumentation."""
+
+from typing import Any, Optional
+
+from opentelemetry.trace import Span
+
+SYSTEM_NAME = "slop-code"  # NOTE(review): unused in this module; check callers
+MAX_ATTR_LEN = 1024  # str attribute values are cut to this many chars
+
+
+def safe_get(obj: Any, attr: str, default: Any = None) -> Any:
+    """Return attribute ``attr`` of ``obj``, or ``default`` on any failure."""
+    try:
+        return getattr(obj, attr, default)
+    except Exception:  # getattr's default only covers AttributeError
+        return default
+
+
+def safe_get_nested(obj: Any, *attrs: str, default: Any = None) -> Any:
+    """Walk ``attrs`` from ``obj``; ``default`` on any None or failing hop."""
+    current = obj
+    for attr in attrs:
+        try:
+            current = getattr(current, attr)
+        except Exception:  # match safe_get: a property may raise anything
+            return default
+        if current is None:  # a None hop, even the last one, yields default
+            return default
+    return current
+
+
+def set_optional_attr(span: Span, key: str, value: Optional[Any]) -> None:
+    """Set ``key`` on ``span`` unless ``value`` is None; cap long strings."""
+    # Only str values are capped; other types pass through unchanged.
+    too_long = isinstance(value, str) and len(value) > MAX_ATTR_LEN
+    if value is not None:
+        span.set_attribute(key, value[:MAX_ATTR_LEN] if too_long else value)
diff --git a/...ongsuite-instrumentation-slop-code/src/opentelemetry/instrumentation/slop_code/version.py b/...ongsuite-instrumentation-slop-code/src/opentelemetry/instrumentation/slop_code/version.py
@@ -0,0 +1,15 @@
+# Copyright The OpenTelemetry Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+__version__ = "0.5.0.dev"  # read by hatch via [tool.hatch.version] path