diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/README.md b/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/README.md
new file mode 100644
index 000000000..4d4f4d7b1
--- /dev/null
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/README.md
@@ -0,0 +1,32 @@
+# LoongSuite slop-code-bench Instrumentation
+
+OpenTelemetry instrumentation for the [slop-code-bench](https://github.com/SprocketLab/slop-code-bench) benchmark orchestrator.
+
+## Span Tree
+
+```
+ENTRY  "slop-code.enter"
+└── CHAIN  "workflow.{problem_name}"
+    ├── TASK  "task.{checkpoint_name}"
+    │   └── AGENT  "agent.{agent_type}"
+    │       ├── STEP  "react.step.{N}"          [MiniSWE only]
+    │       └── ...
+    ├── TASK  "task.{checkpoint_name}"
+    │   └── AGENT  "agent.{agent_type}"
+    └── ...
+LLM  "chat {model_name}"                       [Rubric Judge]
+```
+
+## Installation
+
+```bash
+pip install loongsuite-instrumentation-slop-code
+```
+
+## Usage
+
+```python
+from opentelemetry.instrumentation.slop_code import SlopCodeInstrumentor
+
+SlopCodeInstrumentor().instrument()
+```
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/pyproject.toml b/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/pyproject.toml
new file mode 100644
index 000000000..b443381c2
--- /dev/null
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/pyproject.toml
@@ -0,0 +1,61 @@
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project]
+name = "loongsuite-instrumentation-slop-code"
+dynamic = ["version"]
+description = "LoongSuite slop-code-bench instrumentation"
+readme = "README.md"
+license = "Apache-2.0"
+requires-python = ">=3.10,<4"
+authors = [
+  { name = "Zhiyong Liu", email = "liuzhiyong.lzy@alibaba-inc.com" },
+  { name = "OpenTelemetry Authors", email = "cncf-opentelemetry-contributors@lists.cncf.io" },
+]
+classifiers = [
+  "Development Status :: 4 - Beta",
+  "Intended Audience :: Developers",
+  "License :: OSI Approved :: Apache Software License",
+  "Programming Language :: Python",
+  "Programming Language :: Python :: 3.10",
+  "Programming Language :: Python :: 3.11",
+  "Programming Language :: Python :: 3.12",
+]
+dependencies = [
+  "opentelemetry-api >= 1.37.0",
+  "opentelemetry-instrumentation >= 0.58b0",
+  "opentelemetry-semantic-conventions >= 0.58b0",
+  "wrapt >= 1.14.0, < 2.0.0",
+  "opentelemetry-util-genai >= 0.3b0.dev0",
+]
+
+[project.optional-dependencies]
+instruments = [
+  "slop-code-bench >= 0.1",
+]
+test = [
+  "pytest",
+  "pytest-asyncio",
+  "pytest-forked",
+  "opentelemetry-sdk",
+]
+
+[project.entry-points.opentelemetry_instrumentor]
+slop_code = "opentelemetry.instrumentation.slop_code:SlopCodeInstrumentor"
+
+[project.urls]
+Homepage = "https://github.com/alibaba/loongsuite-python-agent/tree/main/instrumentation-loongsuite/loongsuite-instrumentation-slop-code"
+Repository = "https://github.com/alibaba/loongsuite-python-agent"
+
+[tool.hatch.version]
+path = "src/opentelemetry/instrumentation/slop_code/version.py"
+
+[tool.hatch.build.targets.sdist]
+include = [
+  "/src",
+  "/tests",
+]
+
+[tool.hatch.build.targets.wheel]
+packages = ["src/opentelemetry"]
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/src/opentelemetry/instrumentation/slop_code/__init__.py b/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/src/opentelemetry/instrumentation/slop_code/__init__.py
new file mode 100644
index 000000000..973cd969e
--- /dev/null
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/src/opentelemetry/instrumentation/slop_code/__init__.py
@@ -0,0 +1,211 @@
+# Copyright The OpenTelemetry Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+OpenTelemetry slop-code-bench Instrumentation
+
+Instruments the slop-code benchmark orchestrator lifecycle:
+- ENTRY: run_agent (CLI entrypoint)
+- CHAIN/workflow: run_agent_on_problem (per-problem)
+- TASK: AgentRunner._run_checkpoint (per-checkpoint)
+- AGENT: Agent.run_checkpoint (concrete agent invocation)
+- STEP: MiniSWEAgent.agent_step (ReAct iteration)
+- LLM: grade_file_async (Rubric Judge)
+"""
+
+import logging
+from typing import Any, Collection
+
+from wrapt import wrap_function_wrapper
+
+from opentelemetry import trace as trace_api
+from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
+from opentelemetry.instrumentation.slop_code.package import _instruments
+from opentelemetry.instrumentation.slop_code.version import __version__
+from opentelemetry.instrumentation.slop_code.wrappers.agent import (
+    _AgentRunCheckpointWrapper,
+)
+from opentelemetry.instrumentation.slop_code.wrappers.entry import (
+    _EntryWrapper,
+)
+from opentelemetry.instrumentation.slop_code.wrappers.llm import (
+    _RubricGradeWrapper,
+)
+from opentelemetry.instrumentation.slop_code.wrappers.step import (
+    _MiniSWEStepWrapper,
+)
+from opentelemetry.instrumentation.slop_code.wrappers.task import (
+    _TaskRunCheckpointWrapper,
+)
+from opentelemetry.instrumentation.slop_code.wrappers.workflow import (
+    _WorkflowWrapper,
+)
+from opentelemetry.instrumentation.utils import unwrap
+
+logger = logging.getLogger(__name__)
+
+__all__ = ["SlopCodeInstrumentor", "__version__"]
+
+_MODULE_ENTRY = "slop_code.entrypoints.commands.run_agent"  # hosts run_agent (ENTRY span)
+_MODULE_WORKER = "slop_code.entrypoints.problem_runner.worker"  # hosts run_agent_on_problem (CHAIN span)
+# slop_code.entrypoints.problem_runner.driver re-imports
+# `run_agent_on_problem` via `from .worker import run_agent_on_problem`
+# at package-load time, capturing the original function reference. Because
+# our wrap happens after that bind, we must additionally replace the local
+# binding inside `driver` itself, otherwise the worker subprocess still
+# calls the un-wrapped original and the CHAIN span never fires.
+_MODULE_DRIVER = "slop_code.entrypoints.problem_runner.driver"
+_MODULE_RUNNER = "slop_code.agent_runner.runner"  # hosts AgentRunner._run_checkpoint (TASK span)
+_MODULE_AGENT = "slop_code.agent_runner.agent"  # hosts Agent.run_checkpoint (AGENT span)
+_MODULE_MINISWE = "slop_code.agent_runner.agents.miniswe"  # hosts MiniSWEAgent.agent_step (STEP span)
+_MODULE_RUBRIC = "slop_code.metrics.rubric.router"  # hosts grade_file_async (LLM span)
+
+
+class SlopCodeInstrumentor(BaseInstrumentor):
+    """OpenTelemetry instrumentor for slop-code-bench framework."""
+
+    def instrumentation_dependencies(self) -> Collection[str]:
+        """Return the requirement specifiers of the package this instrumentor patches."""
+        return _instruments
+
+    def _instrument(self, **kwargs: Any) -> None:
+        """Wrap every hook point; a hook that cannot be wrapped is logged and skipped."""
+        tracer = trace_api.get_tracer(
+            __name__, __version__, tracer_provider=kwargs.get("tracer_provider")
+        )
+
+        # 3.1 ENTRY span: run_agent
+        try:
+            wrap_function_wrapper(
+                module=_MODULE_ENTRY,
+                name="run_agent",
+                wrapper=_EntryWrapper(tracer),
+            )
+        except Exception as e:
+            logger.warning("Could not wrap run_agent: %s", e)
+
+        # 3.2 CHAIN span: run_agent_on_problem
+        workflow_wrapper = _WorkflowWrapper(tracer)
+        try:
+            wrap_function_wrapper(
+                module=_MODULE_WORKER,
+                name="run_agent_on_problem",
+                wrapper=workflow_wrapper,
+            )
+        except Exception as e:
+            logger.warning("Could not wrap run_agent_on_problem: %s", e)
+        # Also wrap the re-bound name inside driver. driver.py imports
+        # run_agent_on_problem at module-load time via `from .worker import ...`,
+        # so the local name escapes our worker-module patch. The worker
+        # subprocess inherits this stale reference via fork(), and CHAIN
+        # spans never fire unless we patch the local re-bind too.
+        try:
+            wrap_function_wrapper(
+                module=_MODULE_DRIVER,
+                name="run_agent_on_problem",
+                wrapper=workflow_wrapper,
+            )
+        except Exception as e:
+            logger.warning("Could not wrap driver.run_agent_on_problem: %s", e)
+
+        # 3.3 TASK span: AgentRunner._run_checkpoint
+        try:
+            wrap_function_wrapper(
+                module=_MODULE_RUNNER,
+                name="AgentRunner._run_checkpoint",
+                wrapper=_TaskRunCheckpointWrapper(tracer),
+            )
+        except Exception as e:
+            logger.warning("Could not wrap AgentRunner._run_checkpoint: %s", e)
+
+        # 3.4 AGENT span: Agent.run_checkpoint
+        try:
+            wrap_function_wrapper(
+                module=_MODULE_AGENT,
+                name="Agent.run_checkpoint",
+                wrapper=_AgentRunCheckpointWrapper(tracer),
+            )
+        except Exception as e:
+            logger.warning("Could not wrap Agent.run_checkpoint: %s", e)
+
+        # 3.5 STEP span: MiniSWEAgent.agent_step
+        try:
+            wrap_function_wrapper(
+                module=_MODULE_MINISWE,
+                name="MiniSWEAgent.agent_step",
+                wrapper=_MiniSWEStepWrapper(tracer),
+            )
+        except Exception as e:
+            logger.debug("Could not wrap MiniSWEAgent.agent_step: %s", e)
+
+        # 3.6 LLM span: grade_file_async
+        try:
+            wrap_function_wrapper(
+                module=_MODULE_RUBRIC,
+                name="grade_file_async",
+                wrapper=_RubricGradeWrapper(tracer),
+            )
+        except Exception as e:
+            logger.debug("Could not wrap grade_file_async: %s", e)
+
+    def _uninstrument(self, **kwargs: Any) -> None:
+        """Best-effort removal of every wrapper; failures are logged at debug, never raised."""
+        try:
+            import slop_code.entrypoints.commands.run_agent as mod_entry
+
+            unwrap(mod_entry, "run_agent")
+        except Exception:
+            logger.debug("Could not unwrap run_agent", exc_info=True)
+
+        try:
+            import slop_code.entrypoints.problem_runner.worker as mod_worker
+
+            unwrap(mod_worker, "run_agent_on_problem")
+        except Exception:
+            logger.debug("Could not unwrap run_agent_on_problem", exc_info=True)
+
+        try:
+            import slop_code.entrypoints.problem_runner.driver as mod_driver
+
+            unwrap(mod_driver, "run_agent_on_problem")
+        except Exception:
+            logger.debug("Could not unwrap driver.run_agent_on_problem", exc_info=True)
+
+        try:
+            import slop_code.agent_runner.runner as mod_runner
+
+            unwrap(mod_runner.AgentRunner, "_run_checkpoint")
+        except Exception:
+            logger.debug("Could not unwrap AgentRunner._run_checkpoint", exc_info=True)
+
+        try:
+            import slop_code.agent_runner.agent as mod_agent
+
+            unwrap(mod_agent.Agent, "run_checkpoint")
+        except Exception:
+            logger.debug("Could not unwrap Agent.run_checkpoint", exc_info=True)
+
+        try:
+            import slop_code.agent_runner.agents.miniswe as mod_miniswe
+
+            unwrap(mod_miniswe.MiniSWEAgent, "agent_step")
+        except Exception:
+            logger.debug("Could not unwrap MiniSWEAgent.agent_step", exc_info=True)
+
+        try:
+            import slop_code.metrics.rubric.router as mod_rubric
+
+            unwrap(mod_rubric, "grade_file_async")
+        except Exception:
+            logger.debug("Could not unwrap grade_file_async", exc_info=True)
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/src/opentelemetry/instrumentation/slop_code/package.py b/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/src/opentelemetry/instrumentation/slop_code/package.py
new file mode 100644
index 000000000..13b6fe785
--- /dev/null
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/src/opentelemetry/instrumentation/slop_code/package.py
@@ -0,0 +1,17 @@
+# Copyright The OpenTelemetry Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+_instruments = ("slop-code-bench >= 0.1",)
+
+_supports_metrics = True
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/src/opentelemetry/instrumentation/slop_code/utils.py b/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/src/opentelemetry/instrumentation/slop_code/utils.py
new file mode 100644
index 000000000..ee7fce73f
--- /dev/null
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/src/opentelemetry/instrumentation/slop_code/utils.py
@@ -0,0 +1,51 @@
+# Copyright The OpenTelemetry Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Utility functions for slop-code instrumentation."""
+
+from typing import Any, Optional
+
+from opentelemetry.trace import Span
+
+SYSTEM_NAME = "slop-code"
+MAX_ATTR_LEN = 1024
+
+
+def safe_get(obj: Any, attr: str, default: Any = None) -> Any:
+    """Return ``getattr(obj, attr, default)``, or ``default`` if the lookup itself raises."""
+    try:
+        return getattr(obj, attr, default)
+    except Exception:  # getattr's own default only covers AttributeError
+        return default
+
+
+def safe_get_nested(obj: Any, *attrs: str, default: Any = None) -> Any:
+    """Walk ``attrs`` from ``obj``; return ``default`` on a missing, None, or raising hop."""
+    current = obj
+    for attr in attrs:
+        try:
+            current = getattr(current, attr)
+        except Exception:  # match safe_get: a raising property must not escape
+            return default
+        if current is None:
+            return default
+    return current
+
+
+def set_optional_attr(span: Span, key: str, value: Optional[Any]) -> None:
+    """Write ``value`` to ``span`` under ``key`` unless it is None; long strings are clipped."""
+    if value is None:
+        return
+    too_long = isinstance(value, str) and len(value) > MAX_ATTR_LEN
+    span.set_attribute(key, value[:MAX_ATTR_LEN] if too_long else value)
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/src/opentelemetry/instrumentation/slop_code/version.py b/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/src/opentelemetry/instrumentation/slop_code/version.py
new file mode 100644
index 000000000..7bee975f0
--- /dev/null
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/src/opentelemetry/instrumentation/slop_code/version.py
@@ -0,0 +1,15 @@
+# Copyright The OpenTelemetry Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+__version__ = "0.5.0.dev"
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/src/opentelemetry/instrumentation/slop_code/wrappers/__init__.py b/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/src/opentelemetry/instrumentation/slop_code/wrappers/__init__.py
new file mode 100644
index 000000000..b0a6f4284
--- /dev/null
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/src/opentelemetry/instrumentation/slop_code/wrappers/__init__.py
@@ -0,0 +1,13 @@
+# Copyright The OpenTelemetry Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/src/opentelemetry/instrumentation/slop_code/wrappers/agent.py b/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/src/opentelemetry/instrumentation/slop_code/wrappers/agent.py
new file mode 100644
index 000000000..94cb2b88a
--- /dev/null
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/src/opentelemetry/instrumentation/slop_code/wrappers/agent.py
@@ -0,0 +1,91 @@
+# Copyright The OpenTelemetry Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""AGENT span wrapper for Agent.run_checkpoint."""
+
+import logging
+
+from opentelemetry import trace as trace_api
+from opentelemetry.instrumentation.slop_code.utils import (
+    SYSTEM_NAME,
+    safe_get,
+    safe_get_nested,
+    set_optional_attr,
+)
+from opentelemetry.semconv._incubating.attributes import gen_ai_attributes
+from opentelemetry.trace import SpanKind, Status, StatusCode
+from opentelemetry.util.genai.extended_semconv import gen_ai_extended_attributes
+
+logger = logging.getLogger(__name__)
+
+
+class _AgentRunCheckpointWrapper:
+    """Wrapper for Agent.run_checkpoint that records one AGENT span per call."""
+
+    def __init__(self, tracer: trace_api.Tracer):
+        self._tracer = tracer
+
+    @staticmethod
+    def _record_result(span, result) -> None:
+        """Copy usage figures and elapsed time from ``result`` onto ``span``.
+
+        Every field is optional: attributes that are missing or None are skipped.
+        """
+        if result is None:
+            return
+        usage = safe_get(result, "usage")
+        if usage is not None:
+            net_tokens = safe_get(usage, "net_tokens")
+            if net_tokens is not None:
+                set_optional_attr(
+                    span, gen_ai_attributes.GEN_AI_USAGE_INPUT_TOKENS, safe_get(net_tokens, "input")
+                )
+                set_optional_attr(
+                    span, gen_ai_attributes.GEN_AI_USAGE_OUTPUT_TOKENS, safe_get(net_tokens, "output")
+                )
+            set_optional_attr(span, "slop_code.usage.cost", safe_get(usage, "cost"))
+            set_optional_attr(span, "slop_code.usage.steps", safe_get(usage, "steps"))
+        set_optional_attr(span, "slop_code.elapsed_seconds", safe_get(result, "elapsed"))
+
+    def __call__(self, wrapped, instance, args, kwargs):
+        """Run ``wrapped`` inside an ``agent.{ClassName}`` span; failures are recorded and re-raised."""
+        agent_name = type(instance).__name__
+        attrs = {
+            gen_ai_attributes.GEN_AI_OPERATION_NAME: "invoke_agent",
+            gen_ai_attributes.GEN_AI_SYSTEM: SYSTEM_NAME,
+            gen_ai_extended_attributes.GEN_AI_SPAN_KIND: gen_ai_extended_attributes.GenAiSpanKindValues.AGENT.value,
+            "gen_ai.agent.name": agent_name,
+            "slop_code.problem.name": str(safe_get(instance, "problem_name", "unknown")),
+        }
+
+        # The except block below records failures itself, so the automatic
+        # exception recording of start_as_current_span is switched off;
+        # leaving it on would add a second, duplicate exception event.
+        with self._tracer.start_as_current_span(
+            name=f"agent.{agent_name}",
+            kind=SpanKind.INTERNAL,
+            attributes=attrs,
+            record_exception=False,
+            set_status_on_exception=False,
+        ) as span:
+            try:
+                result = wrapped(*args, **kwargs)
+                self._record_result(span, result)
+                span.set_status(Status(StatusCode.OK))
+                return result
+            except Exception as e:
+                span.record_exception(e)
+                span.set_status(Status(StatusCode.ERROR, str(e)))
+                span.set_attribute("error.type", type(e).__name__)
+                raise
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/src/opentelemetry/instrumentation/slop_code/wrappers/entry.py b/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/src/opentelemetry/instrumentation/slop_code/wrappers/entry.py
new file mode 100644
index 000000000..d31e666f1
--- /dev/null
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/src/opentelemetry/instrumentation/slop_code/wrappers/entry.py
@@ -0,0 +1,58 @@
+# Copyright The OpenTelemetry Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""ENTRY span wrapper for slop_code.entrypoints.commands.run_agent.run_agent."""
+
+import logging
+
+from opentelemetry import trace as trace_api
+from opentelemetry.instrumentation.slop_code.utils import (
+    SYSTEM_NAME,
+    safe_get,
+    safe_get_nested,
+    set_optional_attr,
+)
+from opentelemetry.semconv._incubating.attributes import gen_ai_attributes
+from opentelemetry.trace import SpanKind, Status, StatusCode
+from opentelemetry.util.genai.extended_semconv import gen_ai_extended_attributes
+
+logger = logging.getLogger(__name__)
+
+
+class _EntryWrapper:
+    """Wrapper for run_agent: runs it inside the top-level "slop-code.enter" ENTRY span."""
+
+    def __init__(self, tracer: trace_api.Tracer):
+        self._tracer = tracer
+
+    def __call__(self, wrapped, instance, args, kwargs):
+        attrs = {
+            gen_ai_attributes.GEN_AI_OPERATION_NAME: "enter",
+            gen_ai_attributes.GEN_AI_SYSTEM: SYSTEM_NAME,
+            gen_ai_extended_attributes.GEN_AI_SPAN_KIND: gen_ai_extended_attributes.GenAiSpanKindValues.ENTRY.value,
+        }
+        # Failures are recorded in the except block; auto-recording is off to avoid duplicates.
+        with self._tracer.start_as_current_span(
+            name="slop-code.enter", kind=SpanKind.INTERNAL, attributes=attrs,
+            record_exception=False, set_status_on_exception=False,
+        ) as span:
+            try:
+                result = wrapped(*args, **kwargs)
+                span.set_status(Status(StatusCode.OK))
+                return result
+            except Exception as e:
+                span.record_exception(e)
+                span.set_status(Status(StatusCode.ERROR, str(e)))
+                span.set_attribute("error.type", type(e).__name__)
+                raise
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/src/opentelemetry/instrumentation/slop_code/wrappers/llm.py b/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/src/opentelemetry/instrumentation/slop_code/wrappers/llm.py
new file mode 100644
index 000000000..0aaba20b8
--- /dev/null
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/src/opentelemetry/instrumentation/slop_code/wrappers/llm.py
@@ -0,0 +1,104 @@
+# Copyright The OpenTelemetry Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""LLM span wrapper for grade_file_async (Rubric Judge)."""
+
+import logging
+
+from opentelemetry import trace as trace_api
+from opentelemetry.instrumentation.slop_code.utils import (
+    SYSTEM_NAME,
+    set_optional_attr,
+)
+from opentelemetry.semconv._incubating.attributes import gen_ai_attributes
+from opentelemetry.trace import SpanKind, Status, StatusCode
+from opentelemetry.util.genai.extended_semconv import gen_ai_extended_attributes
+
+logger = logging.getLogger(__name__)
+
+
+class _RubricGradeWrapper:
+    """Wrapper for grade_file_async that records one LLM (chat) span per call."""
+
+    def __init__(self, tracer: trace_api.Tracer):
+        self._tracer = tracer
+
+    @staticmethod
+    def _arg(args, kwargs, name, index, default=None):
+        """Return ``kwargs[name]`` if not None, else ``args[index]`` if given, else ``default``."""
+        value = kwargs.get(name)
+        if value is None and len(args) > index:
+            value = args[index]
+        return default if value is None else value
+
+    async def __call__(self, wrapped, instance, args, kwargs):
+        # grade_file_async(prompt_prefix, criteria_text, file_name, model, provider, temperature, ...)
+        # Test against None, not truthiness, so an explicit temperature of 0 is kept.
+        model = self._arg(args, kwargs, "model", 3, "unknown")
+        provider = self._arg(args, kwargs, "provider", 4)
+        temperature = self._arg(args, kwargs, "temperature", 5)
+
+        # Determine system name from provider (enum-like ``.value`` or plain string)
+        system_name = SYSTEM_NAME
+        if provider is not None:
+            system_name = str(getattr(provider, "value", provider)).lower()
+
+        attrs = {
+            gen_ai_attributes.GEN_AI_OPERATION_NAME: "chat",
+            gen_ai_attributes.GEN_AI_SYSTEM: system_name,
+            gen_ai_extended_attributes.GEN_AI_SPAN_KIND: gen_ai_extended_attributes.GenAiSpanKindValues.LLM.value,
+            gen_ai_attributes.GEN_AI_REQUEST_MODEL: str(model),
+        }
+        if temperature is not None:
+            attrs[gen_ai_attributes.GEN_AI_REQUEST_TEMPERATURE] = float(temperature)
+        # Failures are recorded in the except block; auto-recording is off to avoid duplicates.
+        with self._tracer.start_as_current_span(
+            name=f"chat {model}", kind=SpanKind.CLIENT, attributes=attrs,
+            record_exception=False, set_status_on_exception=False,
+        ) as span:
+            try:
+                result = await wrapped(*args, **kwargs)
+                # result is tuple[list[dict], dict[str, Any]]
+                if isinstance(result, tuple) and len(result) >= 2 and isinstance(result[1], dict):
+                    _set_usage_from_response(span, result[1])
+                    set_optional_attr(span, "gen_ai.response.id", result[1].get("id"))
+                span.set_status(Status(StatusCode.OK))
+                return result
+            except Exception as e:
+                span.record_exception(e)
+                span.set_status(Status(StatusCode.ERROR, str(e)))
+                span.set_attribute("error.type", type(e).__name__)
+                raise
+
+
+def _set_usage_from_response(span, response_data: dict) -> None:
+    """Copy token-usage counters from ``response_data["usage"]`` onto ``span``.
+
+    Does nothing when ``usage`` is missing or not a dict. A counter of 0 is
+    recorded as 0 rather than being treated as absent.
+    """
+    usage = response_data.get("usage")
+    if not isinstance(usage, dict):
+        return
+
+    def first_present(*keys):
+        # Explicit None test: ``a or b`` would discard a legitimate count of 0.
+        return next((usage[k] for k in keys if usage.get(k) is not None), None)
+
+    # OpenRouter format: prompt_tokens / completion_tokens
+    # Bedrock format (normalized): input_tokens / output_tokens
+    set_optional_attr(span, gen_ai_attributes.GEN_AI_USAGE_INPUT_TOKENS, first_present("prompt_tokens", "input_tokens"))
+    set_optional_attr(span, gen_ai_attributes.GEN_AI_USAGE_OUTPUT_TOKENS, first_present("completion_tokens", "output_tokens"))
+    set_optional_attr(span, gen_ai_extended_attributes.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS, usage.get("cache_read_input_tokens"))
+    set_optional_attr(span, gen_ai_extended_attributes.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS, usage.get("cache_creation_input_tokens"))
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/src/opentelemetry/instrumentation/slop_code/wrappers/step.py b/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/src/opentelemetry/instrumentation/slop_code/wrappers/step.py
new file mode 100644
index 000000000..93219fe89
--- /dev/null
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/src/opentelemetry/instrumentation/slop_code/wrappers/step.py
@@ -0,0 +1,110 @@
+# Copyright The OpenTelemetry Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""STEP span wrapper for MiniSWEAgent.agent_step."""
+
+import logging
+
+from opentelemetry import trace as trace_api
+from opentelemetry.instrumentation.slop_code.utils import (
+    SYSTEM_NAME,
+    safe_get,
+    safe_get_nested,
+    set_optional_attr,
+)
+from opentelemetry.semconv._incubating.attributes import gen_ai_attributes
+from opentelemetry.trace import SpanKind, Status, StatusCode
+from opentelemetry.util.genai.extended_semconv import gen_ai_extended_attributes
+
+logger = logging.getLogger(__name__)
+
+
+class _MiniSWEStepWrapper:
+    """Wrapper for MiniSWEAgent.agent_step that records one STEP span per call."""
+
+    def __init__(self, tracer: trace_api.Tracer):
+        self._tracer = tracer
+
+    @staticmethod
+    def _next_step_number(instance) -> int:
+        """Return the 1-based number of the step about to run.
+
+        Reads ``instance.usage.steps`` and adds one. Falls back to 1 when the
+        counter is missing, None, or not an int, so an unexpected agent state
+        cannot make the instrumentation itself raise inside the agent loop.
+        """
+        steps = safe_get(safe_get(instance, "usage"), "steps", 0)
+        return steps + 1 if isinstance(steps, int) else 1
+
+    @staticmethod
+    def _record_result(span, result) -> None:
+        """Copy token usage (and, for dict results, step cost) from ``result`` to ``span``.
+
+        Dict results are read by key and also report cache tokens; any other
+        non-None result is read by attribute. Missing or None values are skipped.
+        """
+        if result is None:
+            return
+
+        is_dict = isinstance(result, dict)
+        token_fields = [
+            ("input", gen_ai_attributes.GEN_AI_USAGE_INPUT_TOKENS),
+            ("output", gen_ai_attributes.GEN_AI_USAGE_OUTPUT_TOKENS),
+        ]
+        if is_dict:
+            token_fields += [
+                ("cache_read", gen_ai_extended_attributes.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS),
+                ("cache_write", gen_ai_extended_attributes.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS),
+            ]
+        # Result might be a dict, or a tuple/object; fall back to attribute access.
+        token_usage = result.get("token_usage") if is_dict else safe_get(result, "token_usage")
+        if token_usage is not None:
+            for field, attr_key in token_fields:
+                set_optional_attr(span, attr_key, safe_get(token_usage, field))
+        if is_dict:
+            set_optional_attr(span, "slop_code.step.cost", result.get("step_cost"))
+
+    def __call__(self, wrapped, instance, args, kwargs):
+        """Run ``wrapped`` inside a ``react.step.{N}`` span; failures are recorded and re-raised."""
+        # Determine current step number (1-based)
+        step_num = self._next_step_number(instance)
+
+        attrs = {
+            gen_ai_attributes.GEN_AI_OPERATION_NAME: "react",
+            gen_ai_attributes.GEN_AI_SYSTEM: SYSTEM_NAME,
+            gen_ai_extended_attributes.GEN_AI_SPAN_KIND: gen_ai_extended_attributes.GenAiSpanKindValues.STEP.value,
+            gen_ai_extended_attributes.GEN_AI_REACT_ROUND: step_num,
+        }
+
+        # Failures are recorded in the except block, so the automatic recording of
+        # start_as_current_span is disabled; otherwise the exception event is duplicated.
+        with self._tracer.start_as_current_span(
+            name=f"react.step.{step_num}",
+            kind=SpanKind.INTERNAL,
+            attributes=attrs,
+            record_exception=False,
+            set_status_on_exception=False,
+        ) as span:
+            try:
+                result = wrapped(*args, **kwargs)
+                self._record_result(span, result)
+                span.set_status(Status(StatusCode.OK))
+                span.set_attribute(gen_ai_extended_attributes.GEN_AI_REACT_FINISH_REASON, "stop")
+                return result
+            except Exception as e:
+                span.record_exception(e)
+                span.set_status(Status(StatusCode.ERROR, str(e)))
+                span.set_attribute(gen_ai_extended_attributes.GEN_AI_REACT_FINISH_REASON, "error")
+                span.set_attribute("error.type", type(e).__name__)
+                raise
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/src/opentelemetry/instrumentation/slop_code/wrappers/task.py b/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/src/opentelemetry/instrumentation/slop_code/wrappers/task.py
new file mode 100644
index 000000000..b0f60a4fc
--- /dev/null
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/src/opentelemetry/instrumentation/slop_code/wrappers/task.py
@@ -0,0 +1,91 @@
+# Copyright The OpenTelemetry Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""TASK span wrapper for AgentRunner._run_checkpoint."""
+
+import logging
+
+from opentelemetry import trace as trace_api
+from opentelemetry.instrumentation.slop_code.utils import (
+    SYSTEM_NAME,
+    safe_get,
+    safe_get_nested,
+    set_optional_attr,
+)
+from opentelemetry.semconv._incubating.attributes import gen_ai_attributes
+from opentelemetry.trace import SpanKind, Status, StatusCode
+from opentelemetry.util.genai.extended_semconv import gen_ai_extended_attributes
+
+logger = logging.getLogger(__name__)
+
+
+class _TaskRunCheckpointWrapper:
+    """Wrapper for AgentRunner._run_checkpoint to create TASK span."""
+
+    def __init__(self, tracer: trace_api.Tracer):
+        self._tracer = tracer
+
+    def __call__(self, wrapped, instance, args, kwargs):
+        # _run_checkpoint(self, checkpoint, checkpoint_save_dir, is_first_checkpoint)
+        checkpoint = args[0] if args else kwargs.get("checkpoint")
+        is_first_checkpoint = args[2] if len(args) > 2 else kwargs.get("is_first_checkpoint", False)
+
+        checkpoint_name = safe_get(checkpoint, "name", "unknown")
+        checkpoint_order = safe_get(checkpoint, "order")
+
+        span_name = f"task.{checkpoint_name}"
+
+        attrs = {
+            gen_ai_attributes.GEN_AI_OPERATION_NAME: "run_task",
+            gen_ai_attributes.GEN_AI_SYSTEM: SYSTEM_NAME,
+            gen_ai_extended_attributes.GEN_AI_SPAN_KIND: "TASK",
+            "slop_code.checkpoint.name": str(checkpoint_name),
+        }
+
+        if checkpoint_order is not None:
+            attrs["slop_code.checkpoint.order"] = checkpoint_order
+        attrs["slop_code.is_first_checkpoint"] = bool(is_first_checkpoint)
+
+        with self._tracer.start_as_current_span(
+            name=span_name,
+            kind=SpanKind.INTERNAL,
+            attributes=attrs,
+        ) as span:
+            try:
+                result = wrapped(*args, **kwargs)
+
+                # Extract after-call attributes from summary
+                if result is not None:
+                    had_error = safe_get(result, "had_error")
+                    set_optional_attr(span, "slop_code.had_error", had_error)
+
+                    passed_policy = safe_get(result, "passed_policy")
+                    set_optional_attr(span, "slop_code.passed_policy", passed_policy)
+
+                # Token usage from agent
+                agent = safe_get(instance, "agent")
+                if agent is not None:
+                    net_tokens = safe_get_nested(agent, "usage", "net_tokens")
+                    if net_tokens is not None:
+                        input_tokens = safe_get(net_tokens, "input")
+                        output_tokens = safe_get(net_tokens, "output")
+                        set_optional_attr(span, gen_ai_attributes.GEN_AI_USAGE_INPUT_TOKENS, input_tokens)
+                        set_optional_attr(span, gen_ai_attributes.GEN_AI_USAGE_OUTPUT_TOKENS, output_tokens)
+
+                span.set_status(Status(StatusCode.OK))
+                return result
+            except Exception as e:
+                span.record_exception(e)
+                span.set_status(Status(StatusCode.ERROR, str(e)))
+                raise
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/src/opentelemetry/instrumentation/slop_code/wrappers/workflow.py b/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/src/opentelemetry/instrumentation/slop_code/wrappers/workflow.py
new file mode 100644
index 000000000..4793d4286
--- /dev/null
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/src/opentelemetry/instrumentation/slop_code/wrappers/workflow.py
@@ -0,0 +1,120 @@
+# Copyright The OpenTelemetry Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""CHAIN/workflow span wrapper for run_agent_on_problem."""
+
+import logging
+
+from opentelemetry import trace as trace_api
+from opentelemetry.instrumentation.slop_code.utils import (
+    SYSTEM_NAME,
+    safe_get,
+    safe_get_nested,
+    set_optional_attr,
+)
+from opentelemetry.semconv._incubating.attributes import gen_ai_attributes
+from opentelemetry.trace import SpanKind, Status, StatusCode
+from opentelemetry.util.genai.extended_semconv import gen_ai_extended_attributes
+
+logger = logging.getLogger(__name__)
+
+
+class _WorkflowWrapper:
+    """Wrapper for run_agent_on_problem to create workflow (CHAIN) span."""
+
+    def __init__(self, tracer: trace_api.Tracer):
+        self._tracer = tracer
+
+    def __call__(self, wrapped, instance, args, kwargs):
+        # run_agent_on_problem(problem_config, problem_name, config, progress_queue, output_path)
+        problem_name = args[1] if len(args) > 1 else kwargs.get("problem_name", "unknown")
+        config = args[2] if len(args) > 2 else kwargs.get("config")
+
+        span_name = f"workflow.{problem_name}"
+
+        attrs = {
+            gen_ai_attributes.GEN_AI_OPERATION_NAME: "workflow",
+            gen_ai_attributes.GEN_AI_SYSTEM: SYSTEM_NAME,
+            gen_ai_extended_attributes.GEN_AI_SPAN_KIND: "CHAIN",
+            "slop_code.problem.name": str(problem_name),
+        }
+
+        # Extract optional attributes from config
+        if config is not None:
+            model_name = safe_get_nested(config, "model_def", "name")
+            set_optional_attr_dict(attrs, gen_ai_attributes.GEN_AI_REQUEST_MODEL, model_name)
+
+            agent_type = safe_get_nested(config, "agent_config", "type")
+            set_optional_attr_dict(attrs, "slop_code.agent.type", agent_type)
+
+            pass_policy = safe_get_nested(config, "pass_policy", "value")
+            if pass_policy is None:
+                pass_policy_obj = safe_get(config, "pass_policy")
+                if pass_policy_obj is not None and hasattr(pass_policy_obj, "value"):
+                    pass_policy = pass_policy_obj.value
+            set_optional_attr_dict(attrs, "slop_code.pass_policy", pass_policy)
+
+        try:
+            with self._tracer.start_as_current_span(
+                name=span_name,
+                kind=SpanKind.INTERNAL,
+                attributes={k: v for k, v in attrs.items() if v is not None},
+            ) as span:
+                try:
+                    result = wrapped(*args, **kwargs)
+
+                    if isinstance(result, dict):
+                        summary = result.get("summary")
+                        if isinstance(summary, dict):
+                            set_optional_attr(
+                                span, "slop_code.state", summary.get("state")
+                            )
+                            set_optional_attr(
+                                span,
+                                "slop_code.passed_policy",
+                                summary.get("passed_policy"),
+                            )
+
+                    span.set_status(Status(StatusCode.OK))
+                    return result
+                except Exception as e:
+                    span.record_exception(e)
+                    span.set_status(Status(StatusCode.ERROR, str(e)))
+                    raise
+        finally:
+            # Flush AFTER the `with` block so the workflow span itself
+            # is `on_end`-delivered to the SpanProcessor before we ask it
+            # to drain. run_agent_on_problem is the last meaningful work
+            # item inside the per-problem worker subprocess; once it
+            # returns, the process is reaped by ProcessPoolExecutor's
+            # shutdown which can short-circuit BatchSpanProcessor's
+            # atexit handler. Without this explicit flush the CHAIN span
+            # (and the tail batch of TASK/AGENT/STEP spans) gets dropped.
+            try:
+                provider = trace_api.get_tracer_provider()
+                flush = getattr(provider, "force_flush", None)
+                if callable(flush):
+                    flush(timeout_millis=5000)
+            except Exception as flush_err:  # noqa: BLE001
+                logger.debug(
+                    "force_flush after workflow span failed: %s", flush_err
+                )
+
+
+def set_optional_attr_dict(attrs: dict, key: str, value) -> None:
+    """Add to attrs dict only if value is not None."""
+    if value is not None:
+        if isinstance(value, str) and len(value) > 1024:
+            value = value[:1024]
+        attrs[key] = value
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/test-requirements.txt b/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/test-requirements.txt
new file mode 100644
index 000000000..9facd6bc9
--- /dev/null
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/test-requirements.txt
@@ -0,0 +1,8 @@
+pytest
+pytest-asyncio
+pytest-forked==1.6.0
+opentelemetry-api
+opentelemetry-sdk
+opentelemetry-instrumentation
+opentelemetry-semantic-conventions
+wrapt
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/tests/__init__.py b/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/tests/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/tests/conftest.py b/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/tests/conftest.py
new file mode 100644
index 000000000..dcda695d0
--- /dev/null
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/tests/conftest.py
@@ -0,0 +1,209 @@
+# Copyright The OpenTelemetry Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Test configuration for slop-code instrumentation tests."""
+
+import os
+import sys
+import types
+from unittest.mock import MagicMock
+
+import pytest
+
+os.environ["OTEL_SEMCONV_STABILITY_OPT_IN"] = "gen_ai_latest_experimental"  # set at conftest import, before the fixtures below import the instrumentation
+
+
+def _make_module(name):
+    """Create a real module object."""
+    mod = types.ModuleType(name)
+    mod.__package__ = name.rsplit(".", 1)[0] if "." in name else name
+    return mod
+
+
+def _create_mock_slop_code_modules():
+    """Build stub ``slop_code`` modules, register them in ``sys.modules``, and return them keyed by name."""
+    # Create every package and leaf module of the stubbed hierarchy
+    mod_slop_code = _make_module("slop_code")
+    mod_entrypoints = _make_module("slop_code.entrypoints")
+    mod_commands = _make_module("slop_code.entrypoints.commands")
+    mod_run_agent = _make_module("slop_code.entrypoints.commands.run_agent")
+    mod_problem_runner = _make_module("slop_code.entrypoints.problem_runner")
+    mod_worker = _make_module("slop_code.entrypoints.problem_runner.worker")
+    mod_driver = _make_module("slop_code.entrypoints.problem_runner.driver")
+    mod_agent_runner = _make_module("slop_code.agent_runner")
+    mod_runner = _make_module("slop_code.agent_runner.runner")
+    mod_agent = _make_module("slop_code.agent_runner.agent")
+    mod_agents = _make_module("slop_code.agent_runner.agents")
+    mod_miniswe = _make_module("slop_code.agent_runner.agents.miniswe")
+    mod_metrics = _make_module("slop_code.metrics")
+    mod_rubric = _make_module("slop_code.metrics.rubric")
+    mod_router = _make_module("slop_code.metrics.rubric.router")
+
+    # --- ENTRY: run_agent (stub returns a fixed status dict) ---
+    def run_agent(*args, **kwargs):
+        return {"status": "completed"}
+
+    mod_run_agent.run_agent = run_agent
+
+    # --- WORKFLOW: run_agent_on_problem (stub returns a dict with a "summary" dict) ---
+    def run_agent_on_problem(*args, **kwargs):
+        return {"summary": {"state": "completed", "passed_policy": True}}
+
+    mod_worker.run_agent_on_problem = run_agent_on_problem
+    # driver re-imports the worker name at module load time. This mock mirrors
+    # the same pattern so the instrumentor's driver-side patch has a target.
+    mod_driver.run_agent_on_problem = run_agent_on_problem
+
+    # --- TASK: AgentRunner._run_checkpoint (agent usage fixed at 100 input / 50 output tokens) ---
+    class AgentRunner:
+        def __init__(self):
+            self.agent = MagicMock()
+            self.agent.usage = MagicMock()
+            self.agent.usage.net_tokens = MagicMock()
+            self.agent.usage.net_tokens.input = 100
+            self.agent.usage.net_tokens.output = 50
+
+        def _run_checkpoint(self, checkpoint, checkpoint_save_dir, is_first_checkpoint=False):
+            result = MagicMock()
+            result.had_error = False
+            result.passed_policy = True
+            return result
+
+    mod_runner.AgentRunner = AgentRunner
+
+    # --- AGENT: Agent.run_checkpoint (result carries the agent's usage mock and a fixed elapsed) ---
+    class Agent:
+        def __init__(self, problem_name="test_problem"):
+            self.problem_name = problem_name
+            self.usage = MagicMock()
+            self.usage.net_tokens = MagicMock()
+            self.usage.net_tokens.input = 100
+            self.usage.net_tokens.output = 50
+            self.usage.steps = 0
+            self.usage.cost = 0.05
+
+        def run_checkpoint(self, task):
+            result = MagicMock()
+            result.usage = self.usage
+            result.elapsed = 10.5
+            return result
+
+    mod_agent.Agent = Agent
+
+    # --- STEP: MiniSWEAgent.agent_step (returns a dict with "token_usage" and "step_cost") ---
+    class MiniSWEAgent(Agent):
+        def __init__(self, problem_name="test_problem"):
+            super().__init__(problem_name)
+
+        def agent_step(self):
+            return {
+                "token_usage": MagicMock(input=200, output=80, cache_read=50, cache_write=10),
+                "step_cost": 0.01,
+            }
+
+    mod_miniswe.MiniSWEAgent = MiniSWEAgent
+
+    # --- LLM: grade_file_async (async stub returning a (grades, response_data) tuple) ---
+    async def grade_file_async(*args, **kwargs):
+        grades = [{"score": 8, "reasoning": "Good code"}]
+        response_data = {
+            "id": "resp-123",
+            "usage": {
+                "prompt_tokens": 500,
+                "completion_tokens": 200,
+                "cache_read_input_tokens": 100,
+                "cache_creation_input_tokens": 50,
+            },
+        }
+        return grades, response_data
+
+    mod_router.grade_file_async = grade_file_async
+
+    # Wire parent-child relationships so each submodule is an attribute of its parent
+    mod_slop_code.entrypoints = mod_entrypoints
+    mod_slop_code.agent_runner = mod_agent_runner
+    mod_slop_code.metrics = mod_metrics
+    mod_entrypoints.commands = mod_commands
+    mod_entrypoints.problem_runner = mod_problem_runner
+    mod_commands.run_agent = mod_run_agent
+    mod_problem_runner.worker = mod_worker
+    mod_problem_runner.driver = mod_driver
+    mod_agent_runner.runner = mod_runner
+    mod_agent_runner.agent = mod_agent
+    mod_agent_runner.agents = mod_agents
+    mod_agents.miniswe = mod_miniswe
+    mod_metrics.rubric = mod_rubric
+    mod_rubric.router = mod_router
+
+    # Register all modules in sys.modules (overwrites any existing entries with these names)
+    modules = {
+        "slop_code": mod_slop_code,
+        "slop_code.entrypoints": mod_entrypoints,
+        "slop_code.entrypoints.commands": mod_commands,
+        "slop_code.entrypoints.commands.run_agent": mod_run_agent,
+        "slop_code.entrypoints.problem_runner": mod_problem_runner,
+        "slop_code.entrypoints.problem_runner.worker": mod_worker,
+        "slop_code.entrypoints.problem_runner.driver": mod_driver,
+        "slop_code.agent_runner": mod_agent_runner,
+        "slop_code.agent_runner.runner": mod_runner,
+        "slop_code.agent_runner.agent": mod_agent,
+        "slop_code.agent_runner.agents": mod_agents,
+        "slop_code.agent_runner.agents.miniswe": mod_miniswe,
+        "slop_code.metrics": mod_metrics,
+        "slop_code.metrics.rubric": mod_rubric,
+        "slop_code.metrics.rubric.router": mod_router,
+    }
+
+    for name, mod in modules.items():
+        sys.modules[name] = mod
+
+    return modules
+
+
+# Install the mock modules when conftest is imported, before any fixture or test imports the instrumentation
+_mock_modules = _create_mock_slop_code_modules()
+
+
+@pytest.fixture(scope="function")
+def span_exporter():
+    from opentelemetry.sdk.trace.export.in_memory_span_exporter import (
+        InMemorySpanExporter,
+    )
+
+    exporter = InMemorySpanExporter()
+    yield exporter
+    exporter.clear()
+
+
+@pytest.fixture(scope="function")
+def tracer_provider(span_exporter):
+    from opentelemetry.sdk.trace import TracerProvider
+    from opentelemetry.sdk.trace.export import SimpleSpanProcessor
+
+    provider = TracerProvider()
+    provider.add_span_processor(SimpleSpanProcessor(span_exporter))
+    return provider
+
+
+@pytest.fixture(scope="function")
+def instrument(tracer_provider):
+    from opentelemetry.instrumentation.slop_code import SlopCodeInstrumentor
+
+    instrumentor = SlopCodeInstrumentor()
+    instrumentor.instrument(
+        tracer_provider=tracer_provider,
+        skip_dep_check=True,
+    )
+    yield instrumentor
+    instrumentor.uninstrument()
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/tests/test_agent_span.py b/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/tests/test_agent_span.py
new file mode 100644
index 000000000..d372ba220
--- /dev/null
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/tests/test_agent_span.py
@@ -0,0 +1,102 @@
+# Copyright The OpenTelemetry Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Tests for AGENT span (Agent.run_checkpoint)."""
+
+from unittest.mock import MagicMock
+
+import pytest
+
+from opentelemetry.trace import StatusCode
+
+
+class TestAgentSpan:
+    """Verify that Agent.run_checkpoint produces an AGENT span."""
+
+    def test_agent_span_created(self, span_exporter, instrument):
+        """Agent.run_checkpoint should create an AGENT span."""
+        import slop_code.agent_runner.agent as mod
+
+        agent = mod.Agent(problem_name="file_backup")
+        result = agent.run_checkpoint("solve the bug")
+
+        spans = span_exporter.get_finished_spans()
+        agent_spans = [
+            s for s in spans
+            if s.attributes.get("gen_ai.operation.name") == "invoke_agent"
+        ]
+        assert len(agent_spans) == 1
+
+        span = agent_spans[0]
+        assert span.name == "agent.Agent"
+        assert span.attributes["gen_ai.system"] == "slop-code"
+        assert span.attributes["gen_ai.span.kind"] == "AGENT"
+        assert span.attributes["gen_ai.agent.name"] == "Agent"
+        assert span.attributes["slop_code.problem.name"] == "file_backup"
+        assert span.status.status_code == StatusCode.OK
+
+    def test_agent_span_captures_usage(self, span_exporter, instrument):
+        """AGENT span should capture token usage from result."""
+        import slop_code.agent_runner.agent as mod
+
+        agent = mod.Agent(problem_name="test_prob")
+        agent.run_checkpoint("task")
+
+        spans = span_exporter.get_finished_spans()
+        agent_spans = [
+            s for s in spans
+            if s.attributes.get("gen_ai.operation.name") == "invoke_agent"
+        ]
+        assert len(agent_spans) == 1
+        span = agent_spans[0]
+
+        assert "gen_ai.usage.input_tokens" in span.attributes
+        assert "gen_ai.usage.output_tokens" in span.attributes
+        assert span.attributes["gen_ai.usage.input_tokens"] == 100
+        assert span.attributes["gen_ai.usage.output_tokens"] == 50
+
+    def test_agent_span_error(self, span_exporter, tracer_provider):
+        """Exception in Agent.run_checkpoint should produce error span."""
+        import slop_code.agent_runner.agent as mod
+
+        from opentelemetry.instrumentation.slop_code import SlopCodeInstrumentor
+
+        class FailingAgent(mod.Agent):
+            def run_checkpoint(self, task):
+                raise TimeoutError("Agent timeout")
+
+        OriginalAgent = mod.Agent
+        mod.Agent = FailingAgent
+
+        instrumentor = SlopCodeInstrumentor()
+        instrumentor.instrument(tracer_provider=tracer_provider, skip_dep_check=True)
+
+        try:
+            agent = mod.Agent(problem_name="test_prob")
+
+            with pytest.raises(TimeoutError, match="Agent timeout"):
+                agent.run_checkpoint("task")
+
+            spans = span_exporter.get_finished_spans()
+            agent_spans = [
+                s for s in spans
+                if s.attributes.get("gen_ai.operation.name") == "invoke_agent"
+            ]
+            assert len(agent_spans) == 1
+            span = agent_spans[0]
+            assert span.status.status_code == StatusCode.ERROR
+            assert span.attributes.get("error.type") == "TimeoutError"
+        finally:
+            instrumentor.uninstrument()
+            mod.Agent = OriginalAgent
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/tests/test_entry_span.py b/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/tests/test_entry_span.py
new file mode 100644
index 000000000..2f7c1751f
--- /dev/null
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/tests/test_entry_span.py
@@ -0,0 +1,74 @@
+# Copyright The OpenTelemetry Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Tests for ENTRY span (run_agent)."""
+
+import pytest
+
+from opentelemetry.trace import StatusCode
+
+
+class TestEntrySpan:
+    """Verify that run_agent produces an ENTRY span."""
+
+    def test_entry_span_created(self, span_exporter, instrument):
+        """run_agent should create an ENTRY span with correct attributes."""
+        import slop_code.entrypoints.commands.run_agent as mod
+
+        mod.run_agent()
+
+        spans = span_exporter.get_finished_spans()
+        entry_spans = [
+            s for s in spans
+            if s.attributes.get("gen_ai.span.kind") == "ENTRY"
+        ]
+        assert len(entry_spans) == 1
+
+        span = entry_spans[0]
+        assert span.name == "slop-code.enter"
+        assert span.attributes["gen_ai.system"] == "slop-code"
+        assert span.attributes["gen_ai.operation.name"] == "enter"
+        assert span.status.status_code == StatusCode.OK
+
+    def test_entry_span_error(self, span_exporter, tracer_provider):
+        """run_agent raising an exception should produce an error ENTRY span."""
+        import slop_code.entrypoints.commands.run_agent as mod
+
+        from opentelemetry.instrumentation.slop_code import SlopCodeInstrumentor
+
+        # Store original and replace with failing function
+        original = mod.run_agent
+
+        def failing_run_agent(*args, **kwargs):
+            raise RuntimeError("Config error")
+
+        mod.run_agent = failing_run_agent
+
+        instrumentor = SlopCodeInstrumentor()
+        instrumentor.instrument(tracer_provider=tracer_provider, skip_dep_check=True)
+
+        try:
+            with pytest.raises(RuntimeError, match="Config error"):
+                mod.run_agent()
+
+            spans = span_exporter.get_finished_spans()
+            entry_spans = [
+                s for s in spans
+                if s.attributes.get("gen_ai.span.kind") == "ENTRY"
+            ]
+            assert len(entry_spans) == 1
+            assert entry_spans[0].status.status_code == StatusCode.ERROR
+        finally:
+            instrumentor.uninstrument()
+            mod.run_agent = original
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/tests/test_hierarchy.py b/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/tests/test_hierarchy.py
new file mode 100644
index 000000000..d33cc3568
--- /dev/null
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/tests/test_hierarchy.py
@@ -0,0 +1,118 @@
+# Copyright The OpenTelemetry Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Tests for span hierarchy and parent-child relationships."""
+
+from unittest.mock import MagicMock
+
+import pytest
+
+from opentelemetry.trace import StatusCode
+
+
+class TestSpanHierarchy:
+    """Verify parent-child relationships between spans produced by nested instrumented calls."""
+
+    def test_entry_is_parent_of_workflow(self, span_exporter, instrument):
+        """ENTRY span should be parent of workflow span when called inline."""
+        import slop_code.entrypoints.commands.run_agent as entry_mod
+        import slop_code.entrypoints.problem_runner.worker as worker_mod
+
+        # Swap the function behind the run_agent wrapper for one that calls run_agent_on_problem
+        original = entry_mod.run_agent.__wrapped__
+
+        def run_with_workflow(*args, **kwargs):
+            config = MagicMock()
+            config.model_def = None
+            config.agent_config = None
+            config.pass_policy = None
+            return worker_mod.run_agent_on_problem(
+                MagicMock(), "test_problem", config, MagicMock(), "/tmp"
+            )
+
+        entry_mod.run_agent.__wrapped__ = run_with_workflow
+
+        try:
+            entry_mod.run_agent()
+
+            spans = span_exporter.get_finished_spans()
+            entry_spans = [
+                s for s in spans
+                if s.attributes.get("gen_ai.span.kind") == "ENTRY"
+            ]
+            workflow_spans = [
+                s for s in spans
+                if s.attributes.get("gen_ai.operation.name") == "workflow"
+            ]
+
+            assert len(entry_spans) == 1
+            assert len(workflow_spans) == 1
+
+            entry_span = entry_spans[0]
+            workflow_span = workflow_spans[0]
+
+            # workflow should be child of entry: same trace id, parent span id equals the entry span id
+            assert workflow_span.context.trace_id == entry_span.context.trace_id
+            assert workflow_span.parent is not None
+            assert workflow_span.parent.span_id == entry_span.context.span_id
+        finally:
+            entry_mod.run_agent.__wrapped__ = original
+
+    def test_workflow_is_parent_of_task(self, span_exporter, instrument):
+        """Workflow span should be parent of task span when called inline."""
+        import slop_code.agent_runner.runner as runner_mod
+        import slop_code.entrypoints.problem_runner.worker as worker_mod
+
+        original = worker_mod.run_agent_on_problem.__wrapped__
+
+        def workflow_with_task(*args, **kwargs):
+            r = runner_mod.AgentRunner()
+            checkpoint = MagicMock()
+            checkpoint.name = "cp1"
+            checkpoint.order = 1
+            r._run_checkpoint(checkpoint, "/tmp", True)
+            return {"summary": {"state": "completed", "passed_policy": True}}
+
+        worker_mod.run_agent_on_problem.__wrapped__ = workflow_with_task
+
+        try:
+            config = MagicMock()
+            config.model_def = None
+            config.agent_config = None
+            config.pass_policy = None
+            worker_mod.run_agent_on_problem(
+                MagicMock(), "prob1", config, MagicMock(), "/tmp"
+            )
+
+            spans = span_exporter.get_finished_spans()
+            workflow_spans = [
+                s for s in spans
+                if s.attributes.get("gen_ai.operation.name") == "workflow"
+            ]
+            task_spans = [
+                s for s in spans
+                if s.attributes.get("gen_ai.operation.name") == "run_task"
+            ]
+
+            assert len(workflow_spans) == 1
+            assert len(task_spans) == 1
+
+            workflow_span = workflow_spans[0]
+            task_span = task_spans[0]
+
+            assert task_span.context.trace_id == workflow_span.context.trace_id
+            assert task_span.parent is not None
+            assert task_span.parent.span_id == workflow_span.context.span_id
+        finally:
+            worker_mod.run_agent_on_problem.__wrapped__ = original
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/tests/test_llm_span.py b/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/tests/test_llm_span.py
new file mode 100644
index 000000000..c88e46430
--- /dev/null
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/tests/test_llm_span.py
@@ -0,0 +1,142 @@
+# Copyright The OpenTelemetry Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Tests for LLM span (grade_file_async - Rubric Judge)."""
+
+from unittest.mock import MagicMock
+
+import pytest
+
+from opentelemetry.trace import SpanKind, StatusCode
+
+
+@pytest.mark.asyncio
+class TestLLMSpan:
+    """Verify that the instrumented grade_file_async emits one LLM span per call."""
+
+    async def test_llm_span_created(self, span_exporter, instrument):
+        """A successful call yields a CLIENT span carrying the request attributes."""
+        import slop_code.metrics.rubric.router as mod
+
+        provider = MagicMock()
+        provider.value = "openrouter"
+
+        await mod.grade_file_async(
+            "prompt_prefix",
+            "criteria_text",
+            "test.py",
+            "anthropic/claude-3.5-sonnet",
+            provider,
+            0.7,
+        )
+
+        spans = span_exporter.get_finished_spans()
+        llm_spans = [
+            s for s in spans
+            if s.attributes.get("gen_ai.span.kind") == "LLM"
+        ]
+        assert len(llm_spans) == 1
+
+        span = llm_spans[0]
+        assert span.name == "chat anthropic/claude-3.5-sonnet"
+        assert span.attributes["gen_ai.system"] == "openrouter"
+        assert span.attributes["gen_ai.operation.name"] == "chat"
+        assert span.attributes["gen_ai.request.model"] == "anthropic/claude-3.5-sonnet"
+        assert span.attributes["gen_ai.request.temperature"] == 0.7
+        assert span.kind == SpanKind.CLIENT
+        assert span.status.status_code == StatusCode.OK
+
+    async def test_llm_span_captures_usage(self, span_exporter, instrument):
+        """LLM span should capture token usage and the response id."""
+        import slop_code.metrics.rubric.router as mod
+
+        provider = MagicMock()
+        provider.value = "openrouter"
+
+        await mod.grade_file_async(
+            "prefix", "criteria", "file.py",
+            "anthropic/claude-3.5-sonnet", provider, 0.5,
+        )
+
+        spans = span_exporter.get_finished_spans()
+        llm_spans = [
+            s for s in spans
+            if s.attributes.get("gen_ai.span.kind") == "LLM"
+        ]
+        assert len(llm_spans) == 1
+        span = llm_spans[0]
+
+        assert span.attributes["gen_ai.usage.input_tokens"] == 500
+        assert span.attributes["gen_ai.usage.output_tokens"] == 200
+        assert span.attributes["gen_ai.usage.cache_read.input_tokens"] == 100
+        assert span.attributes["gen_ai.usage.cache_creation.input_tokens"] == 50
+        assert span.attributes["gen_ai.response.id"] == "resp-123"
+
+    async def test_llm_span_error(self, span_exporter, tracer_provider):
+        """Exception in grade_file_async should produce an error LLM span."""
+        import slop_code.metrics.rubric.router as mod
+
+        from opentelemetry.instrumentation.slop_code import SlopCodeInstrumentor
+
+        original = mod.grade_file_async
+
+        async def failing_grade(*args, **kwargs):
+            raise ConnectionError("API unreachable")
+
+        provider = MagicMock()
+        provider.value = "bedrock"
+
+        instrumentor = SlopCodeInstrumentor()
+        # instrument() runs inside the try so that a failure there still
+        # reaches the finally block and restores the real grade_file_async.
+        mod.grade_file_async = failing_grade
+        try:
+            instrumentor.instrument(tracer_provider=tracer_provider, skip_dep_check=True)
+            with pytest.raises(ConnectionError, match="API unreachable"):
+                await mod.grade_file_async(
+                    "prefix", "criteria", "file.py",
+                    "us.anthropic.claude-3-5-sonnet-20241022-v2:0", provider, 0.3,
+                )
+
+            spans = span_exporter.get_finished_spans()
+            llm_spans = [
+                s for s in spans
+                if s.attributes.get("gen_ai.span.kind") == "LLM"
+            ]
+            assert len(llm_spans) == 1
+            assert llm_spans[0].status.status_code == StatusCode.ERROR
+            assert llm_spans[0].attributes["gen_ai.system"] == "bedrock"
+        finally:
+            instrumentor.uninstrument()
+            mod.grade_file_async = original
+
+    async def test_llm_span_bedrock_provider(self, span_exporter, instrument):
+        """LLM span with bedrock provider should use 'bedrock' as system."""
+        import slop_code.metrics.rubric.router as mod
+
+        provider = MagicMock()
+        provider.value = "bedrock"
+
+        await mod.grade_file_async(
+            "prefix", "criteria", "file.py",
+            "us.anthropic.claude-3-5-sonnet-20241022-v2:0", provider, 0.5,
+        )
+
+        spans = span_exporter.get_finished_spans()
+        llm_spans = [
+            s for s in spans
+            if s.attributes.get("gen_ai.span.kind") == "LLM"
+        ]
+        assert len(llm_spans) == 1
+        assert llm_spans[0].attributes["gen_ai.system"] == "bedrock"
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/tests/test_step_span.py b/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/tests/test_step_span.py
new file mode 100644
index 000000000..70e221da2
--- /dev/null
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/tests/test_step_span.py
@@ -0,0 +1,133 @@
+# Copyright The OpenTelemetry Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Tests for STEP span (MiniSWEAgent.agent_step)."""
+
+from unittest.mock import MagicMock
+
+import pytest
+
+from opentelemetry.trace import StatusCode
+
+
+class TestStepSpan:
+    """Verify that the instrumented MiniSWEAgent.agent_step emits a STEP span."""
+
+    def test_step_span_created(self, span_exporter, instrument):
+        """agent_step should create a STEP span named after its round number."""
+        import slop_code.agent_runner.agents.miniswe as mod
+
+        agent = mod.MiniSWEAgent(problem_name="test_prob")
+        agent.agent_step()
+
+        spans = span_exporter.get_finished_spans()
+        step_spans = [
+            s for s in spans
+            if s.attributes.get("gen_ai.span.kind") == "STEP"
+        ]
+        assert len(step_spans) == 1
+
+        span = step_spans[0]
+        assert span.name == "react.step.1"
+        assert span.attributes["gen_ai.system"] == "slop-code"
+        assert span.attributes["gen_ai.operation.name"] == "react"
+        assert span.attributes["gen_ai.react.round"] == 1
+        assert span.status.status_code == StatusCode.OK
+
+    def test_step_span_has_token_usage(self, span_exporter, instrument):
+        """STEP span should carry the four token-usage attributes."""
+        import slop_code.agent_runner.agents.miniswe as mod
+
+        agent = mod.MiniSWEAgent(problem_name="test_prob")
+        agent.agent_step()
+
+        spans = span_exporter.get_finished_spans()
+        step_spans = [
+            s for s in spans
+            if s.attributes.get("gen_ai.span.kind") == "STEP"
+        ]
+        assert len(step_spans) == 1
+        span = step_spans[0]
+
+        assert span.attributes["gen_ai.usage.input_tokens"] == 200
+        assert span.attributes["gen_ai.usage.output_tokens"] == 80
+        assert span.attributes["gen_ai.usage.cache_read.input_tokens"] == 50
+        assert span.attributes["gen_ai.usage.cache_creation.input_tokens"] == 10
+
+    def test_step_span_increments_round(self, span_exporter, instrument):
+        """A step taken after two completed steps should be reported as round 3."""
+        import slop_code.agent_runner.agents.miniswe as mod
+
+        agent = mod.MiniSWEAgent(problem_name="test_prob")
+        # Simulate steps=2 already completed
+        agent.usage.steps = 2
+        agent.agent_step()
+
+        spans = span_exporter.get_finished_spans()
+        step_spans = [
+            s for s in spans
+            if s.attributes.get("gen_ai.span.kind") == "STEP"
+        ]
+        assert len(step_spans) == 1
+        assert step_spans[0].name == "react.step.3"
+        assert step_spans[0].attributes["gen_ai.react.round"] == 3
+
+    def test_step_span_error(self, span_exporter, tracer_provider):
+        """Exception in agent_step should produce an error STEP span."""
+        import slop_code.agent_runner.agents.miniswe as mod
+
+        from opentelemetry.instrumentation.slop_code import SlopCodeInstrumentor
+
+        class FailingMiniSWE(mod.MiniSWEAgent):
+            def agent_step(self):
+                raise RuntimeError("LimitsExceeded")
+
+        OriginalClass = mod.MiniSWEAgent
+        instrumentor = SlopCodeInstrumentor()
+        # instrument() runs inside the try so that a failure there still
+        # reaches the finally block and restores the real MiniSWEAgent.
+        mod.MiniSWEAgent = FailingMiniSWE
+        try:
+            instrumentor.instrument(tracer_provider=tracer_provider, skip_dep_check=True)
+            agent = mod.MiniSWEAgent(problem_name="test_prob")
+
+            with pytest.raises(RuntimeError, match="LimitsExceeded"):
+                agent.agent_step()
+
+            spans = span_exporter.get_finished_spans()
+            step_spans = [
+                s for s in spans
+                if s.attributes.get("gen_ai.span.kind") == "STEP"
+            ]
+            assert len(step_spans) == 1
+            span = step_spans[0]
+            assert span.status.status_code == StatusCode.ERROR
+            assert span.attributes["gen_ai.react.finish_reason"] == "error"
+        finally:
+            instrumentor.uninstrument()
+            mod.MiniSWEAgent = OriginalClass
+
+    def test_step_span_finish_reason_stop(self, span_exporter, instrument):
+        """Successful step should have finish_reason='stop'."""
+        import slop_code.agent_runner.agents.miniswe as mod
+
+        agent = mod.MiniSWEAgent(problem_name="test_prob")
+        agent.agent_step()
+
+        spans = span_exporter.get_finished_spans()
+        step_spans = [
+            s for s in spans if s.attributes.get("gen_ai.span.kind") == "STEP"
+        ]
+        assert len(step_spans) == 1
+        assert step_spans[0].attributes["gen_ai.react.finish_reason"] == "stop"
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/tests/test_task_span.py b/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/tests/test_task_span.py
new file mode 100644
index 000000000..de3e16a95
--- /dev/null
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/tests/test_task_span.py
@@ -0,0 +1,110 @@
+# Copyright The OpenTelemetry Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Tests for TASK span (AgentRunner._run_checkpoint)."""
+
+from unittest.mock import MagicMock
+
+import pytest
+
+from opentelemetry.trace import StatusCode
+
+
+class TestTaskSpan:
+    """Verify that the instrumented AgentRunner._run_checkpoint emits a TASK span."""
+
+    def test_task_span_created(self, span_exporter, instrument):
+        """_run_checkpoint should create a task span with checkpoint attributes."""
+        import slop_code.agent_runner.runner as mod
+
+        runner = mod.AgentRunner()
+
+        checkpoint = MagicMock()
+        checkpoint.name = "checkpoint_1"
+        checkpoint.order = 1
+
+        runner._run_checkpoint(checkpoint, "/tmp/save", True)
+
+        spans = span_exporter.get_finished_spans()
+        task_spans = [
+            s for s in spans
+            if s.attributes.get("gen_ai.operation.name") == "run_task"
+        ]
+        assert len(task_spans) == 1
+
+        span = task_spans[0]
+        assert span.name == "task.checkpoint_1"
+        assert span.attributes["gen_ai.system"] == "slop-code"
+        assert span.attributes["gen_ai.span.kind"] == "TASK"
+        assert span.attributes["slop_code.checkpoint.name"] == "checkpoint_1"
+        assert span.attributes["slop_code.checkpoint.order"] == 1
+        assert span.attributes["slop_code.is_first_checkpoint"] is True
+        assert span.status.status_code == StatusCode.OK
+
+    def test_task_span_error(self, span_exporter, tracer_provider):
+        """Exception in _run_checkpoint should produce an error task span."""
+        import slop_code.agent_runner.runner as mod
+
+        from opentelemetry.instrumentation.slop_code import SlopCodeInstrumentor
+
+        class FailingRunner(mod.AgentRunner):
+            def _run_checkpoint(self, checkpoint, checkpoint_save_dir, is_first_checkpoint=False):
+                raise RuntimeError("Checkpoint failed")
+
+        OriginalRunner = mod.AgentRunner
+        instrumentor = SlopCodeInstrumentor()
+
+        # Replace the class temporarily. instrument() runs inside the try so
+        # that a failure there still restores the real AgentRunner.
+        mod.AgentRunner = FailingRunner
+        try:
+            instrumentor.instrument(tracer_provider=tracer_provider, skip_dep_check=True)
+            runner = mod.AgentRunner()
+            checkpoint = MagicMock()
+            checkpoint.name = "bad_checkpoint"
+            checkpoint.order = 2
+
+            with pytest.raises(RuntimeError, match="Checkpoint failed"):
+                runner._run_checkpoint(checkpoint, "/tmp/save", False)
+
+            spans = span_exporter.get_finished_spans()
+            task_spans = [
+                s for s in spans
+                if s.attributes.get("gen_ai.operation.name") == "run_task"
+            ]
+            assert len(task_spans) == 1
+            assert task_spans[0].status.status_code == StatusCode.ERROR
+        finally:
+            instrumentor.uninstrument()
+            mod.AgentRunner = OriginalRunner
+
+    def test_task_span_not_first_checkpoint(self, span_exporter, instrument):
+        """Subsequent checkpoint should have is_first_checkpoint=False."""
+        import slop_code.agent_runner.runner as mod
+
+        runner = mod.AgentRunner()
+
+        checkpoint = MagicMock()
+        checkpoint.name = "checkpoint_2"
+        checkpoint.order = 2
+
+        runner._run_checkpoint(checkpoint, "/tmp/save", False)
+
+        spans = span_exporter.get_finished_spans()
+        task_spans = [
+            s for s in spans
+            if s.attributes.get("gen_ai.operation.name") == "run_task"
+        ]
+        assert len(task_spans) == 1
+        assert task_spans[0].attributes["slop_code.is_first_checkpoint"] is False
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/tests/test_workflow_span.py b/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/tests/test_workflow_span.py
new file mode 100644
index 000000000..6d0a79ddc
--- /dev/null
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-slop-code/tests/test_workflow_span.py
@@ -0,0 +1,117 @@
+# Copyright The OpenTelemetry Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Tests for CHAIN/workflow span (run_agent_on_problem)."""
+
+from unittest.mock import MagicMock
+
+import pytest
+
+from opentelemetry.trace import StatusCode
+
+
+class TestWorkflowSpan:
+    """Verify that the instrumented run_agent_on_problem emits a workflow span."""
+
+    def test_workflow_span_created(self, span_exporter, instrument):
+        """run_agent_on_problem should create a CHAIN span named after the problem."""
+        import slop_code.entrypoints.problem_runner.worker as mod
+
+        config = MagicMock()
+        config.model_def = MagicMock()
+        config.model_def.name = "anthropic/claude-3.5-sonnet"
+        config.agent_config = MagicMock()
+        config.agent_config.type = "claude_code"
+        config.pass_policy = MagicMock()
+        config.pass_policy.value = "any"
+
+        mod.run_agent_on_problem(
+            MagicMock(),  # problem_config
+            "file_backup",  # problem_name
+            config,  # config
+            MagicMock(),  # progress_queue
+            "/tmp/output",  # output_path
+        )
+
+        spans = span_exporter.get_finished_spans()
+        workflow_spans = [
+            s for s in spans
+            if s.attributes.get("gen_ai.operation.name") == "workflow"
+        ]
+        assert len(workflow_spans) == 1
+
+        span = workflow_spans[0]
+        assert span.name == "workflow.file_backup"
+        assert span.attributes["gen_ai.system"] == "slop-code"
+        assert span.attributes["gen_ai.span.kind"] == "CHAIN"
+        assert span.attributes["slop_code.problem.name"] == "file_backup"
+        assert span.attributes["gen_ai.request.model"] == "anthropic/claude-3.5-sonnet"
+        assert span.attributes["slop_code.agent.type"] == "claude_code"
+        assert span.status.status_code == StatusCode.OK
+
+    def test_workflow_span_error(self, span_exporter, tracer_provider):
+        """Exception in run_agent_on_problem should produce error workflow span."""
+        import slop_code.entrypoints.problem_runner.worker as mod
+
+        from opentelemetry.instrumentation.slop_code import SlopCodeInstrumentor
+
+        original = mod.run_agent_on_problem
+
+        def failing_worker(*args, **kwargs):
+            raise ValueError("Problem not found")
+
+        instrumentor = SlopCodeInstrumentor()
+        # instrument() runs inside the try so that a failure there still
+        # reaches the finally block and restores the real worker function.
+        mod.run_agent_on_problem = failing_worker
+        try:
+            instrumentor.instrument(tracer_provider=tracer_provider, skip_dep_check=True)
+            with pytest.raises(ValueError, match="Problem not found"):
+                mod.run_agent_on_problem(
+                    MagicMock(), "missing_problem", MagicMock(), MagicMock(), "/tmp"
+                )
+
+            spans = span_exporter.get_finished_spans()
+            workflow_spans = [
+                s for s in spans
+                if s.attributes.get("gen_ai.operation.name") == "workflow"
+            ]
+            assert len(workflow_spans) == 1
+            assert workflow_spans[0].status.status_code == StatusCode.ERROR
+        finally:
+            instrumentor.uninstrument()
+            mod.run_agent_on_problem = original
+
+    def test_workflow_span_with_none_config_fields(self, span_exporter, instrument):
+        """Workflow span should omit the request model when config fields are None."""
+        import slop_code.entrypoints.problem_runner.worker as mod
+
+        config = MagicMock()
+        config.model_def = None
+        config.agent_config = None
+        config.pass_policy = None
+
+        mod.run_agent_on_problem(
+            MagicMock(), "test_problem", config, MagicMock(), "/tmp"
+        )
+
+        spans = span_exporter.get_finished_spans()
+        workflow_spans = [
+            s for s in spans
+            if s.attributes.get("gen_ai.operation.name") == "workflow"
+        ]
+        assert len(workflow_spans) == 1
+        span = workflow_spans[0]
+        assert span.attributes["slop_code.problem.name"] == "test_problem"
+        assert "gen_ai.request.model" not in span.attributes