# Changelog

All notable changes to the LoongSuite BFCL v4 instrumentation are documented
in this file.

## Unreleased

### Added

- Initial release of `loongsuite-instrumentation-bfclv4`.
- ENTRY span around `bfcl_eval._llm_response_generation.generate_results`.
- AGENT span around `bfcl_eval.model_handler.base_handler.BaseHandler.inference`
  with cross-thread OTel context propagation via a narrow patch of
  `bfcl_eval._llm_response_generation.ThreadPoolExecutor`.
- STEP spans created by reflectively wrapping each handler's
  `_query_FC` / `_query_prompting` (discovered via
  `bfcl_eval.constants.model_config.MODEL_CONFIG_MAPPING`).
- Per-call TOOL spans emitted by wrapping
  `bfcl_eval.eval_checker.multi_turn_eval.multi_turn_utils.execute_multi_turn_func_call`.
- Provider override mapping for OSS handlers (vLLM / SGLang).
- Multi-turn `bfcl.turn_idx` and ReAct `gen_ai.react.round` tracking via
  `contextvars`.
# LoongSuite BFCL v4 Instrumentation

LoongSuite Python instrumentation for the [Berkeley Function Call
Leaderboard v4](https://github.com/ShishirPatil/gorilla/tree/main/berkeley-function-call-leaderboard)
(`bfcl-eval`, package `bfcl_eval`).

## Span Topology

```
ENTRY  enter_ai_application_system        gen_ai.span.kind=ENTRY, op=enter
└─ AGENT  invoke_agent {test_entry_id}    gen_ai.span.kind=AGENT, op=invoke_agent
   ├─ STEP  react step                    gen_ai.span.kind=STEP, op=react
   │  ├─ LLM  chat {model}                (created by downstream vendor SDK probe)
   │  └─ TOOL  execute_tool {fn}          gen_ai.span.kind=TOOL, op=execute_tool
   └─ STEP  react step
      └─ ...
```

This instrumentation deliberately does **not** create LLM spans. They are
emitted by the downstream vendor SDK probe (OpenAI / Anthropic / Google /
DashScope / LiteLLM / etc.) so that token usage and request payloads stay in
sync with the SDK that actually performed the request.

## Installation

```bash
pip install loongsuite-instrumentation-bfclv4
```

## Usage

```bash
opentelemetry-instrument bfcl generate \
    --model gpt-4o-2024-11-20-FC \
    --test-category simple_python \
    --num-threads 2
```
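The `opentelemetry-instrument` launcher is configured through the standard OpenTelemetry environment variables (these are OTel SDK settings, not options of this package); for a quick local check, a console exporter is enough:

```shell
# Name the service and print spans to stdout instead of exporting them.
export OTEL_SERVICE_NAME=bfcl-eval
export OTEL_TRACES_EXPORTER=console

opentelemetry-instrument bfcl generate \
    --model gpt-4o-2024-11-20-FC \
    --test-category simple_python \
    --num-threads 2
```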

Or programmatically:

```python
from opentelemetry.instrumentation.bfclv4 import BFCLv4Instrumentor

BFCLv4Instrumentor().instrument()
# ... run BFCL ...
BFCLv4Instrumentor().uninstrument()
```

## Compatibility With Downstream LLM SDK Probes

| Scenario | Recommended downstream probe |
| --- | --- |
| OpenAI / OpenAI Responses / OSS via vLLM / SGLang / DeepSeek (OpenAI-compatible) | `opentelemetry-instrumentation-openai` |
| Anthropic / Claude | `loongsuite-instrumentation-claude-agent-sdk` |
| Gemini / Google | `loongsuite-instrumentation-google-adk` |
| Qwen / DashScope | `loongsuite-instrumentation-dashscope` |
| LiteLLM | `loongsuite-instrumentation-litellm` |

## OSS Provider Notes

For OSS handlers (vLLM / SGLang served via the OpenAI-compatible API), the
BFCL probe sets `gen_ai.provider.name` to `vllm` / `sglang` / `oss` and adds
`bfcl.oss.backend` for disambiguation. Downstream OpenAI probes will still
report `gen_ai.provider.name=openai` on the LLM span; this is expected.

## Custom Attributes

| Attribute | Where | Description |
| --- | --- | --- |
| `gen_ai.framework` = `bfclv4` | ENTRY/AGENT/STEP/TOOL | Framework tag |
| `bfcl.test_category` | ENTRY/AGENT | Test category |
| `bfcl.num_threads` | ENTRY | Configured thread pool size |
| `bfcl.test_case_count` | ENTRY | Number of test cases |
| `bfcl.run_ids` | ENTRY | Whether the run targeted specific IDs |
| `bfcl.test_entry_id` | AGENT | Test entry id |
| `bfcl.turn_idx` | STEP | Multi-turn turn index (0-based) |
| `bfcl.query_mode` | STEP | `FC` or `prompting` |
| `bfcl.oss.backend` | AGENT/STEP | `vllm` / `sglang` / `unknown` (only OSS) |
| `bfcl.tool.duration_is_estimated` | TOOL | Always `true`: per-call latency is the batch latency split evenly across calls |
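The last flag exists because BFCL executes a turn's tool calls as one batch, so a per-call duration can only be derived by splitting the batch latency evenly. A stdlib-only sketch of that estimate (the function name is illustrative, not the package's internals):

```python
import time

def run_batch_with_estimate(calls):
    """Run a batch of zero-argument tool calls and estimate each
    call's latency as an even share of the total batch time."""
    start = time.perf_counter()
    results = [call() for call in calls]
    total = time.perf_counter() - start
    per_call = total / len(calls) if calls else 0.0
    return results, per_call  # per_call is an estimate, hence the flag

results, per_call_estimate = run_batch_with_estimate(
    [lambda: 1, lambda: 2, lambda: 3]
)
```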
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[project]
name = "loongsuite-instrumentation-bfclv4"
dynamic = ["version"]
description = "LoongSuite BFCL v4 (Berkeley Function Call Leaderboard) instrumentation"
readme = "README.md"
license = "Apache-2.0"
requires-python = ">=3.10,<4"
authors = [
    { name = "OpenTelemetry Authors", email = "cncf-opentelemetry-contributors@lists.cncf.io" },
]
classifiers = [
    "Development Status :: 4 - Beta",
    "Intended Audience :: Developers",
    "License :: OSI Approved :: Apache Software License",
    "Programming Language :: Python",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
]
dependencies = [
    "opentelemetry-api >= 1.37.0",
    "opentelemetry-instrumentation >= 0.58b0",
    "opentelemetry-semantic-conventions >= 0.58b0",
    "wrapt >= 1.0.0, < 2.0.0",
    "opentelemetry-util-genai >= 0.3b0.dev0",
]

[project.optional-dependencies]
instruments = [
    "bfcl-eval >= 4.0.0",
]

[project.entry-points.opentelemetry_instrumentor]
bfclv4 = "opentelemetry.instrumentation.bfclv4:BFCLv4Instrumentor"

[project.urls]
Homepage = "https://github.com/alibaba/loongsuite-python-agent/tree/main/instrumentation-loongsuite/loongsuite-instrumentation-bfclv4"
Repository = "https://github.com/alibaba/loongsuite-python-agent"

[tool.hatch.version]
path = "src/opentelemetry/instrumentation/bfclv4/version.py"

[tool.hatch.build.targets.sdist]
include = [
    "/src",
    "/tests",
]

[tool.hatch.build.targets.wheel]
packages = ["src/opentelemetry"]