Skip to content

Commit 2d4ea2a

Browse files
committed
btx test runner
1 parent 41df87d commit 2d4ea2a

27 files changed

Lines changed: 2551 additions & 5 deletions

braintrust-sdk/instrumentation/springai_1_0_0/src/main/java/dev/braintrust/instrumentation/springai/v1_0_0/AnthropicBuilderWrapper.java

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
package dev.braintrust.instrumentation.springai.v1_0_0;
22

3+
import com.fasterxml.jackson.databind.JsonNode;
34
import com.fasterxml.jackson.databind.node.ArrayNode;
45
import com.fasterxml.jackson.databind.node.ObjectNode;
56
import dev.braintrust.instrumentation.InstrumentationSemConv;
@@ -74,7 +75,17 @@ static void tagSpanRequest(
7475
for (Message msg : prompt.getInstructions()) {
7576
ObjectNode msgNode = BraintrustJsonMapper.get().createObjectNode();
7677
msgNode.put("role", msg.getMessageType().getValue().toLowerCase());
77-
msgNode.put("content", msg.getText());
78+
String text = msg.getText();
79+
try {
80+
JsonNode parsed = BraintrustJsonMapper.get().readTree(text);
81+
if (parsed.isArray() || parsed.isObject()) {
82+
msgNode.set("content", parsed);
83+
} else {
84+
msgNode.put("content", text);
85+
}
86+
} catch (Exception e) {
87+
msgNode.put("content", text);
88+
}
7889
messages.add(msgNode);
7990
}
8091
String model = null;

braintrust-sdk/instrumentation/springai_1_0_0/src/main/java/dev/braintrust/instrumentation/springai/v1_0_0/OpenAIBuilderWrapper.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
package dev.braintrust.instrumentation.springai.v1_0_0;
22

3+
import com.fasterxml.jackson.databind.JsonNode;
34
import com.fasterxml.jackson.databind.node.ArrayNode;
45
import com.fasterxml.jackson.databind.node.ObjectNode;
56
import dev.braintrust.instrumentation.InstrumentationSemConv;
@@ -76,8 +77,7 @@ static void tagSpanRequest(
7677
// If the content text is a JSON array or object (e.g. multi-part content with images),
7778
// emit it as a structured JSON node rather than a plain string.
7879
try {
79-
com.fasterxml.jackson.databind.JsonNode parsed =
80-
BraintrustJsonMapper.get().readTree(text);
80+
JsonNode parsed = BraintrustJsonMapper.get().readTree(text);
8181
if (parsed.isArray() || parsed.isObject()) {
8282
msgNode.set("content", parsed);
8383
} else {

btx/build.gradle

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
plugins {
2+
id 'java'
3+
}
4+
5+
java {
6+
toolchain {
7+
languageVersion = JavaLanguageVersion.of(17)
8+
}
9+
}
10+
11+
repositories {
12+
mavenCentral()
13+
mavenLocal()
14+
}
15+
16+
dependencies {
17+
// Braintrust SDK (local project dependencies)
18+
testImplementation project(':braintrust-sdk')
19+
testImplementation project(':braintrust-sdk:instrumentation:openai_2_8_0')
20+
testImplementation project(':braintrust-sdk:instrumentation:anthropic_2_2_0')
21+
testImplementation project(':braintrust-sdk:instrumentation:genai_1_18_0')
22+
testImplementation project(':braintrust-sdk:instrumentation:langchain_1_8_0')
23+
testImplementation project(':braintrust-sdk:instrumentation:springai_1_0_0')
24+
25+
// Jackson for JSON processing
26+
testImplementation 'com.fasterxml.jackson.core:jackson-databind:2.16.1'
27+
28+
// OpenAI SDK
29+
testImplementation 'com.openai:openai-java:2.8.1'
30+
31+
// Anthropic SDK
32+
testImplementation 'com.anthropic:anthropic-java:2.10.0'
33+
34+
// Gemini SDK
35+
testImplementation 'org.springframework.ai:spring-ai-google-genai:1.1.0'
36+
37+
// Spring AI (OpenAI + Anthropic providers)
38+
testImplementation 'org.springframework.ai:spring-ai-openai:1.1.3'
39+
testImplementation 'org.springframework.ai:spring-ai-anthropic:1.1.3'
40+
testRuntimeOnly 'org.springframework:spring-webflux:6.2.3'
41+
testRuntimeOnly 'io.projectreactor.netty:reactor-netty-http:1.2.3'
42+
testImplementation 'org.apache.httpcomponents.client5:httpclient5:5.3.1'
43+
44+
// LangChain4j
45+
testImplementation 'dev.langchain4j:langchain4j:1.9.1'
46+
testImplementation 'dev.langchain4j:langchain4j-http-client:1.9.1'
47+
testImplementation 'dev.langchain4j:langchain4j-open-ai:1.9.1'
48+
49+
// OpenTelemetry
50+
testImplementation 'io.opentelemetry:opentelemetry-api:1.54.1'
51+
52+
// YAML parsing for spec files
53+
testImplementation 'org.yaml:snakeyaml:2.3'
54+
55+
// Test framework
56+
testImplementation(testFixtures(project(":test-harness")))
57+
testImplementation "org.junit.jupiter:junit-jupiter:${rootProject.ext.junitVersion}"
58+
testImplementation "org.junit.jupiter:junit-jupiter-params:${rootProject.ext.junitVersion}"
59+
testImplementation "io.opentelemetry:opentelemetry-sdk:${rootProject.ext.otelVersion}"
60+
testRuntimeOnly 'org.slf4j:slf4j-simple:2.0.17'
61+
testRuntimeOnly 'org.junit.platform:junit-platform-launcher'
62+
}
63+
64+
test {
65+
useJUnitPlatform()
66+
workingDir = rootProject.projectDir
67+
testLogging {
68+
events "passed", "skipped", "failed"
69+
showStandardStreams = true
70+
exceptionFormat "full"
71+
}
72+
73+
// Pass -Pbtx.spec.filter=<glob> to select which specs are executed; the filter is applied before JUnit runs.
74+
// Example: ./gradlew btx:test -Pbtx.spec.filter=openai
75+
if (project.hasProperty('btx.spec.filter')) {
76+
systemProperty 'btx.spec.filter', project.property('btx.spec.filter')
77+
}
78+
}

btx/spec/README.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# Braintrust Spec
2+
3+
Cross-language specs for implementing a Braintrust SDK.
4+
5+
Contains:
6+
7+
- Markdown files describing complex features
8+
- YAML files describing end-to-end tests and assertions
9+
- YAML files describing cross-language constants (environment variables, string attributes)

btx/spec/llm_span/README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# LLM span end-to-end tests
2+
3+
TODO: document this
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
name: attachments
2+
type: llm_span_test
3+
provider: anthropic
4+
endpoint: /v1/messages
5+
enabled_runners: ["java"]
6+
requests:
7+
- model: claude-haiku-4-5-20251001
8+
temperature: 0.0
9+
max_tokens: 128
10+
messages:
11+
- role: user
12+
content:
13+
- type: text
14+
text: What color is this image?
15+
- type: image
16+
source:
17+
type: base64
18+
media_type: image/png
19+
# 1x1 red pixel
20+
data: iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8DwHwAFBQIAX8jx0gAAAABJRU5ErkJggg==
21+
expected_brainstore_spans:
22+
- metrics:
23+
tokens: !fn is_non_negative_number
24+
prompt_tokens: !fn is_non_negative_number
25+
completion_tokens: !fn is_non_negative_number
26+
metadata:
27+
model: claude-haiku-4-5-20251001
28+
provider: anthropic
29+
span_attributes:
30+
name: anthropic.messages.create
31+
type: llm
32+
input:
33+
- role: user
34+
content:
35+
- type: text
36+
text: What color is this image?
37+
- type: image
38+
source:
39+
type: braintrust_attachment
40+
content_type: image/png
41+
filename: !fn is_non_empty_string
42+
key: !fn is_non_empty_string
43+
output:
44+
content:
45+
- text: !fn is_non_empty_string
46+
type: text
47+
role: assistant
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
name: messages
2+
type: llm_span_test
3+
provider: anthropic
4+
endpoint: /v1/messages
5+
enabled_runners: ["python", "typescript", "java", "csharp"]
6+
requests:
7+
- model: claude-haiku-4-5-20251001
8+
temperature: 0.0
9+
max_tokens: 128
10+
system: "You are a helpful assistant."
11+
messages:
12+
- role: user
13+
content: What is the capital of France?
14+
expected_brainstore_spans:
15+
- metrics:
16+
tokens: !fn is_non_negative_number
17+
prompt_tokens: !fn is_non_negative_number
18+
completion_tokens: !fn is_non_negative_number
19+
metadata:
20+
model: claude-haiku-4-5-20251001
21+
provider: anthropic
22+
span_attributes:
23+
name: anthropic.messages.create
24+
type: llm
25+
input:
26+
- content: What is the capital of France?
27+
role: user
28+
- content: "You are a helpful assistant."
29+
role: system
30+
output:
31+
content:
32+
- text: The capital of France is Paris.
33+
type: text
34+
role: assistant
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
name: streaming
2+
type: llm_span_test
3+
provider: anthropic
4+
endpoint: /v1/messages
5+
enabled_runners: ["java"]
6+
requests:
7+
- model: claude-haiku-4-5-20251001
8+
temperature: 0.0
9+
max_tokens: 128
10+
system: "You are a helpful assistant."
11+
messages:
12+
- role: user
13+
content: Count from 1 to 5.
14+
stream: true
15+
expected_brainstore_spans:
16+
- metrics:
17+
time_to_first_token: !fn is_non_negative_number
18+
tokens: !fn is_non_negative_number
19+
prompt_tokens: !fn is_non_negative_number
20+
completion_tokens: !fn is_non_negative_number
21+
metadata:
22+
model: claude-haiku-4-5-20251001
23+
provider: anthropic
24+
span_attributes:
25+
name: anthropic.messages.create
26+
type: llm
27+
input:
28+
- content: Count from 1 to 5.
29+
role: user
30+
- content: "You are a helpful assistant."
31+
role: system
32+
output:
33+
content:
34+
- text: !fn is_non_empty_string
35+
type: text
36+
role: assistant
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
name: attachments
2+
type: llm_span_test
3+
provider: google
4+
endpoint: /v1/models/gemini-2.0-flash:generateContent
5+
enabled_runners: ["python", "typescript", "java", "go"]
6+
requests:
7+
- contents:
8+
- role: user
9+
parts:
10+
- text: What color is this image?
11+
- inline_data:
12+
mime_type: image/png
13+
# 1x1 red pixel
14+
data: iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8DwHwAFBQIAX8jx0gAAAABJRU5ErkJggg==
15+
generationConfig:
16+
temperature: 0.0
17+
expected_brainstore_spans:
18+
- metrics:
19+
tokens: !fn is_non_negative_number
20+
prompt_tokens: !fn is_non_negative_number
21+
completion_tokens: !fn is_non_negative_number
22+
metadata:
23+
model: gemini-2.0-flash
24+
span_attributes:
25+
name: generate_content
26+
type: llm
27+
input:
28+
model: gemini-2.0-flash
29+
contents:
30+
- role: user
31+
parts:
32+
- text: What color is this image?
33+
- image_url:
34+
url:
35+
content_type: image/png
36+
filename: !fn is_non_empty_string
37+
key: !fn is_non_empty_string
38+
type: braintrust_attachment
39+
output:
40+
candidates:
41+
- content:
42+
parts:
43+
- text: !fn is_non_empty_string
44+
role: model
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
name: generate_content
2+
type: llm_span_test
3+
provider: google
4+
endpoint: /v1/models/gemini-2.5-flash:generateContent
5+
enabled_runners: ["python", "typescript", "java", "go"]
6+
requests:
7+
- contents:
8+
- role: user
9+
parts:
10+
- text: What is the capital of France?
11+
generationConfig:
12+
temperature: 0.0
13+
expected_brainstore_spans:
14+
- metrics:
15+
tokens: !fn is_non_negative_number
16+
prompt_tokens: !fn is_non_negative_number
17+
completion_tokens: !fn is_non_negative_number
18+
metadata:
19+
model: gemini-2.5-flash
20+
span_attributes:
21+
name: generate_content
22+
type: llm
23+
input:
24+
model: gemini-2.5-flash
25+
contents:
26+
- role: user
27+
parts:
28+
- text: What is the capital of France?
29+
output:
30+
candidates:
31+
- content:
32+
parts:
33+
- text: !fn is_non_empty_string
34+
role: model

0 commit comments

Comments
 (0)