Skip to content

Commit fce31e6

Browse files
authored
Merge pull request #64 from braintrustdata/ark/remote-eval-params
support remote eval params
2 parents 6bad5d7 + 0a423ec commit fce31e6

13 files changed

Lines changed: 858 additions & 115 deletions

File tree

braintrust-sdk/src/main/java/dev/braintrust/devserver/Devserver.java

Lines changed: 90 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import static dev.braintrust.json.BraintrustJsonMapper.fromJson;
44
import static dev.braintrust.json.BraintrustJsonMapper.toJson;
55

6+
import com.fasterxml.jackson.databind.node.NullNode;
67
import com.sun.net.httpserver.HttpExchange;
78
import com.sun.net.httpserver.HttpHandler;
89
import com.sun.net.httpserver.HttpServer;
@@ -180,32 +181,36 @@ private void handleList(HttpExchange exchange) throws IOException {
180181

181182
Map<String, Object> metadata = new LinkedHashMap<>();
182183

183-
Map<String, Map<String, Object>> parametersMap = new LinkedHashMap<>();
184-
for (Map.Entry<String, RemoteEval.Parameter> paramEntry :
185-
eval.getParameters().entrySet()) {
186-
String paramName = paramEntry.getKey();
187-
RemoteEval.Parameter param = paramEntry.getValue();
184+
// Serialize parameters in the container format
185+
if (eval.getParameters().isEmpty()) {
186+
metadata.put("parameters", NullNode.getInstance());
187+
} else {
188+
Map<String, Map<String, Object>> schemaMap = new LinkedHashMap<>();
189+
for (ParameterDef<?> param : eval.getParameters()) {
190+
Map<String, Object> paramMetadata = new LinkedHashMap<>();
191+
paramMetadata.put("type", param.type().toString().toLowerCase());
188192

189-
Map<String, Object> paramMetadata = new LinkedHashMap<>();
190-
paramMetadata.put("type", param.getType().getValue());
193+
if (param.schema() != null) {
194+
paramMetadata.put("schema", param.schema());
195+
}
191196

192-
if (param.getDescription() != null) {
193-
paramMetadata.put("description", param.getDescription());
194-
}
197+
if (param.defaultValue() != null) {
198+
paramMetadata.put("default", param.defaultValue());
199+
}
195200

196-
if (param.getDefaultValue() != null) {
197-
paramMetadata.put("default", param.getDefaultValue());
198-
}
201+
if (param.description() != null) {
202+
paramMetadata.put("description", param.description());
203+
}
199204

200-
// Only include schema for data type parameters
201-
if (param.getType() == RemoteEval.ParameterType.DATA
202-
&& param.getSchema() != null) {
203-
paramMetadata.put("schema", param.getSchema());
205+
schemaMap.put(param.name(), paramMetadata);
204206
}
205207

206-
parametersMap.put(paramName, paramMetadata);
208+
Map<String, Object> parametersContainer = new LinkedHashMap<>();
209+
parametersContainer.put("type", "braintrust.staticParameters");
210+
parametersContainer.put("schema", schemaMap);
211+
parametersContainer.put("source", NullNode.getInstance());
212+
metadata.put("parameters", parametersContainer);
207213
}
208-
metadata.put("parameters", parametersMap);
209214

210215
// Add scores (list of scorer names)
211216
List<Map<String, String>> scores = new ArrayList<>();
@@ -245,7 +250,14 @@ private void handleEval(HttpExchange exchange) throws IOException {
245250
try {
246251
InputStream requestBody = exchange.getRequestBody();
247252
var requestBodyString = new String(requestBody.readAllBytes(), StandardCharsets.UTF_8);
248-
EvalRequest request = fromJson(requestBodyString, EvalRequest.class);
253+
EvalRequest request;
254+
try {
255+
request = fromJson(requestBodyString, EvalRequest.class);
256+
} catch (Exception e) {
257+
sendResponse(
258+
exchange, 400, "text/plain", "Invalid request body: " + e.getMessage());
259+
return;
260+
}
249261

250262
// Validate evaluator exists
251263
RemoteEval eval = evals.get(request.getName());
@@ -376,6 +388,14 @@ private <I, O> void handleStreamingEval(
376388

377389
var tracer = BraintrustTracing.getTracer();
378390

391+
// Merge parameters: evaluator defaults + request overrides
392+
final Parameters mergedParameters =
393+
new Parameters(
394+
eval.getParameters(),
395+
null == request.getParameters()
396+
? Map.of()
397+
: request.getParameters());
398+
379399
// Execute task and scorers for each case
380400
final Map<String, List<Double>> scoresByName = new ConcurrentHashMap<>();
381401
final var parentInfo = extractParentInfo(request);
@@ -414,7 +434,9 @@ private <I, O> void handleStreamingEval(
414434
.makeCurrent()) {
415435
var task = eval.getTask();
416436
try {
417-
taskResult = task.apply(datasetCase);
437+
taskResult =
438+
task.apply(
439+
datasetCase, mergedParameters);
418440
} catch (Exception e) {
419441
taskSpan.setStatus(
420442
StatusCode.ERROR, e.getMessage());
@@ -431,6 +453,21 @@ private <I, O> void handleStreamingEval(
431453
"Task threw exception for input: "
432454
+ datasetCase.input(),
433455
e);
456+
// Set eval span attributes so Braintrust can
457+
// resolve the trace
458+
setEvalSpanAttributesForError(
459+
evalSpan,
460+
braintrustParent,
461+
braintrustGeneration,
462+
datasetCase);
463+
// Send progress event even on error so the
464+
// Playground can link to the trace
465+
sendProgressEvent(
466+
os,
467+
evalSpan.getSpanContext().getSpanId(),
468+
datasetCase.origin(),
469+
eval.getName(),
470+
null);
434471
// run scoreForTaskException on each scorer
435472
List<Scorer<I, O>> allScorersForError =
436473
new ArrayList<>(eval.getScorers());
@@ -578,6 +615,38 @@ private void setEvalSpanAttributes(
578615
"braintrust.output_json", toJson(Map.of("output", taskResult.result())));
579616
}
580617

618+
/**
619+
* Sets eval span attributes when the task threw an exception. Similar to {@link
620+
* #setEvalSpanAttributes} but does not require a TaskResult.
621+
*/
622+
private void setEvalSpanAttributesForError(
623+
Span evalSpan,
624+
BraintrustUtils.Parent braintrustParent,
625+
String braintrustGeneration,
626+
DatasetCase<?, ?> datasetCase) {
627+
var spanAttrs = new LinkedHashMap<>();
628+
spanAttrs.put("type", "eval");
629+
spanAttrs.put("name", "eval");
630+
if (braintrustGeneration != null) {
631+
spanAttrs.put("generation", braintrustGeneration);
632+
}
633+
evalSpan.setAttribute(PARENT, braintrustParent.toParentValue())
634+
.setAttribute("braintrust.span_attributes", toJson(spanAttrs))
635+
.setAttribute("braintrust.input_json", toJson(Map.of("input", datasetCase.input())))
636+
.setAttribute("braintrust.expected_json", toJson(datasetCase.expected()));
637+
638+
if (datasetCase.origin().isPresent()) {
639+
evalSpan.setAttribute("braintrust.origin", toJson(datasetCase.origin().get()));
640+
}
641+
if (!datasetCase.tags().isEmpty()) {
642+
evalSpan.setAttribute(
643+
AttributeKey.stringArrayKey("braintrust.tags"), datasetCase.tags());
644+
}
645+
if (!datasetCase.metadata().isEmpty()) {
646+
evalSpan.setAttribute("braintrust.metadata", toJson(datasetCase.metadata()));
647+
}
648+
}
649+
581650
private void setTaskSpanAttributes(
582651
Span taskSpan,
583652
BraintrustUtils.Parent braintrustParent,
Lines changed: 24 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,14 @@
11
package dev.braintrust.devserver;
22

3+
import dev.braintrust.eval.DatasetCase;
4+
import dev.braintrust.eval.ParameterDef;
5+
import dev.braintrust.eval.Parameters;
36
import dev.braintrust.eval.Scorer;
47
import dev.braintrust.eval.Task;
8+
import dev.braintrust.eval.TaskResult;
59
import java.util.*;
610
import java.util.function.Function;
711
import javax.annotation.Nonnull;
8-
import javax.annotation.Nullable;
912
import lombok.Builder;
1013
import lombok.Getter;
1114
import lombok.Singular;
@@ -36,8 +39,8 @@ public class RemoteEval<INPUT, OUTPUT> {
3639
*/
3740
@Singular @Nonnull private final List<Scorer<INPUT, OUTPUT>> scorers;
3841

39-
/** Optional parameters that can be configured from the UI */
40-
@Singular @Nonnull private final Map<String, Parameter> parameters;
42+
/** Optional parameter definitions that can be configured from the UI */
43+
@Singular @Nonnull private final List<ParameterDef<?>> parameters;
4144

4245
public static class Builder<INPUT, OUTPUT> {
4346
/**
@@ -48,84 +51,29 @@ public static class Builder<INPUT, OUTPUT> {
4851
*/
4952
public Builder<INPUT, OUTPUT> taskFunction(Function<INPUT, OUTPUT> taskFn) {
5053
return task(
51-
datasetCase -> {
52-
var result = taskFn.apply(datasetCase.input());
53-
return new dev.braintrust.eval.TaskResult<>(result, datasetCase);
54+
new Task<>() {
55+
@Override
56+
public TaskResult<INPUT, OUTPUT> apply(
57+
DatasetCase<INPUT, OUTPUT> datasetCase, Parameters parameters)
58+
throws Exception {
59+
var result = taskFn.apply(datasetCase.input());
60+
return new TaskResult<>(result, datasetCase, parameters);
61+
}
5462
});
5563
}
5664

5765
/** Build the RemoteEval */
5866
public RemoteEval<INPUT, OUTPUT> build() {
59-
// can add build hooks here later if desired
60-
return internalBuild();
61-
}
62-
}
63-
64-
/** Represents a configurable parameter for the evaluator */
65-
@Getter
66-
@lombok.Builder(builderClassName = "Builder")
67-
public static class Parameter {
68-
/** Type of parameter: "prompt" or "data" */
69-
@Nonnull private final ParameterType type;
70-
71-
/** Optional description of the parameter */
72-
@Nullable private final String description;
73-
74-
/** Optional default value for the parameter */
75-
@Nullable private final Object defaultValue;
76-
77-
/**
78-
* JSON Schema for data type parameters. Only applicable when type is DATA. Should be a Map
79-
* representing a JSON Schema object.
80-
*/
81-
@Nullable private final Map<String, Object> schema;
82-
83-
public static Parameter promptParameter(String description, Object defaultValue) {
84-
return Parameter.builder()
85-
.type(ParameterType.PROMPT)
86-
.description(description)
87-
.defaultValue(defaultValue)
88-
.build();
89-
}
90-
91-
public static Parameter promptParameter(Object defaultValue) {
92-
return promptParameter(null, defaultValue);
93-
}
94-
95-
public static Parameter dataParameter(
96-
String description, Map<String, Object> schema, Object defaultValue) {
97-
return Parameter.builder()
98-
.type(ParameterType.DATA)
99-
.description(description)
100-
.schema(schema)
101-
.defaultValue(defaultValue)
102-
.build();
103-
}
104-
105-
public static Parameter dataParameter(Map<String, Object> schema, Object defaultValue) {
106-
return dataParameter(null, schema, defaultValue);
107-
}
108-
109-
public static Parameter dataParameter(Map<String, Object> schema) {
110-
return dataParameter(null, schema, null);
111-
}
112-
}
113-
114-
/** Parameter type enumeration */
115-
public enum ParameterType {
116-
/** Prompt parameter (for LLM prompts) */
117-
PROMPT("prompt"),
118-
/** Data parameter (for other configuration data) */
119-
DATA("data");
120-
121-
private final String value;
122-
123-
ParameterType(String value) {
124-
this.value = value;
125-
}
126-
127-
public String getValue() {
128-
return value;
67+
var result = internalBuild();
68+
// Validate parameter names are unique
69+
var seen = new HashSet<String>();
70+
for (var param : result.getParameters()) {
71+
if (!seen.add(param.name())) {
72+
throw new IllegalArgumentException(
73+
"Duplicate parameter name: '" + param.name() + "'");
74+
}
75+
}
76+
return result;
12977
}
13078
}
13179
}

braintrust-sdk/src/main/java/dev/braintrust/devserver/RequestContext.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
*/
1313
@Getter
1414
@Builder
15-
public class RequestContext {
15+
class RequestContext {
1616
/** Validated origin from CORS */
1717
private final String appOrigin;
1818

braintrust-sdk/src/main/java/dev/braintrust/eval/Eval.java

Lines changed: 42 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ public final class Eval<INPUT, OUTPUT> {
3838
private final @Nonnull List<Scorer<INPUT, OUTPUT>> scorers;
3939
private final @Nonnull List<String> tags;
4040
private final @Nonnull Map<String, Object> metadata;
41+
private final @Nonnull Parameters parameters;
4142

4243
private Eval(Builder<INPUT, OUTPUT> builder) {
4344
this.experimentName = builder.experimentName;
@@ -59,6 +60,7 @@ private Eval(Builder<INPUT, OUTPUT> builder) {
5960
this.scorers = List.copyOf(builder.scorers);
6061
this.tags = List.copyOf(builder.tags);
6162
this.metadata = Map.copyOf(builder.metadata);
63+
this.parameters = builder.buildParameters();
6264
}
6365

6466
/** Runs the evaluation and returns results. */
@@ -129,7 +131,7 @@ private void evalOne(String experimentId, DatasetCase<INPUT, OUTPUT> datasetCase
129131
.startSpan();
130132
try (var unused =
131133
BraintrustContext.ofExperiment(experimentId, taskSpan).makeCurrent()) {
132-
taskResult = task.apply(datasetCase);
134+
taskResult = task.apply(datasetCase, parameters);
133135
rootSpan.setAttribute(
134136
"braintrust.output_json",
135137
toJson(Map.of("output", taskResult.result())));
@@ -252,6 +254,8 @@ public static final class Builder<INPUT, OUTPUT> {
252254
private @Nullable Tracer tracer = null;
253255
private @Nullable Task<INPUT, OUTPUT> task;
254256
private @Nonnull List<Scorer<INPUT, OUTPUT>> scorers = List.of();
257+
private @Nonnull List<ParameterDef<?>> parameterDefs = List.of();
258+
private @Nonnull Map<String, Object> parameterValues = Map.of();
255259
private @Nonnull List<String> tags = List.of();
256260
private @Nonnull Map<String, Object> metadata = Map.of();
257261

@@ -335,9 +339,10 @@ public Builder<INPUT, OUTPUT> taskFunction(Function<INPUT, OUTPUT> taskFn) {
335339
new Task<>() {
336340
@Override
337341
public TaskResult<INPUT, OUTPUT> apply(
338-
DatasetCase<INPUT, OUTPUT> datasetCase) {
342+
DatasetCase<INPUT, OUTPUT> datasetCase, Parameters parameters)
343+
throws Exception {
339344
var result = taskFn.apply(datasetCase.input());
340-
return new TaskResult<>(result, datasetCase);
345+
return new TaskResult<>(result, datasetCase, parameters);
341346
}
342347
});
343348
}
@@ -365,5 +370,39 @@ public Builder<INPUT, OUTPUT> metadata(Map<String, Object> metadata) {
365370
this.metadata = Map.copyOf(metadata);
366371
return this;
367372
}
373+
374+
/**
375+
* Sets parameter definitions for this eval. Default values from the definitions are used
376+
* unless overridden via {@link #parameterValues(Map)}.
377+
*/
378+
@SuppressWarnings("rawtypes")
379+
public Builder<INPUT, OUTPUT> parameters(ParameterDef<?>... parameterDefs) {
380+
this.parameterDefs = List.of(parameterDefs);
381+
return this;
382+
}
383+
384+
/** Sets parameter definitions for this eval. */
385+
public Builder<INPUT, OUTPUT> parameters(List<ParameterDef<?>> parameterDefs) {
386+
this.parameterDefs = List.copyOf(parameterDefs);
387+
return this;
388+
}
389+
390+
/**
391+
* Sets explicit parameter values, overriding any defaults from parameter definitions. Keys
392+
* not present here fall back to the default value from the corresponding {@link
393+
* ParameterDef}.
394+
*/
395+
public Builder<INPUT, OUTPUT> parameterValues(Map<String, Object> values) {
396+
this.parameterValues = Map.copyOf(values);
397+
return this;
398+
}
399+
400+
/** Builds the merged Parameters from definitions and explicit values. */
401+
private Parameters buildParameters() {
402+
if (parameterDefs.isEmpty() && parameterValues.isEmpty()) {
403+
return Parameters.empty();
404+
}
405+
return new Parameters(parameterDefs, parameterValues);
406+
}
368407
}
369408
}

0 commit comments

Comments
 (0)