33import static dev .braintrust .json .BraintrustJsonMapper .fromJson ;
44import static dev .braintrust .json .BraintrustJsonMapper .toJson ;
55
6+ import com .fasterxml .jackson .databind .node .NullNode ;
67import com .sun .net .httpserver .HttpExchange ;
78import com .sun .net .httpserver .HttpHandler ;
89import com .sun .net .httpserver .HttpServer ;
@@ -180,32 +181,36 @@ private void handleList(HttpExchange exchange) throws IOException {
180181
181182 Map <String , Object > metadata = new LinkedHashMap <>();
182183
183- Map <String , Map <String , Object >> parametersMap = new LinkedHashMap <>();
184- for (Map .Entry <String , RemoteEval .Parameter > paramEntry :
185- eval .getParameters ().entrySet ()) {
186- String paramName = paramEntry .getKey ();
187- RemoteEval .Parameter param = paramEntry .getValue ();
184+ // Serialize parameters in the container format
185+ if (eval .getParameters ().isEmpty ()) {
186+ metadata .put ("parameters" , NullNode .getInstance ());
187+ } else {
188+ Map <String , Map <String , Object >> schemaMap = new LinkedHashMap <>();
189+ for (ParameterDef <?> param : eval .getParameters ()) {
190+ Map <String , Object > paramMetadata = new LinkedHashMap <>();
191+ paramMetadata .put ("type" , param .type ().toString ().toLowerCase ());
188192
189- Map <String , Object > paramMetadata = new LinkedHashMap <>();
190- paramMetadata .put ("type" , param .getType ().getValue ());
193+ if (param .schema () != null ) {
194+ paramMetadata .put ("schema" , param .schema ());
195+ }
191196
192- if (param .getDescription () != null ) {
193- paramMetadata .put ("description " , param .getDescription ());
194- }
197+ if (param .defaultValue () != null ) {
198+ paramMetadata .put ("default " , param .defaultValue ());
199+ }
195200
196- if (param .getDefaultValue () != null ) {
197- paramMetadata .put ("default " , param .getDefaultValue ());
198- }
201+ if (param .description () != null ) {
202+ paramMetadata .put ("description " , param .description ());
203+ }
199204
200- // Only include schema for data type parameters
201- if (param .getType () == RemoteEval .ParameterType .DATA
202- && param .getSchema () != null ) {
203- paramMetadata .put ("schema" , param .getSchema ());
205+ schemaMap .put (param .name (), paramMetadata );
204206 }
205207
206- parametersMap .put (paramName , paramMetadata );
208+ Map <String , Object > parametersContainer = new LinkedHashMap <>();
209+ parametersContainer .put ("type" , "braintrust.staticParameters" );
210+ parametersContainer .put ("schema" , schemaMap );
211+ parametersContainer .put ("source" , NullNode .getInstance ());
212+ metadata .put ("parameters" , parametersContainer );
207213 }
208- metadata .put ("parameters" , parametersMap );
209214
210215 // Add scores (list of scorer names)
211216 List <Map <String , String >> scores = new ArrayList <>();
@@ -245,7 +250,14 @@ private void handleEval(HttpExchange exchange) throws IOException {
245250 try {
246251 InputStream requestBody = exchange .getRequestBody ();
247252 var requestBodyString = new String (requestBody .readAllBytes (), StandardCharsets .UTF_8 );
248- EvalRequest request = fromJson (requestBodyString , EvalRequest .class );
253+ EvalRequest request ;
254+ try {
255+ request = fromJson (requestBodyString , EvalRequest .class );
256+ } catch (Exception e ) {
257+ sendResponse (
258+ exchange , 400 , "text/plain" , "Invalid request body: " + e .getMessage ());
259+ return ;
260+ }
249261
250262 // Validate evaluator exists
251263 RemoteEval eval = evals .get (request .getName ());
@@ -376,6 +388,14 @@ private <I, O> void handleStreamingEval(
376388
377389 var tracer = BraintrustTracing .getTracer ();
378390
391+ // Merge parameters: evaluator defaults + request overrides
392+ final Parameters mergedParameters =
393+ new Parameters (
394+ eval .getParameters (),
395+ null == request .getParameters ()
396+ ? Map .of ()
397+ : request .getParameters ());
398+
379399 // Execute task and scorers for each case
380400 final Map <String , List <Double >> scoresByName = new ConcurrentHashMap <>();
381401 final var parentInfo = extractParentInfo (request );
@@ -414,7 +434,9 @@ private <I, O> void handleStreamingEval(
414434 .makeCurrent ()) {
415435 var task = eval .getTask ();
416436 try {
417- taskResult = task .apply (datasetCase );
437+ taskResult =
438+ task .apply (
439+ datasetCase , mergedParameters );
418440 } catch (Exception e ) {
419441 taskSpan .setStatus (
420442 StatusCode .ERROR , e .getMessage ());
@@ -431,6 +453,21 @@ private <I, O> void handleStreamingEval(
431453 "Task threw exception for input: "
432454 + datasetCase .input (),
433455 e );
456+ // Set eval span attributes so Braintrust can
457+ // resolve the trace
458+ setEvalSpanAttributesForError (
459+ evalSpan ,
460+ braintrustParent ,
461+ braintrustGeneration ,
462+ datasetCase );
463+ // Send progress event even on error so the
464+ // Playground can link to the trace
465+ sendProgressEvent (
466+ os ,
467+ evalSpan .getSpanContext ().getSpanId (),
468+ datasetCase .origin (),
469+ eval .getName (),
470+ null );
434471 // run scoreForTaskException on each scorer
435472 List <Scorer <I , O >> allScorersForError =
436473 new ArrayList <>(eval .getScorers ());
@@ -578,6 +615,38 @@ private void setEvalSpanAttributes(
578615 "braintrust.output_json" , toJson (Map .of ("output" , taskResult .result ())));
579616 }
580617
618+ /**
619+ * Sets eval span attributes when the task threw an exception. Similar to {@link
620+ * #setEvalSpanAttributes} but does not require a TaskResult.
621+ */
622+ private void setEvalSpanAttributesForError (
623+ Span evalSpan ,
624+ BraintrustUtils .Parent braintrustParent ,
625+ String braintrustGeneration ,
626+ DatasetCase <?, ?> datasetCase ) {
627+ var spanAttrs = new LinkedHashMap <>();
628+ spanAttrs .put ("type" , "eval" );
629+ spanAttrs .put ("name" , "eval" );
630+ if (braintrustGeneration != null ) {
631+ spanAttrs .put ("generation" , braintrustGeneration );
632+ }
633+ evalSpan .setAttribute (PARENT , braintrustParent .toParentValue ())
634+ .setAttribute ("braintrust.span_attributes" , toJson (spanAttrs ))
635+ .setAttribute ("braintrust.input_json" , toJson (Map .of ("input" , datasetCase .input ())))
636+ .setAttribute ("braintrust.expected_json" , toJson (datasetCase .expected ()));
637+
638+ if (datasetCase .origin ().isPresent ()) {
639+ evalSpan .setAttribute ("braintrust.origin" , toJson (datasetCase .origin ().get ()));
640+ }
641+ if (!datasetCase .tags ().isEmpty ()) {
642+ evalSpan .setAttribute (
643+ AttributeKey .stringArrayKey ("braintrust.tags" ), datasetCase .tags ());
644+ }
645+ if (!datasetCase .metadata ().isEmpty ()) {
646+ evalSpan .setAttribute ("braintrust.metadata" , toJson (datasetCase .metadata ()));
647+ }
648+ }
649+
581650 private void setTaskSpanAttributes (
582651 Span taskSpan ,
583652 BraintrustUtils .Parent braintrustParent ,
0 commit comments