4242 stringify_exception ,
4343)
4444from .logger import init as _init_experiment
45- from .parameters import EvalParameters
45+ from .parameters import EvalParameters , RemoteEvalParameters , is_eval_parameter_schema , validate_parameters
4646from .resource_manager import ResourceManager
4747from .score import Score , is_score , is_scorer
4848from .serializable_data_class import SerializableDataClass
@@ -439,12 +439,14 @@ class Evaluator(Generic[Input, Output]):
439439 Whether to summarize the scores of the experiment after it has run.
440440 """
441441
442- parameters : EvalParameters | None = None
442+ parameters : EvalParameters | RemoteEvalParameters | None = None
443443 """
444444 A set of parameters that will be passed to the evaluator.
445445 Can be used to define prompts or other configurable values.
446446 """
447447
448+ parameter_values : dict [str , Any ] | None = None
449+
448450
449451@dataclasses .dataclass
450452class EvalResultWithSummary (SerializableDataClass , Generic [Input , Output ]):
@@ -675,7 +677,7 @@ def _EvalCommon(
675677 summarize_scores : bool ,
676678 no_send_logs : bool ,
677679 error_score_handler : ErrorScoreHandler | None = None ,
678- parameters : EvalParameters | None = None ,
680+ parameters : EvalParameters | RemoteEvalParameters | None = None ,
679681 on_start : Callable [[ExperimentSummary ], None ] | None = None ,
680682 stream : Callable [[SSEProgressEvent ], None ] | None = None ,
681683 parent : str | None = None ,
@@ -741,6 +743,12 @@ async def make_empty_summary():
741743 if isinstance (evaluator .data , Dataset ):
742744 dataset = evaluator .data
743745
746+ experiment_parameters = None
747+ if RemoteEvalParameters .is_parameters (evaluator .parameters ) and evaluator .parameters .id is not None :
748+ experiment_parameters = {"id" : evaluator .parameters .id }
749+ if evaluator .parameters .version is not None :
750+ experiment_parameters ["version" ] = evaluator .parameters .version
751+
744752 # NOTE: This code is duplicated with run_evaluator_task in py/src/braintrust/cli/eval.py.
745753 # Make sure to update those arguments if you change this.
746754 experiment = None
@@ -759,6 +767,7 @@ async def make_empty_summary():
759767 git_metadata_settings = evaluator .git_metadata_settings ,
760768 repo_info = evaluator .repo_info ,
761769 dataset = dataset ,
770+ parameters = experiment_parameters ,
762771 state = state ,
763772 )
764773
@@ -804,7 +813,7 @@ async def EvalAsync(
804813 description : str | None = None ,
805814 summarize_scores : bool = True ,
806815 no_send_logs : bool = False ,
807- parameters : EvalParameters | None = None ,
816+ parameters : EvalParameters | RemoteEvalParameters | None = None ,
808817 on_start : Callable [[ExperimentSummary ], None ] | None = None ,
809818 stream : Callable [[SSEProgressEvent ], None ] | None = None ,
810819 parent : str | None = None ,
@@ -931,7 +940,7 @@ def Eval(
931940 description : str | None = None ,
932941 summarize_scores : bool = True ,
933942 no_send_logs : bool = False ,
934- parameters : EvalParameters | None = None ,
943+ parameters : EvalParameters | RemoteEvalParameters | None = None ,
935944 on_start : Callable [[ExperimentSummary ], None ] | None = None ,
936945 stream : Callable [[SSEProgressEvent ], None ] | None = None ,
937946 parent : str | None = None ,
@@ -1392,6 +1401,15 @@ def get_other_fields(s):
13921401 scorer_names = [_scorer_name (scorer , i ) for i , scorer in enumerate (scorers )]
13931402 unhandled_scores = scorer_names
13941403
1404+ if evaluator .parameter_values is not None :
1405+ resolved_evaluator_parameters = evaluator .parameter_values
1406+ elif RemoteEvalParameters .is_parameters (evaluator .parameters ):
1407+ resolved_evaluator_parameters = validate_parameters ({}, evaluator .parameters )
1408+ elif is_eval_parameter_schema (evaluator .parameters ):
1409+ resolved_evaluator_parameters = validate_parameters ({}, evaluator .parameters )
1410+ else :
1411+ resolved_evaluator_parameters = evaluator .parameters
1412+
13951413 async def run_evaluator_task (datum , trial_index = 0 ):
13961414 if isinstance (datum , dict ):
13971415 datum = EvalCase .from_dict (datum )
@@ -1451,7 +1469,7 @@ def report_progress(event: TaskProgressEvent):
14511469 trial_index = trial_index ,
14521470 tags = tags ,
14531471 report_progress = report_progress ,
1454- parameters = evaluator . parameters ,
1472+ parameters = resolved_evaluator_parameters ,
14551473 )
14561474
14571475 # Check if the task takes a hooks argument
0 commit comments