4242 stringify_exception ,
4343)
4444from .logger import init as _init_experiment
45- from .parameters import EvalParameters
45+ from .parameters import (
46+ EvalParameters ,
47+ RemoteEvalParameters ,
48+ ValidatedParameters ,
49+ is_eval_parameter_schema ,
50+ validate_parameters ,
51+ )
4652from .resource_manager import ResourceManager
4753from .score import Score , is_score , is_scorer
4854from .serializable_data_class import SerializableDataClass
@@ -215,7 +221,7 @@ def meta(self, **info: Any) -> None:
215221
216222 @property
217223 @abc .abstractmethod
218- def parameters (self ) -> dict [ str , Any ] | None :
224+ def parameters (self ) -> ValidatedParameters | None :
219225 """
220226 The parameters for the current evaluation. These are the validated parameter values
221227 that were passed to the evaluator.
@@ -439,12 +445,14 @@ class Evaluator(Generic[Input, Output]):
439445 Whether to summarize the scores of the experiment after it has run.
440446 """
441447
442- parameters : EvalParameters | None = None
448+ parameters : EvalParameters | RemoteEvalParameters | None = None
443449 """
444450 A set of parameters that will be passed to the evaluator.
445451 Can be used to define prompts or other configurable values.
446452 """
447453
454+ parameter_values : dict [str , Any ] | None = None
455+
448456
449457@dataclasses .dataclass
450458class EvalResultWithSummary (SerializableDataClass , Generic [Input , Output ]):
@@ -675,7 +683,7 @@ def _EvalCommon(
675683 summarize_scores : bool ,
676684 no_send_logs : bool ,
677685 error_score_handler : ErrorScoreHandler | None = None ,
678- parameters : EvalParameters | None = None ,
686+ parameters : EvalParameters | RemoteEvalParameters | None = None ,
679687 on_start : Callable [[ExperimentSummary ], None ] | None = None ,
680688 stream : Callable [[SSEProgressEvent ], None ] | None = None ,
681689 parent : str | None = None ,
@@ -741,6 +749,12 @@ async def make_empty_summary():
741749 if isinstance (evaluator .data , Dataset ):
742750 dataset = evaluator .data
743751
752+ experiment_parameters = None
753+ if isinstance (evaluator .parameters , RemoteEvalParameters ) and evaluator .parameters .id is not None :
754+ experiment_parameters = {"id" : evaluator .parameters .id }
755+ if evaluator .parameters .version is not None :
756+ experiment_parameters ["version" ] = evaluator .parameters .version
757+
744758 # NOTE: This code is duplicated with run_evaluator_task in py/src/braintrust/cli/eval.py.
745759 # Make sure to update those arguments if you change this.
746760 experiment = None
@@ -759,6 +773,7 @@ async def make_empty_summary():
759773 git_metadata_settings = evaluator .git_metadata_settings ,
760774 repo_info = evaluator .repo_info ,
761775 dataset = dataset ,
776+ parameters = experiment_parameters ,
762777 state = state ,
763778 )
764779
@@ -804,7 +819,7 @@ async def EvalAsync(
804819 description : str | None = None ,
805820 summarize_scores : bool = True ,
806821 no_send_logs : bool = False ,
807- parameters : EvalParameters | None = None ,
822+ parameters : EvalParameters | RemoteEvalParameters | None = None ,
808823 on_start : Callable [[ExperimentSummary ], None ] | None = None ,
809824 stream : Callable [[SSEProgressEvent ], None ] | None = None ,
810825 parent : str | None = None ,
@@ -931,7 +946,7 @@ def Eval(
931946 description : str | None = None ,
932947 summarize_scores : bool = True ,
933948 no_send_logs : bool = False ,
934- parameters : EvalParameters | None = None ,
949+ parameters : EvalParameters | RemoteEvalParameters | None = None ,
935950 on_start : Callable [[ExperimentSummary ], None ] | None = None ,
936951 stream : Callable [[SSEProgressEvent ], None ] | None = None ,
937952 parent : str | None = None ,
@@ -1153,7 +1168,7 @@ def __init__(
11531168 trial_index : int = 0 ,
11541169 tags : Sequence [str ] | None = None ,
11551170 report_progress : Callable [[TaskProgressEvent ], None ] = None ,
1156- parameters : dict [ str , Any ] | None = None ,
1171+ parameters : ValidatedParameters | None = None ,
11571172 ):
11581173 if metadata is not None :
11591174 self .update ({"metadata" : metadata })
@@ -1211,7 +1226,7 @@ def report_progress(self, event: TaskProgressEvent):
12111226 return self ._report_progress (event )
12121227
12131228 @property
1214- def parameters (self ) -> dict [ str , Any ] | None :
1229+ def parameters (self ) -> ValidatedParameters | None :
12151230 return self ._parameters
12161231
12171232
@@ -1392,6 +1407,15 @@ def get_other_fields(s):
13921407 scorer_names = [_scorer_name (scorer , i ) for i , scorer in enumerate (scorers )]
13931408 unhandled_scores = scorer_names
13941409
1410+ if evaluator .parameter_values is not None :
1411+ resolved_evaluator_parameters = evaluator .parameter_values
1412+ elif isinstance (evaluator .parameters , RemoteEvalParameters ):
1413+ resolved_evaluator_parameters = validate_parameters ({}, evaluator .parameters )
1414+ elif is_eval_parameter_schema (evaluator .parameters ):
1415+ resolved_evaluator_parameters = validate_parameters ({}, evaluator .parameters )
1416+ else :
1417+ resolved_evaluator_parameters = evaluator .parameters
1418+
13951419 async def run_evaluator_task (datum , trial_index = 0 ):
13961420 if isinstance (datum , dict ):
13971421 datum = EvalCase .from_dict (datum )
@@ -1451,7 +1475,7 @@ def report_progress(event: TaskProgressEvent):
14511475 trial_index = trial_index ,
14521476 tags = tags ,
14531477 report_progress = report_progress ,
1454- parameters = evaluator . parameters ,
1478+ parameters = resolved_evaluator_parameters ,
14551479 )
14561480
14571481 # Check if the task takes a hooks argument
0 commit comments