diff --git a/doc/api.rst b/doc/api.rst index 48de65a5ef..9130c21fdc 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -273,7 +273,9 @@ API Reference class_name_to_snake_case ComponentIdentifier + compute_eval_hash config_hash + EvaluationIdentity Identifiable snake_case_to_class_name diff --git a/pyrit/identifiers/__init__.py b/pyrit/identifiers/__init__.py index 04fae18195..4c2de95bcb 100644 --- a/pyrit/identifiers/__init__.py +++ b/pyrit/identifiers/__init__.py @@ -8,10 +8,13 @@ snake_case_to_class_name, ) from pyrit.identifiers.component_identifier import ComponentIdentifier, Identifiable, config_hash +from pyrit.identifiers.evaluation_identity import EvaluationIdentity, compute_eval_hash __all__ = [ "class_name_to_snake_case", "ComponentIdentifier", + "compute_eval_hash", + "EvaluationIdentity", "Identifiable", "snake_case_to_class_name", "config_hash", diff --git a/pyrit/identifiers/evaluation_identity.py b/pyrit/identifiers/evaluation_identity.py new file mode 100644 index 0000000000..9a0dd187d5 --- /dev/null +++ b/pyrit/identifiers/evaluation_identity.py @@ -0,0 +1,178 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. + +""" +Evaluation identity and eval-hash computation. + +This module provides: + +* ``_build_eval_dict`` — builds a filtered dict for eval-hash computation. +* ``compute_eval_hash`` — free function that computes a behavioral equivalence + hash from a ``ComponentIdentifier``. +* ``EvaluationIdentity`` — abstract base that wraps a ``ComponentIdentifier`` + with domain-specific eval-hash configuration. Concrete subclasses declare + *which* children are targets and *which* params are behavioral via two + ``ClassVar`` frozensets. 
+""" + +from __future__ import annotations + +from abc import ABC +from typing import Any, ClassVar, Optional + +from pyrit.identifiers.component_identifier import ComponentIdentifier, config_hash + + +def _build_eval_dict( + identifier: ComponentIdentifier, + *, + target_child_keys: frozenset[str], + behavioral_child_params: frozenset[str], + param_allowlist: Optional[frozenset[str]] = None, +) -> dict[str, Any]: + """ + Build a filtered dictionary for eval-hash computation. + + Includes only behavioral parameters. For child components whose names appear + in ``target_child_keys``, only params in ``behavioral_child_params`` are kept + (stripping operational params like endpoint, max_requests_per_minute). + Non-target children receive full eval treatment recursively. + + Args: + identifier (ComponentIdentifier): The component identity to process. + target_child_keys (frozenset[str]): Child names that are targets + (e.g., ``{"prompt_target", "converter_target"}``). + behavioral_child_params (frozenset[str]): Param allowlist applied to + target children (e.g., ``{"model_name", "temperature", "top_p"}``). + param_allowlist (Optional[frozenset[str]]): If provided, only include + params whose keys are in the allowlist. If None, include all params. + + Returns: + dict[str, Any]: The filtered dictionary suitable for hashing. 
+ """ + eval_dict: dict[str, Any] = { + ComponentIdentifier.KEY_CLASS_NAME: identifier.class_name, + ComponentIdentifier.KEY_CLASS_MODULE: identifier.class_module, + } + + eval_dict.update( + { + key: value + for key, value in sorted(identifier.params.items()) + if value is not None and (param_allowlist is None or key in param_allowlist) + } + ) + + if identifier.children: + eval_children: dict[str, Any] = {} + for name in sorted(identifier.children): + child_list = identifier.get_child_list(name) + if name in target_child_keys: + # Targets: filter to behavioral params only + hashes = [ + config_hash( + _build_eval_dict( + c, + target_child_keys=target_child_keys, + behavioral_child_params=behavioral_child_params, + param_allowlist=behavioral_child_params, + ) + ) + for c in child_list + ] + else: + # Non-targets (e.g., sub-scorers): full eval treatment, recurse without param filtering + hashes = [ + config_hash( + _build_eval_dict( + c, + target_child_keys=target_child_keys, + behavioral_child_params=behavioral_child_params, + ) + ) + for c in child_list + ] + eval_children[name] = hashes[0] if len(hashes) == 1 else hashes + if eval_children: + eval_dict["children"] = eval_children + + return eval_dict + + +def compute_eval_hash( + identifier: ComponentIdentifier, + *, + target_child_keys: frozenset[str], + behavioral_child_params: frozenset[str], +) -> str: + """ + Compute a behavioral equivalence hash for evaluation grouping. + + Unlike ``ComponentIdentifier.hash`` (which includes all params of self and + children), the eval hash filters child components that are "targets" to only + their behavioral params (e.g., model_name, temperature, top_p), stripping + operational params like endpoint or max_requests_per_minute. This ensures the + same logical configuration on different deployments produces the same eval hash. + + Non-target children (e.g., sub-scorers) receive full recursive eval treatment. 
+ + When ``target_child_keys`` is empty, no child filtering occurs and the result + equals ``identifier.hash``. + + Args: + identifier (ComponentIdentifier): The component identity to compute the hash for. + target_child_keys (frozenset[str]): Child names that are targets + (e.g., ``{"prompt_target", "converter_target"}``). + behavioral_child_params (frozenset[str]): Param allowlist for target children + (e.g., ``{"model_name", "temperature", "top_p"}``). + + Returns: + str: A hex-encoded SHA256 hash suitable for eval registry keying. + """ + if not target_child_keys: + return identifier.hash + + eval_dict = _build_eval_dict( + identifier, + target_child_keys=target_child_keys, + behavioral_child_params=behavioral_child_params, + ) + return config_hash(eval_dict) + + +class EvaluationIdentity(ABC): + """ + Wraps a ``ComponentIdentifier`` with domain-specific eval-hash configuration. + + Subclasses must set the two ``ClassVar`` frozensets: + + * ``TARGET_CHILD_KEYS`` — child names whose operational params should be + stripped (e.g., ``{"prompt_target", "converter_target"}``). + * ``BEHAVIORAL_CHILD_PARAMS`` — param allowlist applied to those target + children (e.g., ``{"model_name", "temperature", "top_p"}``). + + The concrete ``eval_hash`` property delegates to the module-level + ``compute_eval_hash`` free function. 
+ """ + + TARGET_CHILD_KEYS: ClassVar[frozenset[str]] + BEHAVIORAL_CHILD_PARAMS: ClassVar[frozenset[str]] + + def __init__(self, identifier: ComponentIdentifier) -> None: + """Wrap a ComponentIdentifier and eagerly compute its eval hash.""" + self._identifier = identifier + self._eval_hash = compute_eval_hash( + identifier, + target_child_keys=self.TARGET_CHILD_KEYS, + behavioral_child_params=self.BEHAVIORAL_CHILD_PARAMS, + ) + + @property + def identifier(self) -> ComponentIdentifier: + """The underlying component identity.""" + return self._identifier + + @property + def eval_hash(self) -> str: + """Behavioral equivalence hash for evaluation grouping.""" + return self._eval_hash diff --git a/pyrit/models/scenario_result.py b/pyrit/models/scenario_result.py index 1093687874..9ceee151c8 100644 --- a/pyrit/models/scenario_result.py +++ b/pyrit/models/scenario_result.py @@ -223,15 +223,14 @@ def get_scorer_evaluation_metrics(self) -> Optional["ScorerMetrics"]: """ # import here to avoid circular imports + from pyrit.score.scorer_evaluation.scorer_evaluation_identity import ScorerEvaluationIdentity from pyrit.score.scorer_evaluation.scorer_metrics_io import ( - find_objective_metrics_by_hash, + find_objective_metrics_by_eval_hash, ) if not self.objective_scorer_identifier: return None - scorer_hash = self.objective_scorer_identifier.hash - if not scorer_hash: - return None + eval_hash = ScorerEvaluationIdentity(self.objective_scorer_identifier).eval_hash - return find_objective_metrics_by_hash(hash=scorer_hash) + return find_objective_metrics_by_eval_hash(eval_hash=eval_hash) diff --git a/pyrit/score/float_scale/float_scale_scorer.py b/pyrit/score/float_scale/float_scale_scorer.py index 62337102fd..126dd909f7 100644 --- a/pyrit/score/float_scale/float_scale_scorer.py +++ b/pyrit/score/float_scale/float_scale_scorer.py @@ -52,14 +52,16 @@ def get_scorer_metrics(self) -> Optional["HarmScorerMetrics"]: HarmScorerMetrics: The metrics for this scorer, or None if not found 
or not configured. """ from pyrit.score.scorer_evaluation.scorer_metrics_io import ( - find_harm_metrics_by_hash, + find_harm_metrics_by_eval_hash, ) if self.evaluation_file_mapping is None or self.evaluation_file_mapping.harm_category is None: return None - scorer_hash = self.get_identifier().hash - return find_harm_metrics_by_hash(hash=scorer_hash, harm_category=self.evaluation_file_mapping.harm_category) + return find_harm_metrics_by_eval_hash( + eval_hash=self.get_eval_hash(), + harm_category=self.evaluation_file_mapping.harm_category, + ) async def _score_value_with_llm( self, diff --git a/pyrit/score/printer/console_scorer_printer.py b/pyrit/score/printer/console_scorer_printer.py index bce9566d77..108070318c 100644 --- a/pyrit/score/printer/console_scorer_printer.py +++ b/pyrit/score/printer/console_scorer_printer.py @@ -100,8 +100,9 @@ def print_objective_scorer(self, *, scorer_identifier: ComponentIdentifier) -> N Args: scorer_identifier (ComponentIdentifier): The scorer identifier to print information for. 
""" + from pyrit.score.scorer_evaluation.scorer_evaluation_identity import ScorerEvaluationIdentity from pyrit.score.scorer_evaluation.scorer_metrics_io import ( - find_objective_metrics_by_hash, + find_objective_metrics_by_eval_hash, ) print() @@ -109,9 +110,9 @@ def print_objective_scorer(self, *, scorer_identifier: ComponentIdentifier) -> N self._print_colored(f"{self._indent * 2}▸ Scorer Identifier", Fore.WHITE) self._print_scorer_info(scorer_identifier, indent_level=3) - # Look up metrics by hash - scorer_hash = scorer_identifier.hash - metrics = find_objective_metrics_by_hash(hash=scorer_hash) + # Look up metrics by eval hash + eval_hash = ScorerEvaluationIdentity(scorer_identifier).eval_hash + metrics = find_objective_metrics_by_eval_hash(eval_hash=eval_hash) self._print_objective_metrics(metrics) def print_harm_scorer(self, scorer_identifier: ComponentIdentifier, *, harm_category: str) -> None: @@ -127,8 +128,9 @@ def print_harm_scorer(self, scorer_identifier: ComponentIdentifier, *, harm_cate scorer_identifier (ComponentIdentifier): The scorer identifier to print information for. harm_category (str): The harm category for looking up metrics (e.g., "hate_speech", "violence"). 
""" + from pyrit.score.scorer_evaluation.scorer_evaluation_identity import ScorerEvaluationIdentity from pyrit.score.scorer_evaluation.scorer_metrics_io import ( - find_harm_metrics_by_hash, + find_harm_metrics_by_eval_hash, ) print() @@ -136,9 +138,9 @@ def print_harm_scorer(self, scorer_identifier: ComponentIdentifier, *, harm_cate self._print_colored(f"{self._indent * 2}▸ Scorer Identifier", Fore.WHITE) self._print_scorer_info(scorer_identifier, indent_level=3) - # Look up metrics by hash and harm category - scorer_hash = scorer_identifier.hash - metrics = find_harm_metrics_by_hash(hash=scorer_hash, harm_category=harm_category) + # Look up metrics by eval hash and harm category + eval_hash = ScorerEvaluationIdentity(scorer_identifier).eval_hash + metrics = find_harm_metrics_by_eval_hash(eval_hash=eval_hash, harm_category=harm_category) self._print_harm_metrics(metrics) def _print_scorer_info(self, scorer_identifier: ComponentIdentifier, *, indent_level: int = 2) -> None: diff --git a/pyrit/score/scorer.py b/pyrit/score/scorer.py index 5b248d9b99..1e07066f8d 100644 --- a/pyrit/score/scorer.py +++ b/pyrit/score/scorer.py @@ -55,8 +55,8 @@ class Scorer(Identifiable, abc.ABC): Abstract base class for scorers. """ - # Evaluation configuration - maps input dataset files to a result file - # Specifies glob patterns for datasets and a result file name + # Evaluation configuration - maps input dataset files to a result file. + # Specifies glob patterns for datasets and a result file name. evaluation_file_mapping: Optional[ScorerEvalDatasetFiles] = None _identifier: Optional[ComponentIdentifier] = None @@ -70,6 +70,22 @@ def __init__(self, *, validator: ScorerPromptValidator): """ self._validator = validator + def get_eval_hash(self) -> str: + """ + Compute a behavioral equivalence hash for evaluation grouping. 
+ + Delegates to ``ScorerEvaluationIdentity`` which filters target children + (prompt_target, converter_target) to behavioral params only, so the same + scorer configuration on different deployments produces the same eval hash. + + Returns: + str: A hex-encoded SHA256 hash suitable for eval registry keying. + """ + # Deferred import to avoid circular dependency (scorer_evaluation_identity → identifiers → …) + from pyrit.score.scorer_evaluation.scorer_evaluation_identity import ScorerEvaluationIdentity + + return ScorerEvaluationIdentity(self.get_identifier()).eval_hash + @property def scorer_type(self) -> ScoreType: """ diff --git a/pyrit/score/scorer_evaluation/scorer_evaluation_identity.py b/pyrit/score/scorer_evaluation/scorer_evaluation_identity.py new file mode 100644 index 0000000000..184109bfe6 --- /dev/null +++ b/pyrit/score/scorer_evaluation/scorer_evaluation_identity.py @@ -0,0 +1,28 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. + +""" +Scorer-specific evaluation identity. + +``ScorerEvaluationIdentity`` declares which children are "targets" and which +target params are behavioral for the scorer evaluation domain. +""" + +from __future__ import annotations + +from typing import ClassVar + +from pyrit.identifiers.evaluation_identity import EvaluationIdentity + + +class ScorerEvaluationIdentity(EvaluationIdentity): + """ + Evaluation identity for scorers. + + Target children (``prompt_target``, ``converter_target``) are filtered to + behavioral params only (``model_name``, ``temperature``, ``top_p``), so the + same scorer configuration on different deployments produces the same eval hash. 
+ """ + + TARGET_CHILD_KEYS: ClassVar[frozenset[str]] = frozenset({"prompt_target", "converter_target"}) + BEHAVIORAL_CHILD_PARAMS: ClassVar[frozenset[str]] = frozenset({"model_name", "temperature", "top_p"}) diff --git a/pyrit/score/scorer_evaluation/scorer_evaluator.py b/pyrit/score/scorer_evaluation/scorer_evaluator.py index b5a8ac95ec..843a9d1ff2 100644 --- a/pyrit/score/scorer_evaluation/scorer_evaluator.py +++ b/pyrit/score/scorer_evaluation/scorer_evaluator.py @@ -29,8 +29,8 @@ ScorerMetrics, ) from pyrit.score.scorer_evaluation.scorer_metrics_io import ( - find_harm_metrics_by_hash, - find_objective_metrics_by_hash, + find_harm_metrics_by_eval_hash, + find_objective_metrics_by_eval_hash, replace_evaluation_results, ) from pyrit.score.true_false.true_false_scorer import TrueFalseScorer @@ -275,7 +275,7 @@ def _should_skip_evaluation( - (False, None) if should run evaluation """ try: - scorer_hash = self.scorer.get_identifier().hash + scorer_hash = self.scorer.get_eval_hash() # Determine if this is a harm or objective evaluation metrics_type = MetricsType.OBJECTIVE if isinstance(self.scorer, TrueFalseScorer) else MetricsType.HARM @@ -285,14 +285,14 @@ def _should_skip_evaluation( if harm_category is None: logger.warning("harm_category must be provided for harm scorer evaluations") return (False, None) - existing = find_harm_metrics_by_hash( - hash=scorer_hash, + existing = find_harm_metrics_by_eval_hash( + eval_hash=scorer_hash, harm_category=harm_category, ) else: - existing = find_objective_metrics_by_hash( + existing = find_objective_metrics_by_eval_hash( file_path=result_file_path, - hash=scorer_hash, + eval_hash=scorer_hash, ) if not existing: @@ -487,6 +487,7 @@ def _write_metrics_to_registry( replace_evaluation_results( file_path=result_file_path, scorer_identifier=self.scorer.get_identifier(), + eval_hash=self.scorer.get_eval_hash(), metrics=metrics, ) except Exception as e: diff --git a/pyrit/score/scorer_evaluation/scorer_metrics_io.py 
b/pyrit/score/scorer_evaluation/scorer_metrics_io.py index 1f2db065d3..b2ead2468e 100644 --- a/pyrit/score/scorer_evaluation/scorer_metrics_io.py +++ b/pyrit/score/scorer_evaluation/scorer_metrics_io.py @@ -16,7 +16,7 @@ from pyrit.common.path import ( SCORER_EVALS_PATH, ) -from pyrit.identifiers import ComponentIdentifier, config_hash +from pyrit.identifiers import ComponentIdentifier from pyrit.score.scorer_evaluation.scorer_metrics import ( HarmScorerMetrics, ObjectiveScorerMetrics, @@ -32,83 +32,6 @@ M = TypeVar("M", bound=ScorerMetrics) -# Child component params that affect scoring behavior. -# Operational params (endpoint, max_requests_per_minute, etc.) are excluded -# so that the same model on different deployments shares cached eval results. -_BEHAVIORAL_CHILD_PARAMS = frozenset({"model_name", "temperature", "top_p"}) -_TARGET_CHILD_KEYS = frozenset({"prompt_target", "converter_target"}) - - -def _build_eval_dict( - identifier: ComponentIdentifier, - *, - param_allowlist: Optional[frozenset[str]] = None, -) -> dict[str, Any]: - """ - Build a dictionary for eval hashing. - - This function creates a filtered representation of a component's configuration, - including only behavioral parameters. For child components that are targets, - only behavioral params are included. For non-target children, full evaluation - treatment is applied recursively. - - Args: - identifier (ComponentIdentifier): The component identity to process. - param_allowlist (Optional[frozenset[str]]): If provided, only include - params whose keys are in the allowlist. If None, include all params. - Target children are filtered to _BEHAVIORAL_CHILD_PARAMS, while - non-target children receive full eval treatment without param filtering. - - Returns: - Dict[str, Any]: The filtered dictionary suitable for hashing. 
- """ - eval_dict: dict[str, Any] = { - ComponentIdentifier.KEY_CLASS_NAME: identifier.class_name, - ComponentIdentifier.KEY_CLASS_MODULE: identifier.class_module, - } - - eval_dict.update( - { - k: v - for k, v in sorted(identifier.params.items()) - if v is not None and (param_allowlist is None or k in param_allowlist) - } - ) - - if identifier.children: - eval_children: dict[str, Any] = {} - for name in sorted(identifier.children): - child_list = identifier.get_child_list(name) - if name in _TARGET_CHILD_KEYS: - # Targets: filter to behavioral params only - hashes = [ - config_hash(_build_eval_dict(c, param_allowlist=_BEHAVIORAL_CHILD_PARAMS)) for c in child_list - ] - else: - # Non-targets (e.g., sub-scorers): full eval treatment, recurse without param filtering - hashes = [config_hash(_build_eval_dict(c)) for c in child_list] - eval_children[name] = hashes[0] if len(hashes) == 1 else hashes - if eval_children: - eval_dict["children"] = eval_children - - return eval_dict - - -def compute_eval_hash(identifier: ComponentIdentifier) -> str: - """ - Compute a behavioral equivalence hash for scorer evaluation grouping. - - Includes all of the scorer's own params but projects child components - down to only behavioral params (model_name, temperature, top_p). - - Args: - identifier (ComponentIdentifier): The scorer's full identity. - - Returns: - str: A hash suitable for eval registry keying. - """ - return config_hash(_build_eval_dict(identifier)) - def _metrics_to_registry_dict(metrics: ScorerMetrics) -> dict[str, Any]: """ @@ -225,16 +148,16 @@ def _load_metrics_from_file( return results -def find_objective_metrics_by_hash( +def find_objective_metrics_by_eval_hash( *, - hash: str, # noqa: A002 + eval_hash: str, file_path: Optional[Path] = None, ) -> Optional[ObjectiveScorerMetrics]: """ - Find objective scorer metrics by configuration hash. + Find objective scorer metrics by evaluation hash. Args: - hash (str): The scorer configuration hash to search for. 
+ eval_hash (str): The scorer evaluation hash to search for. file_path (Optional[Path]): Path to the JSONL file to search. If not provided, uses the default path: SCORER_EVALS_PATH / "objective" / "objective_achieved_metrics.jsonl" @@ -245,42 +168,43 @@ def find_objective_metrics_by_hash( if file_path is None: file_path = SCORER_EVALS_PATH / "objective" / "objective_achieved_metrics.jsonl" - return _find_metrics_by_hash(file_path=file_path, hash=hash, metrics_class=ObjectiveScorerMetrics) + return _find_metrics_by_eval_hash(file_path=file_path, eval_hash=eval_hash, metrics_class=ObjectiveScorerMetrics) -def find_harm_metrics_by_hash( +def find_harm_metrics_by_eval_hash( *, - hash: str, # noqa: A002 + eval_hash: str, harm_category: str, ) -> Optional[HarmScorerMetrics]: """ - Find harm scorer metrics by configuration hash. + Find harm scorer metrics by evaluation hash. Args: - hash (str): The scorer configuration hash to search for. + eval_hash (str): The scorer evaluation hash to search for. harm_category (str): The harm category to search in (e.g., "hate_speech", "violence"). Returns: HarmScorerMetrics if found, else None. """ file_path = SCORER_EVALS_PATH / "harm" / f"{harm_category}_metrics.jsonl" - return _find_metrics_by_hash(file_path=file_path, hash=hash, metrics_class=HarmScorerMetrics) + return _find_metrics_by_eval_hash(file_path=file_path, eval_hash=eval_hash, metrics_class=HarmScorerMetrics) -def _find_metrics_by_hash( +def _find_metrics_by_eval_hash( *, file_path: Path, - hash: str, # noqa: A002 + eval_hash: str, metrics_class: type[M], ) -> Optional[M]: """ - Find scorer metrics by configuration hash in a specific file. + Find scorer metrics by evaluation hash in a specific file. - This is a private helper function used by find_objective_metrics_by_hash and find_harm_metrics_by_hash. + This is a private helper function used by find_objective_metrics_by_eval_hash + and find_harm_metrics_by_eval_hash. 
Args: file_path (Path): Path to the JSONL file to search. - hash (str): The scorer configuration hash to search for. + eval_hash (str): The scorer evaluation hash to search for. metrics_class (Type[M]): The metrics class to instantiate. Returns: @@ -289,14 +213,14 @@ def _find_metrics_by_hash( entries = _load_jsonl(file_path) for entry in entries: - if entry.get("hash") == hash: + if entry.get("eval_hash") == eval_hash: metrics_dict = entry.get("metrics", {}) # Filter out internal fields that have init=False (e.g., _harm_definition_obj) metrics_dict = {k: v for k, v in metrics_dict.items() if not k.startswith("_")} try: return metrics_class(**metrics_dict) except Exception as e: - logger.warning(f"Failed to parse metrics for hash {hash}: {e}") + logger.warning(f"Failed to parse metrics for eval_hash {eval_hash}: {e}") return None return None @@ -306,6 +230,7 @@ def add_evaluation_results( *, file_path: Path, scorer_identifier: ComponentIdentifier, + eval_hash: str, metrics: "ScorerMetrics", ) -> None: """ @@ -317,6 +242,7 @@ def add_evaluation_results( Args: file_path (Path): The full path to the JSONL file to append to. scorer_identifier (ComponentIdentifier): The scorer's configuration identifier. + eval_hash (str): The pre-computed evaluation hash for grouping. metrics (ScorerMetrics): The computed metrics (ObjectiveScorerMetrics or HarmScorerMetrics). """ # Get or create lock for this file path @@ -324,8 +250,6 @@ def add_evaluation_results( if file_path_str not in _file_write_locks: _file_write_locks[file_path_str] = threading.Lock() - eval_hash = compute_eval_hash(scorer_identifier) - # Build entry dictionary entry = scorer_identifier.to_dict() entry["eval_hash"] = eval_hash @@ -394,18 +318,20 @@ def replace_evaluation_results( *, file_path: Path, scorer_identifier: ComponentIdentifier, + eval_hash: str, metrics: "ScorerMetrics", ) -> None: """ - Replace existing scorer metrics entry (by hash) with new metrics, or add if not exists. 
+ Replace existing scorer metrics entry (by eval_hash) with new metrics, or add if not exists. - This is an atomic operation that removes any existing entry with the same scorer hash - and adds the new entry. Only one entry per scorer hash is maintained in the registry, + This is an atomic operation that removes any existing entry with the same eval_hash + and adds the new entry. Only one entry per eval_hash is maintained in the registry, ensuring we always track the highest-fidelity evaluation. Args: file_path (Path): The full path to the JSONL file. scorer_identifier (ComponentIdentifier): The scorer's configuration identifier. + eval_hash (str): The pre-computed evaluation hash for grouping. metrics (ScorerMetrics): The computed metrics (ObjectiveScorerMetrics or HarmScorerMetrics). """ # Get or create lock for this file path @@ -413,8 +339,6 @@ def replace_evaluation_results( if file_path_str not in _file_write_locks: _file_write_locks[file_path_str] = threading.Lock() - eval_hash = compute_eval_hash(scorer_identifier) - # Build new entry dictionary new_entry = scorer_identifier.to_dict() new_entry["eval_hash"] = eval_hash diff --git a/pyrit/score/true_false/true_false_scorer.py b/pyrit/score/true_false/true_false_scorer.py index 1d61be4977..671dd57973 100644 --- a/pyrit/score/true_false/true_false_scorer.py +++ b/pyrit/score/true_false/true_false_scorer.py @@ -83,7 +83,7 @@ def get_scorer_metrics(self) -> Optional["ObjectiveScorerMetrics"]: """ from pyrit.common.path import SCORER_EVALS_PATH from pyrit.score.scorer_evaluation.scorer_metrics_io import ( - find_objective_metrics_by_hash, + find_objective_metrics_by_eval_hash, ) if self.evaluation_file_mapping is None: @@ -94,7 +94,7 @@ def get_scorer_metrics(self) -> Optional["ObjectiveScorerMetrics"]: if not result_file.exists(): return None - return find_objective_metrics_by_hash(hash=self.get_identifier().hash, file_path=result_file) + return find_objective_metrics_by_eval_hash(eval_hash=self.get_eval_hash(), 
file_path=result_file) async def _score_async(self, message: Message, *, objective: Optional[str] = None) -> list[Score]: """ diff --git a/tests/unit/identifiers/test_component_identifier.py b/tests/unit/identifiers/test_component_identifier.py index cb8dbe858e..7dfd9c9871 100644 --- a/tests/unit/identifiers/test_component_identifier.py +++ b/tests/unit/identifiers/test_component_identifier.py @@ -5,7 +5,12 @@ import pytest import pyrit -from pyrit.identifiers import ComponentIdentifier, Identifiable, config_hash +from pyrit.identifiers import ComponentIdentifier, Identifiable, compute_eval_hash, config_hash +from pyrit.identifiers.evaluation_identity import _build_eval_dict + +# Test constants mirroring Scorer's ClassVars — keeps tests decoupled from pyrit.score +_TARGET_CHILD_KEYS = frozenset({"prompt_target", "converter_target"}) +_BEHAVIORAL_CHILD_PARAMS = frozenset({"model_name", "temperature", "top_p"}) class TestComponentIdentifierCreation: @@ -714,3 +719,494 @@ def _build_identifier(self) -> ComponentIdentifier: assert isinstance(identifier, ComponentIdentifier) assert identifier.class_name == "MyComponent" assert identifier.params["key"] == "val" + + +class TestBuildEvalDict: + """Tests for the _build_eval_dict function.""" + + def test_basic_identifier_without_params_or_children(self): + """Test _build_eval_dict with a simple identifier with no params or children.""" + identifier = ComponentIdentifier( + class_name="SimpleScorer", + class_module="pyrit.score", + ) + result = _build_eval_dict( + identifier, + target_child_keys=_TARGET_CHILD_KEYS, + behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS, + ) + + assert result["class_name"] == "SimpleScorer" + assert result["class_module"] == "pyrit.score" + assert "children" not in result + + def test_includes_all_non_none_params(self): + """Test that all non-None params are included in the eval dict.""" + identifier = ComponentIdentifier( + class_name="ParamScorer", + class_module="pyrit.score", + 
params={"threshold": 0.5, "template": "prompt_text", "mode": "strict"}, + ) + result = _build_eval_dict( + identifier, + target_child_keys=_TARGET_CHILD_KEYS, + behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS, + ) + + assert result["threshold"] == 0.5 + assert result["template"] == "prompt_text" + assert result["mode"] == "strict" + + def test_param_allowlist_filters_params(self): + """Test that param_allowlist restricts which params are included.""" + identifier = ComponentIdentifier( + class_name="FilteredScorer", + class_module="pyrit.score", + params={"threshold": 0.5, "template": "prompt_text", "mode": "strict"}, + ) + result = _build_eval_dict( + identifier, + target_child_keys=_TARGET_CHILD_KEYS, + behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS, + param_allowlist=frozenset({"threshold", "mode"}), + ) + + assert result["threshold"] == 0.5 + assert result["mode"] == "strict" + assert "template" not in result + + def test_none_params_are_excluded(self): + """Test that None-valued params are excluded from the eval dict.""" + identifier = ComponentIdentifier( + class_name="NoneScorer", + class_module="pyrit.score", + params={"threshold": 0.5, "optional_field": None}, + ) + result = _build_eval_dict( + identifier, + target_child_keys=_TARGET_CHILD_KEYS, + behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS, + ) + + assert result["threshold"] == 0.5 + assert "optional_field" not in result + + def test_target_children_hashed_with_behavioral_params_only(self): + """Test that target children are projected to behavioral params only.""" + child = ComponentIdentifier( + class_name="ChildTarget", + class_module="pyrit.target", + params={ + "model_name": "gpt-4", + "temperature": 0.7, + "top_p": 0.9, + "max_requests_per_minute": 100, + "endpoint": "https://example.com", + }, + ) + identifier = ComponentIdentifier( + class_name="ParentScorer", + class_module="pyrit.score", + children={"prompt_target": child}, + ) + result = _build_eval_dict( + identifier, + 
target_child_keys=_TARGET_CHILD_KEYS, + behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS, + ) + + assert "children" in result + assert isinstance(result["children"]["prompt_target"], str) + + def test_target_children_same_behavioral_different_operational_produce_same_hash(self): + """Test that target children differing only in operational params produce the same child hash.""" + child1 = ComponentIdentifier( + class_name="ChildTarget", + class_module="pyrit.target", + params={ + "model_name": "gpt-4", + "temperature": 0.7, + "endpoint": "https://endpoint-a.com", + "max_requests_per_minute": 50, + }, + ) + child2 = ComponentIdentifier( + class_name="ChildTarget", + class_module="pyrit.target", + params={ + "model_name": "gpt-4", + "temperature": 0.7, + "endpoint": "https://endpoint-b.com", + "max_requests_per_minute": 200, + }, + ) + id1 = ComponentIdentifier( + class_name="Scorer", + class_module="pyrit.score", + children={"prompt_target": child1}, + ) + id2 = ComponentIdentifier( + class_name="Scorer", + class_module="pyrit.score", + children={"prompt_target": child2}, + ) + result1 = _build_eval_dict( + id1, target_child_keys=_TARGET_CHILD_KEYS, behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS + ) + result2 = _build_eval_dict( + id2, target_child_keys=_TARGET_CHILD_KEYS, behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS + ) + + assert result1["children"]["prompt_target"] == result2["children"]["prompt_target"] + + def test_target_children_different_behavioral_produce_different_hash(self): + """Test that target children differing in behavioral params produce different child hashes.""" + child1 = ComponentIdentifier( + class_name="ChildTarget", + class_module="pyrit.target", + params={"model_name": "gpt-4", "temperature": 0.7}, + ) + child2 = ComponentIdentifier( + class_name="ChildTarget", + class_module="pyrit.target", + params={"model_name": "gpt-3.5-turbo", "temperature": 0.7}, + ) + id1 = ComponentIdentifier( + class_name="Scorer", + class_module="pyrit.score", 
+ children={"prompt_target": child1}, + ) + id2 = ComponentIdentifier( + class_name="Scorer", + class_module="pyrit.score", + children={"prompt_target": child2}, + ) + result1 = _build_eval_dict( + id1, target_child_keys=_TARGET_CHILD_KEYS, behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS + ) + result2 = _build_eval_dict( + id2, target_child_keys=_TARGET_CHILD_KEYS, behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS + ) + + assert result1["children"]["prompt_target"] != result2["children"]["prompt_target"] + + def test_multiple_children_as_list(self): + """Test that list-valued children produce a list of hashes.""" + child_a = ComponentIdentifier( + class_name="ChildA", + class_module="pyrit.target", + params={"model_name": "gpt-4"}, + ) + child_b = ComponentIdentifier( + class_name="ChildB", + class_module="pyrit.target", + params={"model_name": "gpt-3.5-turbo"}, + ) + identifier = ComponentIdentifier( + class_name="MultiChildScorer", + class_module="pyrit.score", + children={"targets": [child_a, child_b]}, + ) + result = _build_eval_dict( + identifier, + target_child_keys=_TARGET_CHILD_KEYS, + behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS, + ) + + assert "children" in result + assert isinstance(result["children"]["targets"], list) + assert len(result["children"]["targets"]) == 2 + + def test_single_child_unwrapped(self): + """Test that a single child is a scalar hash, not a list.""" + child = ComponentIdentifier( + class_name="OnlyChild", + class_module="pyrit.target", + params={"model_name": "gpt-4"}, + ) + identifier = ComponentIdentifier( + class_name="SingleChildScorer", + class_module="pyrit.score", + children={"target": child}, + ) + result = _build_eval_dict( + identifier, + target_child_keys=_TARGET_CHILD_KEYS, + behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS, + ) + + assert isinstance(result["children"]["target"], str) + + def test_no_children_key_when_empty(self): + """Test that 'children' key is absent when there are no children.""" + identifier = 
ComponentIdentifier( + class_name="NoChildScorer", + class_module="pyrit.score", + params={"threshold": 0.5}, + ) + result = _build_eval_dict( + identifier, + target_child_keys=_TARGET_CHILD_KEYS, + behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS, + ) + + assert "children" not in result + + def test_non_target_children_with_different_params_produce_different_hash(self): + """Test that non-target children differing in any param (including operational) produce different hashes.""" + child1 = ComponentIdentifier( + class_name="SubScorer", + class_module="pyrit.score", + params={"system_prompt_template": "prompt_a", "endpoint": "https://a.com"}, + ) + child2 = ComponentIdentifier( + class_name="SubScorer", + class_module="pyrit.score", + params={"system_prompt_template": "prompt_a", "endpoint": "https://b.com"}, + ) + id1 = ComponentIdentifier( + class_name="Scorer", + class_module="pyrit.score", + children={"sub_scorer": child1}, + ) + id2 = ComponentIdentifier( + class_name="Scorer", + class_module="pyrit.score", + children={"sub_scorer": child2}, + ) + result1 = _build_eval_dict( + id1, target_child_keys=_TARGET_CHILD_KEYS, behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS + ) + result2 = _build_eval_dict( + id2, target_child_keys=_TARGET_CHILD_KEYS, behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS + ) + + assert result1["children"]["sub_scorer"] != result2["children"]["sub_scorer"] + + def test_target_vs_non_target_children_handled_differently(self): + """Test that target children filter params while non-target children keep all params.""" + child = ComponentIdentifier( + class_name="SomeComponent", + class_module="pyrit.target", + params={"model_name": "gpt-4", "endpoint": "https://example.com"}, + ) + + id_as_target = ComponentIdentifier( + class_name="Scorer", + class_module="pyrit.score", + children={"prompt_target": child}, + ) + id_as_non_target = ComponentIdentifier( + class_name="Scorer", + class_module="pyrit.score", + children={"sub_scorer": child}, + ) + 
+ result_target = _build_eval_dict( + id_as_target, target_child_keys=_TARGET_CHILD_KEYS, behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS + ) + result_non_target = _build_eval_dict( + id_as_non_target, target_child_keys=_TARGET_CHILD_KEYS, behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS + ) + + assert result_target["children"]["prompt_target"] != result_non_target["children"]["sub_scorer"] + + def test_converter_target_filtered_like_prompt_target(self): + """Test that converter_target children are also filtered to behavioral params only.""" + child1 = ComponentIdentifier( + class_name="ConverterTarget", + class_module="pyrit.target", + params={"model_name": "gpt-4", "temperature": 0.7, "endpoint": "https://endpoint-a.com"}, + ) + child2 = ComponentIdentifier( + class_name="ConverterTarget", + class_module="pyrit.target", + params={"model_name": "gpt-4", "temperature": 0.7, "endpoint": "https://endpoint-b.com"}, + ) + id1 = ComponentIdentifier( + class_name="Scorer", + class_module="pyrit.score", + children={"converter_target": child1}, + ) + id2 = ComponentIdentifier( + class_name="Scorer", + class_module="pyrit.score", + children={"converter_target": child2}, + ) + result1 = _build_eval_dict( + id1, target_child_keys=_TARGET_CHILD_KEYS, behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS + ) + result2 = _build_eval_dict( + id2, target_child_keys=_TARGET_CHILD_KEYS, behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS + ) + + assert result1["children"]["converter_target"] == result2["children"]["converter_target"] + + +class TestComputeEvalHash: + """Tests for the compute_eval_hash free function.""" + + def test_deterministic_for_same_identifier(self): + """Test that compute_eval_hash returns the same hash for the same identifier.""" + identifier = ComponentIdentifier( + class_name="StableScorer", + class_module="pyrit.score", + params={"threshold": 0.5}, + ) + hash1 = compute_eval_hash( + identifier, + target_child_keys=_TARGET_CHILD_KEYS, + 
behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS, + ) + hash2 = compute_eval_hash( + identifier, + target_child_keys=_TARGET_CHILD_KEYS, + behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS, + ) + + assert hash1 == hash2 + + def test_returns_hex_string(self): + """Test that compute_eval_hash returns a valid hex string.""" + identifier = ComponentIdentifier( + class_name="HexScorer", + class_module="pyrit.score", + ) + result = compute_eval_hash( + identifier, + target_child_keys=_TARGET_CHILD_KEYS, + behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS, + ) + + assert isinstance(result, str) + assert len(result) == 64 # SHA-256 hex digest + assert all(c in "0123456789abcdef" for c in result) + + def test_different_class_names_produce_different_hashes(self): + """Test that different class names produce different eval hashes.""" + id1 = ComponentIdentifier(class_name="ScorerA", class_module="pyrit.score") + id2 = ComponentIdentifier(class_name="ScorerB", class_module="pyrit.score") + + assert compute_eval_hash( + id1, target_child_keys=_TARGET_CHILD_KEYS, behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS + ) != compute_eval_hash( + id2, target_child_keys=_TARGET_CHILD_KEYS, behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS + ) + + def test_different_params_produce_different_hashes(self): + """Test that different params produce different eval hashes.""" + id1 = ComponentIdentifier(class_name="Scorer", class_module="pyrit.score", params={"threshold": 0.5}) + id2 = ComponentIdentifier(class_name="Scorer", class_module="pyrit.score", params={"threshold": 0.8}) + + assert compute_eval_hash( + id1, target_child_keys=_TARGET_CHILD_KEYS, behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS + ) != compute_eval_hash( + id2, target_child_keys=_TARGET_CHILD_KEYS, behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS + ) + + def test_eval_hash_differs_from_component_hash(self): + """Test that eval hash differs from component hash when target children have operational params.""" + child = 
ComponentIdentifier( + class_name="Target", + class_module="pyrit.target", + params={"model_name": "gpt-4", "endpoint": "https://example.com"}, + ) + identifier = ComponentIdentifier( + class_name="Scorer", + class_module="pyrit.score", + children={"prompt_target": child}, + ) + + eval_hash = compute_eval_hash( + identifier, + target_child_keys=_TARGET_CHILD_KEYS, + behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS, + ) + assert eval_hash != identifier.hash + + def test_operational_child_params_ignored(self): + """Test that operational params on target children don't affect eval hash.""" + child1 = ComponentIdentifier( + class_name="Target", + class_module="pyrit.target", + params={ + "model_name": "gpt-4", + "temperature": 0.7, + "endpoint": "https://endpoint-a.com", + "max_requests_per_minute": 50, + }, + ) + child2 = ComponentIdentifier( + class_name="Target", + class_module="pyrit.target", + params={ + "model_name": "gpt-4", + "temperature": 0.7, + "endpoint": "https://endpoint-b.com", + "max_requests_per_minute": 200, + }, + ) + id1 = ComponentIdentifier(class_name="Scorer", class_module="pyrit.score", children={"prompt_target": child1}) + id2 = ComponentIdentifier(class_name="Scorer", class_module="pyrit.score", children={"prompt_target": child2}) + + assert compute_eval_hash( + id1, target_child_keys=_TARGET_CHILD_KEYS, behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS + ) == compute_eval_hash( + id2, target_child_keys=_TARGET_CHILD_KEYS, behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS + ) + + def test_behavioral_child_params_affect_eval_hash(self): + """Test that behavioral params on target children do affect eval hash.""" + child1 = ComponentIdentifier( + class_name="Target", + class_module="pyrit.target", + params={"model_name": "gpt-4", "temperature": 0.7}, + ) + child2 = ComponentIdentifier( + class_name="Target", + class_module="pyrit.target", + params={"model_name": "gpt-4", "temperature": 0.0}, + ) + id1 = ComponentIdentifier(class_name="Scorer", 
class_module="pyrit.score", children={"prompt_target": child1}) + id2 = ComponentIdentifier(class_name="Scorer", class_module="pyrit.score", children={"prompt_target": child2}) + + assert compute_eval_hash( + id1, target_child_keys=_TARGET_CHILD_KEYS, behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS + ) != compute_eval_hash( + id2, target_child_keys=_TARGET_CHILD_KEYS, behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS + ) + + def test_scorer_own_params_all_included(self): + """Test that all of the scorer's own params (not just behavioral) are included.""" + id1 = ComponentIdentifier( + class_name="Scorer", class_module="pyrit.score", params={"system_prompt_template": "template_a"} + ) + id2 = ComponentIdentifier( + class_name="Scorer", class_module="pyrit.score", params={"system_prompt_template": "template_b"} + ) + + assert compute_eval_hash( + id1, target_child_keys=_TARGET_CHILD_KEYS, behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS + ) != compute_eval_hash( + id2, target_child_keys=_TARGET_CHILD_KEYS, behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS + ) + + def test_empty_target_child_keys_returns_component_hash(self): + """Test that empty target_child_keys means no filtering — returns component hash.""" + child = ComponentIdentifier( + class_name="Target", + class_module="pyrit.target", + params={"model_name": "gpt-4", "endpoint": "https://example.com"}, + ) + identifier = ComponentIdentifier( + class_name="Scorer", + class_module="pyrit.score", + children={"prompt_target": child}, + ) + + result = compute_eval_hash( + identifier, + target_child_keys=frozenset(), + behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS, + ) + assert result == identifier.hash diff --git a/tests/unit/identifiers/test_evaluation_identity.py b/tests/unit/identifiers/test_evaluation_identity.py new file mode 100644 index 0000000000..af572922b8 --- /dev/null +++ b/tests/unit/identifiers/test_evaluation_identity.py @@ -0,0 +1,225 @@ +# Copyright (c) Microsoft Corporation. 
+# Licensed under the MIT license. + +""" +Tests for pyrit.identifiers.evaluation_identity. + +Covers the ``EvaluationIdentity`` abstract base class, the ``_build_eval_dict`` +helper, and the ``compute_eval_hash`` free function. +""" + +from typing import ClassVar + +import pytest + +from pyrit.identifiers import ComponentIdentifier, compute_eval_hash +from pyrit.identifiers.evaluation_identity import EvaluationIdentity, _build_eval_dict + +# --------------------------------------------------------------------------- +# Concrete subclass for testing the ABC +# --------------------------------------------------------------------------- + + +class _StubEvaluationIdentity(EvaluationIdentity): + """Minimal concrete subclass for testing the abstract base class.""" + + TARGET_CHILD_KEYS: ClassVar[frozenset[str]] = frozenset({"my_target"}) + BEHAVIORAL_CHILD_PARAMS: ClassVar[frozenset[str]] = frozenset({"model_name"}) + + +# --------------------------------------------------------------------------- +# Test constants +# --------------------------------------------------------------------------- + +_TARGET_CHILD_KEYS = frozenset({"prompt_target", "converter_target"}) +_BEHAVIORAL_CHILD_PARAMS = frozenset({"model_name", "temperature", "top_p"}) + + +class TestBuildEvalDict: + """Tests for _build_eval_dict filtering logic.""" + + def test_target_child_params_filtered(self): + """Test that target children only keep behavioral params.""" + child = ComponentIdentifier( + class_name="Target", + class_module="pyrit.target", + params={"model_name": "gpt-4", "endpoint": "https://example.com"}, + ) + identifier = ComponentIdentifier( + class_name="Scorer", + class_module="pyrit.score", + children={"prompt_target": child}, + ) + + result = _build_eval_dict( + identifier, + target_child_keys=_TARGET_CHILD_KEYS, + behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS, + ) + + # "endpoint" must not appear anywhere in the child sub-dict + assert "endpoint" not in str(result) + assert 
"children" in result + + def test_non_target_child_params_kept(self): + """Test that non-target children keep all params (full recursive treatment).""" + child = ComponentIdentifier( + class_name="SubScorer", + class_module="pyrit.score", + params={"threshold": 0.5, "extra": "value"}, + ) + identifier = ComponentIdentifier( + class_name="Scorer", + class_module="pyrit.score", + children={"sub_scorer": child}, + ) + + result = _build_eval_dict( + identifier, + target_child_keys=_TARGET_CHILD_KEYS, + behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS, + ) + + assert "children" in result + + def test_no_children_produces_flat_dict(self): + """Test that an identifier with no children produces a dict without 'children' key.""" + identifier = ComponentIdentifier( + class_name="Scorer", + class_module="pyrit.score", + params={"threshold": 0.5}, + ) + + result = _build_eval_dict( + identifier, + target_child_keys=_TARGET_CHILD_KEYS, + behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS, + ) + + assert "children" not in result + assert result[ComponentIdentifier.KEY_CLASS_NAME] == "Scorer" + + +class TestComputeEvalHash: + """Tests for the compute_eval_hash free function.""" + + def test_deterministic(self): + """Test that the same identifier + config produces the same hash.""" + identifier = ComponentIdentifier(class_name="Scorer", class_module="pyrit.score") + h1 = compute_eval_hash( + identifier, target_child_keys=_TARGET_CHILD_KEYS, behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS + ) + h2 = compute_eval_hash( + identifier, target_child_keys=_TARGET_CHILD_KEYS, behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS + ) + assert h1 == h2 + + def test_empty_target_child_keys_returns_component_hash(self): + """Test that empty target_child_keys bypasses filtering and returns component hash.""" + child = ComponentIdentifier( + class_name="Target", + class_module="pyrit.target", + params={"model_name": "gpt-4", "endpoint": "https://example.com"}, + ) + identifier = ComponentIdentifier( 
+ class_name="Scorer", + class_module="pyrit.score", + children={"prompt_target": child}, + ) + + result = compute_eval_hash( + identifier, + target_child_keys=frozenset(), + behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS, + ) + assert result == identifier.hash + + def test_returns_64_char_hex(self): + """Test that the hash is a 64-char lowercase hex string (SHA-256).""" + identifier = ComponentIdentifier(class_name="S", class_module="m") + result = compute_eval_hash( + identifier, target_child_keys=_TARGET_CHILD_KEYS, behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS + ) + assert len(result) == 64 + assert all(c in "0123456789abcdef" for c in result) + + +class TestEvaluationIdentity: + """Tests for the EvaluationIdentity abstract base class.""" + + def test_identifier_property_returns_original(self): + """Test that .identifier returns the ComponentIdentifier passed at construction.""" + cid = ComponentIdentifier(class_name="Scorer", class_module="pyrit.score") + identity = _StubEvaluationIdentity(cid) + assert identity.identifier is cid + + def test_eval_hash_is_string(self): + """Test that .eval_hash is a valid hex string.""" + cid = ComponentIdentifier(class_name="Scorer", class_module="pyrit.score") + identity = _StubEvaluationIdentity(cid) + assert isinstance(identity.eval_hash, str) + assert len(identity.eval_hash) == 64 + + def test_eval_hash_matches_free_function(self): + """Test that .eval_hash matches calling compute_eval_hash directly.""" + cid = ComponentIdentifier( + class_name="Scorer", + class_module="pyrit.score", + params={"threshold": 0.5}, + ) + identity = _StubEvaluationIdentity(cid) + + expected = compute_eval_hash( + cid, + target_child_keys=_StubEvaluationIdentity.TARGET_CHILD_KEYS, + behavioral_child_params=_StubEvaluationIdentity.BEHAVIORAL_CHILD_PARAMS, + ) + assert identity.eval_hash == expected + + def test_eval_hash_differs_from_component_hash_when_target_filtered(self): + """Test that eval hash differs from component hash when target 
children have operational params.""" + child = ComponentIdentifier( + class_name="Target", + class_module="pyrit.target", + params={"model_name": "gpt-4", "endpoint": "https://example.com"}, + ) + cid = ComponentIdentifier( + class_name="Scorer", + class_module="pyrit.score", + children={"my_target": child}, + ) + identity = _StubEvaluationIdentity(cid) + + # "endpoint" is operational, so eval hash should differ from full component hash + assert identity.eval_hash != cid.hash + + def test_cannot_instantiate_abc_directly(self): + """Test that EvaluationIdentity cannot be instantiated without ClassVars.""" + with pytest.raises(AttributeError): + EvaluationIdentity(ComponentIdentifier(class_name="X", class_module="m")) # type: ignore[abstract] + + def test_custom_classvars_produce_expected_hash(self): + """Test that a concrete subclass with custom ClassVars produces the correct eval hash.""" + + class CustomIdentity(EvaluationIdentity): + TARGET_CHILD_KEYS: ClassVar[frozenset[str]] = frozenset({"special_target"}) + BEHAVIORAL_CHILD_PARAMS: ClassVar[frozenset[str]] = frozenset({"model_name", "temperature"}) + + child = ComponentIdentifier( + class_name="Target", + class_module="pyrit.target", + params={"model_name": "gpt-4", "temperature": 0.7, "endpoint": "https://example.com"}, + ) + cid = ComponentIdentifier( + class_name="Scorer", + class_module="pyrit.score", + children={"special_target": child}, + ) + identity = CustomIdentity(cid) + + expected = compute_eval_hash( + cid, + target_child_keys=frozenset({"special_target"}), + behavioral_child_params=frozenset({"model_name", "temperature"}), + ) + assert identity.eval_hash == expected diff --git a/tests/unit/score/test_scorer_evaluation_identity.py b/tests/unit/score/test_scorer_evaluation_identity.py new file mode 100644 index 0000000000..cc61acdde6 --- /dev/null +++ b/tests/unit/score/test_scorer_evaluation_identity.py @@ -0,0 +1,151 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. 
+ +""" +Tests for pyrit.score.scorer_evaluation.scorer_evaluation_identity. + +Covers ``ScorerEvaluationIdentity`` ClassVar values, eval-hash delegation, and +the ``Scorer.get_eval_hash()`` convenience method. +""" + +import pytest + +from pyrit.identifiers import ComponentIdentifier, Identifiable, compute_eval_hash +from pyrit.score.scorer_evaluation.scorer_evaluation_identity import ScorerEvaluationIdentity + + +class TestScorerEvaluationIdentityConstants: + """Tests for the ClassVar constants on ScorerEvaluationIdentity.""" + + def test_target_child_keys(self): + """Test that TARGET_CHILD_KEYS contains the expected scorer target names.""" + assert frozenset({"prompt_target", "converter_target"}) == ScorerEvaluationIdentity.TARGET_CHILD_KEYS + + def test_behavioral_child_params(self): + """Test that BEHAVIORAL_CHILD_PARAMS contains the expected behavioral params.""" + assert frozenset({"model_name", "temperature", "top_p"}) == ScorerEvaluationIdentity.BEHAVIORAL_CHILD_PARAMS + + +class TestScorerEvaluationIdentityEvalHash: + """Tests for ScorerEvaluationIdentity eval hash computation.""" + + def test_deterministic(self): + """Test that the same identifier produces the same eval hash.""" + cid = ComponentIdentifier(class_name="Scorer", class_module="pyrit.score", params={"threshold": 0.5}) + h1 = ScorerEvaluationIdentity(cid).eval_hash + h2 = ScorerEvaluationIdentity(cid).eval_hash + assert h1 == h2 + + def test_operational_params_ignored(self): + """Test that operational target params don't affect the scorer eval hash.""" + child1 = ComponentIdentifier( + class_name="Target", + class_module="pyrit.target", + params={"model_name": "gpt-4", "endpoint": "https://endpoint-a.com"}, + ) + child2 = ComponentIdentifier( + class_name="Target", + class_module="pyrit.target", + params={"model_name": "gpt-4", "endpoint": "https://endpoint-b.com"}, + ) + id1 = ComponentIdentifier(class_name="Scorer", class_module="pyrit.score", children={"prompt_target": child1}) + id2 = 
ComponentIdentifier(class_name="Scorer", class_module="pyrit.score", children={"prompt_target": child2}) + + assert ScorerEvaluationIdentity(id1).eval_hash == ScorerEvaluationIdentity(id2).eval_hash + + def test_behavioral_params_affect_hash(self): + """Test that behavioral target params do affect the scorer eval hash.""" + child1 = ComponentIdentifier( + class_name="Target", + class_module="pyrit.target", + params={"model_name": "gpt-4", "temperature": 0.7}, + ) + child2 = ComponentIdentifier( + class_name="Target", + class_module="pyrit.target", + params={"model_name": "gpt-4", "temperature": 0.0}, + ) + id1 = ComponentIdentifier(class_name="Scorer", class_module="pyrit.score", children={"prompt_target": child1}) + id2 = ComponentIdentifier(class_name="Scorer", class_module="pyrit.score", children={"prompt_target": child2}) + + assert ScorerEvaluationIdentity(id1).eval_hash != ScorerEvaluationIdentity(id2).eval_hash + + def test_eval_hash_matches_free_function(self): + """Test that eval_hash matches calling compute_eval_hash with scorer constants.""" + cid = ComponentIdentifier(class_name="MyScorer", class_module="pyrit.score", params={"k": "v"}) + identity = ScorerEvaluationIdentity(cid) + + expected = compute_eval_hash( + cid, + target_child_keys=ScorerEvaluationIdentity.TARGET_CHILD_KEYS, + behavioral_child_params=ScorerEvaluationIdentity.BEHAVIORAL_CHILD_PARAMS, + ) + assert identity.eval_hash == expected + + +@pytest.mark.usefixtures("patch_central_database") +class TestScorerGetEvalHash: + """Tests for Scorer.get_eval_hash() convenience method (adapted from old TestGetEvalHash).""" + + def test_get_eval_hash_uses_scorer_identity(self): + """Test that Scorer.get_eval_hash() delegates to ScorerEvaluationIdentity.""" + + class FakeScorer(Identifiable): + def _build_identifier(self) -> ComponentIdentifier: + child = ComponentIdentifier( + class_name="Target", + class_module="pyrit.target", + params={"model_name": "gpt-4", "endpoint": "https://example.com"}, + ) 
+ return ComponentIdentifier.of(self, children={"prompt_target": child}) + + scorer = FakeScorer() + identifier = scorer.get_identifier() + eval_hash = ScorerEvaluationIdentity(identifier).eval_hash + + expected = compute_eval_hash( + identifier, + target_child_keys=ScorerEvaluationIdentity.TARGET_CHILD_KEYS, + behavioral_child_params=ScorerEvaluationIdentity.BEHAVIORAL_CHILD_PARAMS, + ) + assert eval_hash == expected + + def test_get_eval_hash_filters_operational_params(self): + """Test that Scorer.get_eval_hash() filters operational params from target children.""" + + class ScorerLike(Identifiable): + def __init__(self, *, endpoint: str): + self._endpoint = endpoint + + def _build_identifier(self) -> ComponentIdentifier: + child = ComponentIdentifier( + class_name="Target", + class_module="pyrit.target", + params={"model_name": "gpt-4", "endpoint": self._endpoint}, + ) + return ComponentIdentifier.of(self, children={"prompt_target": child}) + + scorer_a = ScorerLike(endpoint="https://endpoint-a.com") + scorer_b = ScorerLike(endpoint="https://endpoint-b.com") + + hash_a = ScorerEvaluationIdentity(scorer_a.get_identifier()).eval_hash + hash_b = ScorerEvaluationIdentity(scorer_b.get_identifier()).eval_hash + + # Different endpoints should produce same eval hash (operational param filtered) + assert hash_a == hash_b + # But different component hashes (endpoint is in full identity) + assert scorer_a.get_identifier().hash != scorer_b.get_identifier().hash + + def test_get_eval_hash_no_target_children_equals_component_hash(self): + """Test that eval hash equals component hash when there are no target children.""" + + class SimpleScorer(Identifiable): + def _build_identifier(self) -> ComponentIdentifier: + return ComponentIdentifier.of(self, params={"key": "value"}) + + scorer = SimpleScorer() + identifier = scorer.get_identifier() + eval_hash = ScorerEvaluationIdentity(identifier).eval_hash + + # No children named "prompt_target" or "converter_target", so no filtering 
occurs + assert eval_hash == identifier.hash diff --git a/tests/unit/score/test_scorer_evaluator.py b/tests/unit/score/test_scorer_evaluator.py index 46cccaa543..017023292b 100644 --- a/tests/unit/score/test_scorer_evaluator.py +++ b/tests/unit/score/test_scorer_evaluator.py @@ -33,6 +33,7 @@ def mock_harm_scorer(): mock_identifier.hash = "test_hash_456" mock_identifier.system_prompt_template = "test_system_prompt" scorer.get_identifier = MagicMock(return_value=mock_identifier) + scorer.get_eval_hash = MagicMock(return_value="test_hash_456") return scorer @@ -46,6 +47,7 @@ def mock_objective_scorer(): mock_identifier.hash = "test_hash_123" mock_identifier.user_prompt_template = "test_user_prompt" scorer.get_identifier = MagicMock(return_value=mock_identifier) + scorer.get_eval_hash = MagicMock(return_value="test_hash_123") return scorer @@ -187,7 +189,7 @@ def test_compute_harm_metrics_partial_agreement(mock_harm_scorer): assert np.isclose(metrics.mean_absolute_error, 0.1) -@patch("pyrit.score.scorer_evaluation.scorer_evaluator.find_objective_metrics_by_hash") +@patch("pyrit.score.scorer_evaluation.scorer_evaluator.find_objective_metrics_by_eval_hash") def test_should_skip_evaluation_objective_found(mock_find, mock_objective_scorer, tmp_path): """Test skipping evaluation when existing objective metrics have sufficient trials.""" evaluator = ObjectiveScorerEvaluator(scorer=mock_objective_scorer) @@ -220,11 +222,11 @@ def test_should_skip_evaluation_objective_found(mock_find, mock_objective_scorer assert result == expected_metrics mock_find.assert_called_once_with( file_path=result_file, - hash="test_hash_123", + eval_hash="test_hash_123", ) -@patch("pyrit.score.scorer_evaluation.scorer_evaluator.find_objective_metrics_by_hash") +@patch("pyrit.score.scorer_evaluation.scorer_evaluator.find_objective_metrics_by_eval_hash") def test_should_skip_evaluation_objective_not_found(mock_find, mock_objective_scorer, tmp_path): """Test when no existing objective metrics are 
found in registry.""" evaluator = ObjectiveScorerEvaluator(scorer=mock_objective_scorer) @@ -243,11 +245,11 @@ def test_should_skip_evaluation_objective_not_found(mock_find, mock_objective_sc assert result is None mock_find.assert_called_once_with( file_path=result_file, - hash="test_hash_123", + eval_hash="test_hash_123", ) -@patch("pyrit.score.scorer_evaluation.scorer_evaluator.find_objective_metrics_by_hash") +@patch("pyrit.score.scorer_evaluation.scorer_evaluator.find_objective_metrics_by_eval_hash") def test_should_skip_evaluation_version_changed_runs_evaluation(mock_find, mock_objective_scorer, tmp_path): """Test that different dataset_version triggers re-evaluation (replace existing).""" evaluator = ObjectiveScorerEvaluator(scorer=mock_objective_scorer) @@ -280,7 +282,7 @@ def test_should_skip_evaluation_version_changed_runs_evaluation(mock_find, mock_ assert result is None -@patch("pyrit.score.scorer_evaluation.scorer_evaluator.find_objective_metrics_by_hash") +@patch("pyrit.score.scorer_evaluation.scorer_evaluator.find_objective_metrics_by_eval_hash") def test_should_skip_evaluation_fewer_trials_requested_skips(mock_find, mock_objective_scorer, tmp_path): """Test that requesting fewer trials than existing skips evaluation.""" evaluator = ObjectiveScorerEvaluator(scorer=mock_objective_scorer) @@ -313,7 +315,7 @@ def test_should_skip_evaluation_fewer_trials_requested_skips(mock_find, mock_obj assert result == existing_metrics -@patch("pyrit.score.scorer_evaluation.scorer_evaluator.find_objective_metrics_by_hash") +@patch("pyrit.score.scorer_evaluation.scorer_evaluator.find_objective_metrics_by_eval_hash") def test_should_skip_evaluation_more_trials_requested_runs(mock_find, mock_objective_scorer, tmp_path): """Test that requesting more trials than existing triggers re-evaluation.""" evaluator = ObjectiveScorerEvaluator(scorer=mock_objective_scorer) @@ -346,7 +348,7 @@ def test_should_skip_evaluation_more_trials_requested_runs(mock_find, mock_objec assert 
result is None -@patch("pyrit.score.scorer_evaluation.scorer_evaluator.find_harm_metrics_by_hash") +@patch("pyrit.score.scorer_evaluation.scorer_evaluator.find_harm_metrics_by_eval_hash") def test_should_skip_evaluation_harm_found(mock_find, mock_harm_scorer, tmp_path): """Test skipping evaluation when existing harm metrics have sufficient trials.""" evaluator = HarmScorerEvaluator(scorer=mock_harm_scorer) @@ -380,12 +382,12 @@ def test_should_skip_evaluation_harm_found(mock_find, mock_harm_scorer, tmp_path assert should_skip is True assert result == expected_metrics mock_find.assert_called_once_with( - hash="test_hash_456", + eval_hash="test_hash_456", harm_category="hate_speech", ) -@patch("pyrit.score.scorer_evaluation.scorer_evaluator.find_harm_metrics_by_hash") +@patch("pyrit.score.scorer_evaluation.scorer_evaluator.find_harm_metrics_by_eval_hash") def test_should_skip_evaluation_harm_missing_category(mock_find, mock_harm_scorer, tmp_path): """Test that a missing harm_category means the evaluation should not be skipped.""" evaluator = HarmScorerEvaluator(scorer=mock_harm_scorer) @@ -404,14 +406,14 @@ def test_should_skip_evaluation_harm_missing_category(mock_find, mock_harm_score mock_find.assert_not_called() -@patch("pyrit.score.scorer_evaluation.scorer_evaluator.find_objective_metrics_by_hash") +@patch("pyrit.score.scorer_evaluation.scorer_evaluator.find_objective_metrics_by_eval_hash") def test_should_skip_evaluation_exception_handling(mock_find, mock_objective_scorer, tmp_path): """Test that exceptions are caught and (False, None) is returned.""" evaluator = ObjectiveScorerEvaluator(scorer=mock_objective_scorer) result_file = tmp_path / "test_results.jsonl" - # Make get_identifier() raise an exception - mock_objective_scorer.get_identifier = MagicMock(side_effect=Exception("Identifier computation failed")) + # Make get_eval_hash() raise an exception + mock_objective_scorer.get_eval_hash = MagicMock(side_effect=Exception("Identifier computation failed")) should_skip, result = 
evaluator._should_skip_evaluation( dataset_version="1.0", @@ -424,13 +426,11 @@ def test_should_skip_evaluation_exception_handling(mock_find, mock_objective_sco assert result is None mock_find.assert_not_called() - # Restore get_identifier for other tests - mock_identifier = MagicMock() - mock_identifier.hash = "test_hash_123" - mock_objective_scorer.get_identifier = MagicMock(return_value=mock_identifier) + # Restore get_eval_hash for other tests + mock_objective_scorer.get_eval_hash = MagicMock(return_value="test_hash_123") -@patch("pyrit.score.scorer_evaluation.scorer_evaluator.find_harm_metrics_by_hash") +@patch("pyrit.score.scorer_evaluation.scorer_evaluator.find_harm_metrics_by_eval_hash") def test_should_skip_evaluation_harm_definition_version_changed_runs_evaluation(mock_find, mock_harm_scorer, tmp_path): """Test that harm_definition_version change triggers re-evaluation.""" evaluator = HarmScorerEvaluator(scorer=mock_harm_scorer) @@ -469,7 +469,7 @@ def test_should_skip_evaluation_harm_definition_version_changed_runs_evaluation( assert result is None -@patch("pyrit.score.scorer_evaluation.scorer_evaluator.find_harm_metrics_by_hash") +@patch("pyrit.score.scorer_evaluation.scorer_evaluator.find_harm_metrics_by_eval_hash") def test_should_skip_evaluation_harm_definition_version_same_skips(mock_find, mock_harm_scorer, tmp_path): """Test that matching harm_definition_version allows skip when other conditions met.""" evaluator = HarmScorerEvaluator(scorer=mock_harm_scorer) @@ -508,7 +508,7 @@ def test_should_skip_evaluation_harm_definition_version_same_skips(mock_find, mo assert result == existing_metrics -@patch("pyrit.score.scorer_evaluation.scorer_evaluator.find_harm_metrics_by_hash") +@patch("pyrit.score.scorer_evaluation.scorer_evaluator.find_harm_metrics_by_eval_hash") def test_should_skip_evaluation_harm_definition_version_none_in_existing_runs_evaluation( mock_find, mock_harm_scorer, tmp_path ): diff --git a/tests/unit/score/test_scorer_metrics.py 
b/tests/unit/score/test_scorer_metrics.py index 26c4971aac..7000a9e123 100644 --- a/tests/unit/score/test_scorer_metrics.py +++ b/tests/unit/score/test_scorer_metrics.py @@ -12,8 +12,6 @@ ScorerMetricsWithIdentity, ) from pyrit.score.scorer_evaluation.scorer_metrics_io import ( - _build_eval_dict, - compute_eval_hash, get_all_harm_metrics, get_all_objective_metrics, replace_evaluation_results, @@ -440,6 +438,7 @@ def test_replace_adds_new_entry(self, tmp_path): replace_evaluation_results( file_path=result_file, scorer_identifier=scorer_identifier, + eval_hash=scorer_identifier.hash, metrics=metrics, ) @@ -449,7 +448,7 @@ def test_replace_adds_new_entry(self, tmp_path): assert len(lines) == 1 entry = json.loads(lines[0]) - assert entry["hash"] == scorer_identifier.hash + assert entry["eval_hash"] == scorer_identifier.hash assert entry["metrics"]["accuracy"] == 0.9 def test_replace_replaces_existing_entry(self, tmp_path): @@ -477,6 +476,7 @@ def test_replace_replaces_existing_entry(self, tmp_path): replace_evaluation_results( file_path=result_file, scorer_identifier=scorer_identifier, + eval_hash=scorer_identifier.hash, metrics=initial_metrics, ) @@ -496,6 +496,7 @@ def test_replace_replaces_existing_entry(self, tmp_path): replace_evaluation_results( file_path=result_file, scorer_identifier=scorer_identifier, + eval_hash=scorer_identifier.hash, metrics=updated_metrics, ) @@ -505,7 +506,7 @@ def test_replace_replaces_existing_entry(self, tmp_path): assert len(lines) == 1 entry = json.loads(lines[0]) - assert entry["hash"] == scorer_identifier.hash + assert entry["eval_hash"] == scorer_identifier.hash assert entry["metrics"]["accuracy"] == 0.9 assert entry["metrics"]["num_scorer_trials"] == 5 @@ -532,6 +533,7 @@ def test_replace_preserves_other_entries(self, tmp_path): replace_evaluation_results( file_path=result_file, scorer_identifier=scorer1, + eval_hash=scorer1.hash, metrics=metrics1, ) @@ -554,6 +556,7 @@ def test_replace_preserves_other_entries(self, tmp_path): 
replace_evaluation_results( file_path=result_file, scorer_identifier=scorer2, + eval_hash=scorer2.hash, metrics=metrics2, ) @@ -572,6 +575,7 @@ def test_replace_preserves_other_entries(self, tmp_path): replace_evaluation_results( file_path=result_file, scorer_identifier=scorer1, + eval_hash=scorer1.hash, metrics=updated_metrics1, ) @@ -581,466 +585,9 @@ def test_replace_preserves_other_entries(self, tmp_path): assert len(lines) == 2 entries = [json.loads(line) for line in lines] - hashes = {e["hash"]: e for e in entries} + hashes = {e["eval_hash"]: e for e in entries} assert scorer1.hash in hashes assert scorer2.hash in hashes assert hashes[scorer1.hash]["metrics"]["accuracy"] == 0.95 assert hashes[scorer2.hash]["metrics"]["accuracy"] == 0.85 - - -class TestBuildEvalDict: - """Tests for the _build_eval_dict function.""" - - def test_basic_identifier_without_params_or_children(self): - """Test _build_eval_dict with a simple identifier with no params or children.""" - identifier = ComponentIdentifier( - class_name="SimpleScorer", - class_module="pyrit.score", - ) - result = _build_eval_dict(identifier) - - assert result["class_name"] == "SimpleScorer" - assert result["class_module"] == "pyrit.score" - assert "children" not in result - - def test_includes_all_non_none_params(self): - """Test that all non-None params are included in the eval dict.""" - identifier = ComponentIdentifier( - class_name="ParamScorer", - class_module="pyrit.score", - params={"threshold": 0.5, "template": "prompt_text", "mode": "strict"}, - ) - result = _build_eval_dict(identifier) - - assert result["threshold"] == 0.5 - assert result["template"] == "prompt_text" - assert result["mode"] == "strict" - - def test_param_allowlist_filters_params(self): - """Test that param_allowlist restricts which params are included.""" - identifier = ComponentIdentifier( - class_name="FilteredScorer", - class_module="pyrit.score", - params={"threshold": 0.5, "template": "prompt_text", "mode": "strict"}, - ) - 
result = _build_eval_dict(identifier, param_allowlist=frozenset({"threshold", "mode"})) - - assert result["threshold"] == 0.5 - assert result["mode"] == "strict" - assert "template" not in result - - def test_none_params_are_excluded(self): - """Test that None-valued params are excluded from the eval dict.""" - identifier = ComponentIdentifier( - class_name="NoneScorer", - class_module="pyrit.score", - params={"threshold": 0.5, "optional_field": None}, - ) - # Note: ComponentIdentifier filters None in .of(), but direct construction allows it - result = _build_eval_dict(identifier) - - assert result["threshold"] == 0.5 - assert "optional_field" not in result - - def test_children_hashed_with_behavioral_params_only(self): - """Test that target children are projected to behavioral params only.""" - child = ComponentIdentifier( - class_name="ChildTarget", - class_module="pyrit.target", - params={ - "model_name": "gpt-4", - "temperature": 0.7, - "top_p": 0.9, - "max_requests_per_minute": 100, - "endpoint": "https://example.com", - }, - ) - identifier = ComponentIdentifier( - class_name="ParentScorer", - class_module="pyrit.score", - children={"prompt_target": child}, - ) - result = _build_eval_dict(identifier) - - assert "children" in result - # The child hash should be a string (hashed), not the full child dict - assert isinstance(result["children"]["prompt_target"], str) - - def test_children_with_different_operational_params_produce_same_hash(self): - """Test that target children differing only in operational params produce the same child hash.""" - child1 = ComponentIdentifier( - class_name="ChildTarget", - class_module="pyrit.target", - params={ - "model_name": "gpt-4", - "temperature": 0.7, - "endpoint": "https://endpoint-a.com", - "max_requests_per_minute": 50, - }, - ) - child2 = ComponentIdentifier( - class_name="ChildTarget", - class_module="pyrit.target", - params={ - "model_name": "gpt-4", - "temperature": 0.7, - "endpoint": "https://endpoint-b.com", - 
"max_requests_per_minute": 200, - }, - ) - id1 = ComponentIdentifier( - class_name="Scorer", - class_module="pyrit.score", - children={"prompt_target": child1}, - ) - id2 = ComponentIdentifier( - class_name="Scorer", - class_module="pyrit.score", - children={"prompt_target": child2}, - ) - result1 = _build_eval_dict(id1) - result2 = _build_eval_dict(id2) - - assert result1["children"]["prompt_target"] == result2["children"]["prompt_target"] - - def test_children_with_different_behavioral_params_produce_different_hash(self): - """Test that target children differing in behavioral params produce different child hashes.""" - child1 = ComponentIdentifier( - class_name="ChildTarget", - class_module="pyrit.target", - params={"model_name": "gpt-4", "temperature": 0.7}, - ) - child2 = ComponentIdentifier( - class_name="ChildTarget", - class_module="pyrit.target", - params={"model_name": "gpt-3.5-turbo", "temperature": 0.7}, - ) - id1 = ComponentIdentifier( - class_name="Scorer", - class_module="pyrit.score", - children={"prompt_target": child1}, - ) - id2 = ComponentIdentifier( - class_name="Scorer", - class_module="pyrit.score", - children={"prompt_target": child2}, - ) - result1 = _build_eval_dict(id1) - result2 = _build_eval_dict(id2) - - assert result1["children"]["prompt_target"] != result2["children"]["prompt_target"] - - def test_multiple_children_as_list(self): - """Test that list-valued children produce a list of hashes.""" - child_a = ComponentIdentifier( - class_name="ChildA", - class_module="pyrit.target", - params={"model_name": "gpt-4"}, - ) - child_b = ComponentIdentifier( - class_name="ChildB", - class_module="pyrit.target", - params={"model_name": "gpt-3.5-turbo"}, - ) - identifier = ComponentIdentifier( - class_name="MultiChildScorer", - class_module="pyrit.score", - children={"targets": [child_a, child_b]}, - ) - result = _build_eval_dict(identifier) - - assert "children" in result - assert isinstance(result["children"]["targets"], list) - assert 
len(result["children"]["targets"]) == 2 - - def test_single_child_list_unwrapped(self): - """Test that a single-element child list is unwrapped to a scalar hash.""" - child = ComponentIdentifier( - class_name="OnlyChild", - class_module="pyrit.target", - params={"model_name": "gpt-4"}, - ) - identifier = ComponentIdentifier( - class_name="SingleChildScorer", - class_module="pyrit.score", - children={"target": child}, - ) - result = _build_eval_dict(identifier) - - # Single child should be a scalar string, not a list - assert isinstance(result["children"]["target"], str) - - def test_no_children_key_when_empty(self): - """Test that 'children' key is absent when there are no children.""" - identifier = ComponentIdentifier( - class_name="NoChildScorer", - class_module="pyrit.score", - params={"threshold": 0.5}, - ) - result = _build_eval_dict(identifier) - - assert "children" not in result - - def test_non_target_children_include_all_params(self): - """Test that non-target children (e.g., sub-scorers) include all params, not just behavioral ones.""" - child = ComponentIdentifier( - class_name="SubScorer", - class_module="pyrit.score", - params={ - "model_name": "gpt-4", - "temperature": 0.7, - "system_prompt_template": "custom_prompt", - "threshold": 0.8, - }, - ) - identifier = ComponentIdentifier( - class_name="ParentScorer", - class_module="pyrit.score", - children={"sub_scorer": child}, - ) - result = _build_eval_dict(identifier) - - assert "children" in result - assert isinstance(result["children"]["sub_scorer"], str) - - def test_non_target_children_with_different_params_produce_different_hash(self): - """Test that non-target children differing in any param produce different hashes.""" - child1 = ComponentIdentifier( - class_name="SubScorer", - class_module="pyrit.score", - params={"system_prompt_template": "prompt_a", "endpoint": "https://a.com"}, - ) - child2 = ComponentIdentifier( - class_name="SubScorer", - class_module="pyrit.score", - 
params={"system_prompt_template": "prompt_a", "endpoint": "https://b.com"}, - ) - id1 = ComponentIdentifier( - class_name="Scorer", - class_module="pyrit.score", - children={"sub_scorer": child1}, - ) - id2 = ComponentIdentifier( - class_name="Scorer", - class_module="pyrit.score", - children={"sub_scorer": child2}, - ) - result1 = _build_eval_dict(id1) - result2 = _build_eval_dict(id2) - - # Non-target children use full eval treatment, so all params matter - assert result1["children"]["sub_scorer"] != result2["children"]["sub_scorer"] - - def test_target_vs_non_target_children_handled_differently(self): - """Test that target children filter params while non-target children keep all params.""" - child = ComponentIdentifier( - class_name="SomeComponent", - class_module="pyrit.target", - params={ - "model_name": "gpt-4", - "endpoint": "https://example.com", - }, - ) - - # Same child as a target child (behavioral filtering applies) - id_as_target = ComponentIdentifier( - class_name="Scorer", - class_module="pyrit.score", - children={"prompt_target": child}, - ) - # Same child as a non-target child (full eval treatment) - id_as_non_target = ComponentIdentifier( - class_name="Scorer", - class_module="pyrit.score", - children={"sub_scorer": child}, - ) - - result_target = _build_eval_dict(id_as_target) - result_non_target = _build_eval_dict(id_as_non_target) - - # The child hashes should differ because target filtering drops "endpoint" - assert result_target["children"]["prompt_target"] != result_non_target["children"]["sub_scorer"] - - def test_converter_target_children_filtered_like_prompt_target(self): - """Test that converter_target children are also filtered to behavioral params only.""" - child1 = ComponentIdentifier( - class_name="ConverterTarget", - class_module="pyrit.target", - params={ - "model_name": "gpt-4", - "temperature": 0.7, - "endpoint": "https://endpoint-a.com", - }, - ) - child2 = ComponentIdentifier( - class_name="ConverterTarget", - 
class_module="pyrit.target", - params={ - "model_name": "gpt-4", - "temperature": 0.7, - "endpoint": "https://endpoint-b.com", - }, - ) - id1 = ComponentIdentifier( - class_name="Scorer", - class_module="pyrit.score", - children={"converter_target": child1}, - ) - id2 = ComponentIdentifier( - class_name="Scorer", - class_module="pyrit.score", - children={"converter_target": child2}, - ) - result1 = _build_eval_dict(id1) - result2 = _build_eval_dict(id2) - - # Operational param "endpoint" should be filtered, so hashes match - assert result1["children"]["converter_target"] == result2["children"]["converter_target"] - - -class TestComputeEvalHash: - """Tests for the compute_eval_hash function.""" - - def test_deterministic_for_same_identifier(self): - """Test that compute_eval_hash returns the same hash for the same identifier.""" - identifier = ComponentIdentifier( - class_name="StableScorer", - class_module="pyrit.score", - params={"threshold": 0.5}, - ) - hash1 = compute_eval_hash(identifier) - hash2 = compute_eval_hash(identifier) - - assert hash1 == hash2 - - def test_returns_hex_string(self): - """Test that compute_eval_hash returns a valid hex string.""" - identifier = ComponentIdentifier( - class_name="HexScorer", - class_module="pyrit.score", - ) - result = compute_eval_hash(identifier) - - assert isinstance(result, str) - assert len(result) == 64 # SHA-256 hex digest - assert all(c in "0123456789abcdef" for c in result) - - def test_different_class_names_produce_different_hashes(self): - """Test that different class names produce different eval hashes.""" - id1 = ComponentIdentifier(class_name="ScorerA", class_module="pyrit.score") - id2 = ComponentIdentifier(class_name="ScorerB", class_module="pyrit.score") - - assert compute_eval_hash(id1) != compute_eval_hash(id2) - - def test_different_params_produce_different_hashes(self): - """Test that different params produce different eval hashes.""" - id1 = ComponentIdentifier( - class_name="Scorer", - 
class_module="pyrit.score", - params={"threshold": 0.5}, - ) - id2 = ComponentIdentifier( - class_name="Scorer", - class_module="pyrit.score", - params={"threshold": 0.8}, - ) - - assert compute_eval_hash(id1) != compute_eval_hash(id2) - - def test_eval_hash_differs_from_component_hash(self): - """Test that eval hash differs from the ComponentIdentifier.hash for target children with operational params.""" - child = ComponentIdentifier( - class_name="Target", - class_module="pyrit.target", - params={ - "model_name": "gpt-4", - "endpoint": "https://example.com", - }, - ) - identifier = ComponentIdentifier( - class_name="Scorer", - class_module="pyrit.score", - children={"prompt_target": child}, - ) - - eval_hash = compute_eval_hash(identifier) - component_hash = identifier.hash - - # They should differ because eval hash filters operational params from target children - assert eval_hash != component_hash - - def test_operational_child_params_ignored_in_eval_hash(self): - """Test that operational params on target children don't affect eval hash.""" - child1 = ComponentIdentifier( - class_name="Target", - class_module="pyrit.target", - params={ - "model_name": "gpt-4", - "temperature": 0.7, - "endpoint": "https://endpoint-a.com", - "max_requests_per_minute": 50, - }, - ) - child2 = ComponentIdentifier( - class_name="Target", - class_module="pyrit.target", - params={ - "model_name": "gpt-4", - "temperature": 0.7, - "endpoint": "https://endpoint-b.com", - "max_requests_per_minute": 200, - }, - ) - id1 = ComponentIdentifier( - class_name="Scorer", - class_module="pyrit.score", - children={"prompt_target": child1}, - ) - id2 = ComponentIdentifier( - class_name="Scorer", - class_module="pyrit.score", - children={"prompt_target": child2}, - ) - - assert compute_eval_hash(id1) == compute_eval_hash(id2) - - def test_behavioral_child_params_affect_eval_hash(self): - """Test that behavioral params on target children do affect eval hash.""" - child1 = ComponentIdentifier( - 
class_name="Target", - class_module="pyrit.target", - params={"model_name": "gpt-4", "temperature": 0.7}, - ) - child2 = ComponentIdentifier( - class_name="Target", - class_module="pyrit.target", - params={"model_name": "gpt-4", "temperature": 0.0}, - ) - id1 = ComponentIdentifier( - class_name="Scorer", - class_module="pyrit.score", - children={"prompt_target": child1}, - ) - id2 = ComponentIdentifier( - class_name="Scorer", - class_module="pyrit.score", - children={"prompt_target": child2}, - ) - - assert compute_eval_hash(id1) != compute_eval_hash(id2) - - def test_scorer_own_params_all_included(self): - """Test that all of the scorer's own params (not just behavioral) are included.""" - id1 = ComponentIdentifier( - class_name="Scorer", - class_module="pyrit.score", - params={"system_prompt_template": "template_a"}, - ) - id2 = ComponentIdentifier( - class_name="Scorer", - class_module="pyrit.score", - params={"system_prompt_template": "template_b"}, - ) - - assert compute_eval_hash(id1) != compute_eval_hash(id2) diff --git a/tests/unit/target/test_prompt_target_text.py b/tests/unit/target/test_prompt_target_text.py index ceee9eaa00..d6cbb4140a 100644 --- a/tests/unit/target/test_prompt_target_text.py +++ b/tests/unit/target/test_prompt_target_text.py @@ -20,6 +20,7 @@ def sample_entries() -> MutableSequence[MessagePiece]: @pytest.mark.asyncio +@pytest.mark.usefixtures("patch_central_database") async def test_send_prompt_user_no_system(sample_entries: MutableSequence[MessagePiece]): output_stream = io.StringIO() no_op = TextTarget(text_stream=output_stream) @@ -37,6 +38,7 @@ async def test_send_prompt_user_no_system(sample_entries: MutableSequence[Messag @pytest.mark.asyncio +@pytest.mark.usefixtures("patch_central_database") async def test_send_prompt_stream(sample_entries: MutableSequence[MessagePiece]): with NamedTemporaryFile(mode="w+", delete=False) as tmp_file: prompt = "hi, I am a victim chatbot, how can I help?"