Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,9 @@ API Reference

class_name_to_snake_case
ComponentIdentifier
compute_eval_hash
config_hash
EvaluationIdentity
Identifiable
snake_case_to_class_name

Expand Down
3 changes: 3 additions & 0 deletions pyrit/identifiers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,13 @@
snake_case_to_class_name,
)
from pyrit.identifiers.component_identifier import ComponentIdentifier, Identifiable, config_hash
from pyrit.identifiers.evaluation_identity import EvaluationIdentity, compute_eval_hash

__all__ = [
"class_name_to_snake_case",
"ComponentIdentifier",
"compute_eval_hash",
"EvaluationIdentity",
"Identifiable",
"snake_case_to_class_name",
"config_hash",
Expand Down
178 changes: 178 additions & 0 deletions pyrit/identifiers/evaluation_identity.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

"""
Evaluation identity and eval-hash computation.

This module provides:

* ``_build_eval_dict`` — builds a filtered dict for eval-hash computation.
* ``compute_eval_hash`` — free function that computes a behavioral equivalence
hash from a ``ComponentIdentifier``.
* ``EvaluationIdentity`` — abstract base that wraps a ``ComponentIdentifier``
with domain-specific eval-hash configuration. Concrete subclasses declare
*which* children are targets and *which* params are behavioral via two
``ClassVar`` frozensets.
"""

from __future__ import annotations

from abc import ABC
from typing import Any, ClassVar, Optional

from pyrit.identifiers.component_identifier import ComponentIdentifier, config_hash


def _build_eval_dict(
    identifier: ComponentIdentifier,
    *,
    target_child_keys: frozenset[str],
    behavioral_child_params: frozenset[str],
    param_allowlist: Optional[frozenset[str]] = None,
) -> dict[str, Any]:
    """
    Build a filtered dictionary for eval-hash computation.

    Includes only behavioral parameters. For child components whose names appear
    in ``target_child_keys``, only params in ``behavioral_child_params`` are kept
    (stripping operational params like endpoint, max_requests_per_minute).
    Non-target children receive full eval treatment recursively.

    Args:
        identifier (ComponentIdentifier): The component identity to process.
        target_child_keys (frozenset[str]): Child names that are targets
            (e.g., ``{"prompt_target", "converter_target"}``).
        behavioral_child_params (frozenset[str]): Param allowlist applied to
            target children (e.g., ``{"model_name", "temperature", "top_p"}``).
        param_allowlist (Optional[frozenset[str]]): If provided, only include
            params whose keys are in the allowlist. If None, include all params.

    Returns:
        dict[str, Any]: The filtered dictionary suitable for hashing.
    """
    eval_dict: dict[str, Any] = {
        ComponentIdentifier.KEY_CLASS_NAME: identifier.class_name,
        ComponentIdentifier.KEY_CLASS_MODULE: identifier.class_module,
    }

    # Deterministic key order; drop None-valued params; honor the allowlist when given.
    eval_dict.update(
        {
            key: value
            for key, value in sorted(identifier.params.items())
            if value is not None and (param_allowlist is None or key in param_allowlist)
        }
    )

    if identifier.children:
        eval_children: dict[str, Any] = {}
        for name in sorted(identifier.children):
            # Target children are filtered to behavioral params only; non-target
            # children (e.g., sub-scorers) recurse with no param filtering. The
            # two cases differ only in the allowlist passed down, so a single
            # recursive call covers both.
            child_allowlist = behavioral_child_params if name in target_child_keys else None
            hashes = [
                config_hash(
                    _build_eval_dict(
                        child,
                        target_child_keys=target_child_keys,
                        behavioral_child_params=behavioral_child_params,
                        param_allowlist=child_allowlist,
                    )
                )
                for child in identifier.get_child_list(name)
            ]
            # A single child collapses to a scalar hash (matching identifier semantics);
            # multiple children stay a list to preserve order.
            eval_children[name] = hashes[0] if len(hashes) == 1 else hashes
        if eval_children:
            eval_dict["children"] = eval_children

    return eval_dict


def compute_eval_hash(
    identifier: ComponentIdentifier,
    *,
    target_child_keys: frozenset[str],
    behavioral_child_params: frozenset[str],
) -> str:
    """
    Compute a behavioral equivalence hash for evaluation grouping.

    Unlike ``ComponentIdentifier.hash`` (which includes all params of self and
    children), the eval hash filters child components that are "targets" to only
    their behavioral params (e.g., model_name, temperature, top_p), stripping
    operational params like endpoint or max_requests_per_minute. This ensures the
    same logical configuration on different deployments produces the same eval hash.

    Non-target children (e.g., sub-scorers) receive full recursive eval treatment.

    When ``target_child_keys`` is empty, no child filtering occurs and the result
    equals ``identifier.hash``.

    Args:
        identifier (ComponentIdentifier): The component identity to compute the hash for.
        target_child_keys (frozenset[str]): Child names that are targets
            (e.g., ``{"prompt_target", "converter_target"}``).
        behavioral_child_params (frozenset[str]): Param allowlist for target children
            (e.g., ``{"model_name", "temperature", "top_p"}``).

    Returns:
        str: A hex-encoded SHA256 hash suitable for eval registry keying.
    """
    # With no targets declared there is nothing to filter, so the eval hash
    # degenerates to the identifier's own full hash.
    if not target_child_keys:
        return identifier.hash

    return config_hash(
        _build_eval_dict(
            identifier,
            target_child_keys=target_child_keys,
            behavioral_child_params=behavioral_child_params,
        )
    )


class EvaluationIdentity(ABC):
    """
    Wraps a ``ComponentIdentifier`` with domain-specific eval-hash configuration.

    Concrete subclasses must define two ``ClassVar`` frozensets:

    * ``TARGET_CHILD_KEYS`` — names of child components whose operational params
      are stripped for eval hashing (e.g., ``{"prompt_target", "converter_target"}``).
    * ``BEHAVIORAL_CHILD_PARAMS`` — the param allowlist applied to those target
      children (e.g., ``{"model_name", "temperature", "top_p"}``).

    The ``eval_hash`` property delegates to the module-level ``compute_eval_hash``
    free function using those class-level settings.
    """

    TARGET_CHILD_KEYS: ClassVar[frozenset[str]]
    BEHAVIORAL_CHILD_PARAMS: ClassVar[frozenset[str]]

    def __init__(self, identifier: ComponentIdentifier) -> None:
        """Store the identifier and eagerly derive its eval hash."""
        # Hash is computed once up front so repeated property reads are free.
        self._eval_hash = compute_eval_hash(
            identifier,
            target_child_keys=self.TARGET_CHILD_KEYS,
            behavioral_child_params=self.BEHAVIORAL_CHILD_PARAMS,
        )
        self._identifier = identifier

    @property
    def identifier(self) -> ComponentIdentifier:
        """The underlying component identity."""
        return self._identifier

    @property
    def eval_hash(self) -> str:
        """Behavioral equivalence hash for evaluation grouping."""
        return self._eval_hash
9 changes: 4 additions & 5 deletions pyrit/models/scenario_result.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,15 +223,14 @@ def get_scorer_evaluation_metrics(self) -> Optional["ScorerMetrics"]:

"""
# import here to avoid circular imports
from pyrit.score.scorer_evaluation.scorer_evaluation_identity import ScorerEvaluationIdentity
from pyrit.score.scorer_evaluation.scorer_metrics_io import (
find_objective_metrics_by_hash,
find_objective_metrics_by_eval_hash,
)

if not self.objective_scorer_identifier:
return None

scorer_hash = self.objective_scorer_identifier.hash
if not scorer_hash:
return None
eval_hash = ScorerEvaluationIdentity(self.objective_scorer_identifier).eval_hash

return find_objective_metrics_by_hash(hash=scorer_hash)
return find_objective_metrics_by_eval_hash(eval_hash=eval_hash)
8 changes: 5 additions & 3 deletions pyrit/score/float_scale/float_scale_scorer.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,14 +52,16 @@ def get_scorer_metrics(self) -> Optional["HarmScorerMetrics"]:
HarmScorerMetrics: The metrics for this scorer, or None if not found or not configured.
"""
from pyrit.score.scorer_evaluation.scorer_metrics_io import (
find_harm_metrics_by_hash,
find_harm_metrics_by_eval_hash,
)

if self.evaluation_file_mapping is None or self.evaluation_file_mapping.harm_category is None:
return None
scorer_hash = self.get_identifier().hash

return find_harm_metrics_by_hash(hash=scorer_hash, harm_category=self.evaluation_file_mapping.harm_category)
return find_harm_metrics_by_eval_hash(
eval_hash=self.get_eval_hash(),
harm_category=self.evaluation_file_mapping.harm_category,
)

async def _score_value_with_llm(
self,
Expand Down
18 changes: 10 additions & 8 deletions pyrit/score/printer/console_scorer_printer.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,18 +100,19 @@ def print_objective_scorer(self, *, scorer_identifier: ComponentIdentifier) -> N
Args:
scorer_identifier (ComponentIdentifier): The scorer identifier to print information for.
"""
from pyrit.score.scorer_evaluation.scorer_evaluation_identity import ScorerEvaluationIdentity
from pyrit.score.scorer_evaluation.scorer_metrics_io import (
find_objective_metrics_by_hash,
find_objective_metrics_by_eval_hash,
)

print()
self._print_colored(f"{self._indent}📊 Scorer Information", Style.BRIGHT)
self._print_colored(f"{self._indent * 2}▸ Scorer Identifier", Fore.WHITE)
self._print_scorer_info(scorer_identifier, indent_level=3)

# Look up metrics by hash
scorer_hash = scorer_identifier.hash
metrics = find_objective_metrics_by_hash(hash=scorer_hash)
# Look up metrics by eval hash
eval_hash = ScorerEvaluationIdentity(scorer_identifier).eval_hash
metrics = find_objective_metrics_by_eval_hash(eval_hash=eval_hash)
self._print_objective_metrics(metrics)

def print_harm_scorer(self, scorer_identifier: ComponentIdentifier, *, harm_category: str) -> None:
Expand All @@ -127,18 +128,19 @@ def print_harm_scorer(self, scorer_identifier: ComponentIdentifier, *, harm_cate
scorer_identifier (ComponentIdentifier): The scorer identifier to print information for.
harm_category (str): The harm category for looking up metrics (e.g., "hate_speech", "violence").
"""
from pyrit.score.scorer_evaluation.scorer_evaluation_identity import ScorerEvaluationIdentity
from pyrit.score.scorer_evaluation.scorer_metrics_io import (
find_harm_metrics_by_hash,
find_harm_metrics_by_eval_hash,
)

print()
self._print_colored(f"{self._indent}📊 Scorer Information", Style.BRIGHT)
self._print_colored(f"{self._indent * 2}▸ Scorer Identifier", Fore.WHITE)
self._print_scorer_info(scorer_identifier, indent_level=3)

# Look up metrics by hash and harm category
scorer_hash = scorer_identifier.hash
metrics = find_harm_metrics_by_hash(hash=scorer_hash, harm_category=harm_category)
# Look up metrics by eval hash and harm category
eval_hash = ScorerEvaluationIdentity(scorer_identifier).eval_hash
metrics = find_harm_metrics_by_eval_hash(eval_hash=eval_hash, harm_category=harm_category)
self._print_harm_metrics(metrics)

def _print_scorer_info(self, scorer_identifier: ComponentIdentifier, *, indent_level: int = 2) -> None:
Expand Down
20 changes: 18 additions & 2 deletions pyrit/score/scorer.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,8 @@ class Scorer(Identifiable, abc.ABC):
Abstract base class for scorers.
"""

# Evaluation configuration - maps input dataset files to a result file
# Specifies glob patterns for datasets and a result file name
# Evaluation configuration - maps input dataset files to a result file.
# Specifies glob patterns for datasets and a result file name.
evaluation_file_mapping: Optional[ScorerEvalDatasetFiles] = None

_identifier: Optional[ComponentIdentifier] = None
Expand All @@ -70,6 +70,22 @@ def __init__(self, *, validator: ScorerPromptValidator):
"""
self._validator = validator

def get_eval_hash(self) -> str:
    """
    Compute a behavioral equivalence hash for evaluation grouping.

    Delegates to ``ScorerEvaluationIdentity`` which filters target children
    (prompt_target, converter_target) to behavioral params only, so the same
    scorer configuration on different deployments produces the same eval hash.

    Returns:
        str: A hex-encoded SHA256 hash suitable for eval registry keying.
    """
    # Import locally: scorer_evaluation_identity depends (transitively) on this
    # module, so a top-level import would create a cycle.
    from pyrit.score.scorer_evaluation.scorer_evaluation_identity import ScorerEvaluationIdentity

    identity = ScorerEvaluationIdentity(self.get_identifier())
    return identity.eval_hash

@property
def scorer_type(self) -> ScoreType:
"""
Expand Down
28 changes: 28 additions & 0 deletions pyrit/score/scorer_evaluation/scorer_evaluation_identity.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

"""
Scorer-specific evaluation identity.

``ScorerEvaluationIdentity`` declares which children are "targets" and which
target params are behavioral for the scorer evaluation domain.
"""

from __future__ import annotations

from typing import ClassVar

from pyrit.identifiers.evaluation_identity import EvaluationIdentity


class ScorerEvaluationIdentity(EvaluationIdentity):
    """
    Evaluation identity specialized for scorers.

    Children named ``prompt_target`` or ``converter_target`` are treated as
    targets and reduced to their behavioral params (``model_name``,
    ``temperature``, ``top_p``) when hashing, so the same scorer configuration
    deployed against different endpoints yields an identical eval hash.
    """

    # Child components whose operational params (endpoint, rate limits, ...) are stripped.
    TARGET_CHILD_KEYS: ClassVar[frozenset[str]] = frozenset({"prompt_target", "converter_target"})
    # The only target-child params that influence scoring behavior.
    BEHAVIORAL_CHILD_PARAMS: ClassVar[frozenset[str]] = frozenset({"model_name", "temperature", "top_p"})
Loading