From c1497c2ec850491d1b80c16608b7c8a824799e57 Mon Sep 17 00:00:00 2001
From: Richard Lundeen <rlundeen@microsoft.com>
Date: Mon, 2 Mar 2026 15:53:08 -0800
Subject: [PATCH 1/5] refactoring eval_hash

---
 pyrit/identifiers/component_identifier.py     | 179 +++++
 pyrit/models/scenario_result.py               |   8 +-
 pyrit/score/float_scale/float_scale_scorer.py |   8 +-
 pyrit/score/printer/console_scorer_printer.py |  16 +-
 pyrit/score/scorer.py                         |  10 +-
 .../scorer_evaluation/scorer_evaluator.py     |  15 +-
 .../scorer_evaluation/scorer_metrics_io.py    | 124 +--
 pyrit/score/true_false/true_false_scorer.py   |   4 +-
 .../identifiers/test_component_identifier.py  | 712 ++++++++++++++++++
 tests/unit/score/test_scorer_evaluator.py     |  40 +-
 tests/unit/score/test_scorer_metrics.py       | 471 +-----------
 11 files changed, 980 insertions(+), 607 deletions(-)

diff --git a/pyrit/identifiers/component_identifier.py b/pyrit/identifiers/component_identifier.py
index fe306053ae..acf9bc57ee 100644
--- a/pyrit/identifiers/component_identifier.py
+++ b/pyrit/identifiers/component_identifier.py
@@ -17,6 +17,7 @@
 from __future__ import annotations
 
 import hashlib
+import importlib
 import json
 import logging
 from abc import ABC, abstractmethod
@@ -98,6 +99,78 @@ def _build_hash_dict(
     return hash_dict
 
 
+def _build_eval_dict(
+    identifier: ComponentIdentifier,
+    *,
+    target_child_keys: frozenset[str],
+    behavioral_child_params: frozenset[str],
+    param_allowlist: Optional[frozenset[str]] = None,
+) -> dict[str, Any]:
+    """
+    Build a filtered dictionary for eval-hash computation.
+
+    Includes the component's own non-None params (restricted to ``param_allowlist``
+    when given). Children named in ``target_child_keys`` keep only params in
+    ``behavioral_child_params`` (stripping operational params like endpoint or
+    max_requests_per_minute). Non-target children are processed recursively, unfiltered.
+
+    Args:
+        identifier (ComponentIdentifier): The component identity to process.
+        target_child_keys (frozenset[str]): Child names that are targets
+            (e.g., ``{"prompt_target", "converter_target"}``).
+        behavioral_child_params (frozenset[str]): Param allowlist applied to
+            target children (e.g., ``{"model_name", "temperature", "top_p"}``).
+        param_allowlist (Optional[frozenset[str]]): If provided, only include params whose keys
+            are in the allowlist; if None, include all. None-valued params are always dropped.
+
+    Returns:
+        dict[str, Any]: The filtered dictionary suitable for hashing.
+    """
+    eval_dict: dict[str, Any] = {
+        ComponentIdentifier.KEY_CLASS_NAME: identifier.class_name,
+        ComponentIdentifier.KEY_CLASS_MODULE: identifier.class_module,
+    }
+
+    for key, value in sorted(identifier.params.items()):
+        if value is not None and (param_allowlist is None or key in param_allowlist):
+            eval_dict[key] = value
+
+    if identifier.children:
+        eval_children: dict[str, Any] = {}
+        for name in sorted(identifier.children):
+            child_list = identifier.get_child_list(name)
+            if name in target_child_keys:
+                # Targets: filter to behavioral params only
+                hashes = [
+                    config_hash(
+                        _build_eval_dict(
+                            c,
+                            target_child_keys=target_child_keys,
+                            behavioral_child_params=behavioral_child_params,
+                            param_allowlist=behavioral_child_params,
+                        )
+                    )
+                    for c in child_list
+                ]
+            else:
+                # Non-targets (e.g., sub-scorers): full eval treatment, recurse without param filtering
+                hashes = [
+                    config_hash(
+                        _build_eval_dict(
+                            c,
+                            target_child_keys=target_child_keys,
+                            behavioral_child_params=behavioral_child_params,
+                        )
+                    )
+                    for c in child_list
+                ]
+            eval_children[name] = hashes[0] if len(hashes) == 1 else hashes
+        if eval_children:
+            eval_dict["children"] = eval_children
+
+    return eval_dict
+
+
 @dataclass(frozen=True)
 class ComponentIdentifier:
     """
@@ -165,6 +238,82 @@ def unique_name(self) -> str:
         """
         return f"{self.class_name}::{self.short_hash}"
 
+    def compute_eval_hash(
+        self,
+        *,
+        target_child_keys: Optional[frozenset[str]] = None,
+        behavioral_child_params: Optional[frozenset[str]] = None,
+    ) -> str:
+        """
+        Compute a behavioral equivalence hash for evaluation grouping.
+
+        Unlike ``hash`` (which includes all params of self and children), the eval
+        hash filters child components that are "targets" to only their behavioral
+        params (e.g., model_name, temperature, top_p), stripping operational params
+        like endpoint or max_requests_per_minute. This ensures the same logical
+        configuration on different deployments produces the same eval hash.
+
+        Non-target children (e.g., sub-scorers) receive full recursive eval treatment.
+
+        When ``target_child_keys`` or ``behavioral_child_params`` is not provided,
+        the missing value is resolved dynamically from ``class_module`` / ``class_name``
+        via the class's ``EVAL_TARGET_CHILD_KEYS`` / ``EVAL_BEHAVIORAL_CHILD_PARAMS``
+        ClassVars. If the class cannot be resolved, the missing value defaults to empty.
+
+        When ``target_child_keys`` is empty (explicitly or from the resolved class),
+        no child filtering occurs and the result equals ``self.hash``.
+
+        Args:
+            target_child_keys (Optional[frozenset[str]]): Child names that are targets.
+                If None, resolved dynamically from the component class.
+            behavioral_child_params (Optional[frozenset[str]]): Param allowlist for
+                target children. If None, resolved dynamically from the component class.
+
+        Returns:
+            str: A hex-encoded SHA256 hash suitable for eval registry keying.
+        """
+        if target_child_keys is None or behavioral_child_params is None:
+            resolved_keys, resolved_params = self._resolve_eval_config()
+            if target_child_keys is None:
+                target_child_keys = resolved_keys
+            if behavioral_child_params is None:
+                behavioral_child_params = resolved_params
+
+        if not target_child_keys:
+            return self.hash
+
+        eval_dict = _build_eval_dict(
+            self,
+            target_child_keys=target_child_keys,
+            behavioral_child_params=behavioral_child_params,
+        )
+        return config_hash(eval_dict)
+
+    def _resolve_eval_config(self) -> tuple[frozenset[str], frozenset[str]]:
+        """
+        Dynamically resolve eval-hash configuration from the component class.
+
+        Uses ``importlib`` to import ``self.class_module`` and look up
+        ``self.class_name`` to read ``EVAL_TARGET_CHILD_KEYS`` and
+        ``EVAL_BEHAVIORAL_CHILD_PARAMS`` ClassVars. Returns empty frozensets
+        if the class cannot be resolved.
+
+        Returns:
+            Tuple of (target_child_keys, behavioral_child_params) frozensets.
+        """
+        empty: tuple[frozenset[str], frozenset[str]] = (frozenset(), frozenset())
+        try:
+            module = importlib.import_module(self.class_module)
+            cls = getattr(module, self.class_name, None)
+            if cls is None:
+                return empty
+            return (
+                getattr(cls, "EVAL_TARGET_CHILD_KEYS", frozenset()),
+                getattr(cls, "EVAL_BEHAVIORAL_CHILD_PARAMS", frozenset()),
+            )
+        except Exception:
+            return empty
+
     @classmethod
     def of(
         cls,
@@ -452,8 +601,21 @@ class Identifiable(ABC):
     Components implement ``_build_identifier()`` to return a frozen ComponentIdentifier
     snapshot. The identifier is built lazily on first access and cached for the
     component's lifetime.
+
+    Subclasses that participate in evaluation grouping (e.g., scorers, attack strategies)
+    should override ``EVAL_TARGET_CHILD_KEYS`` and ``EVAL_BEHAVIORAL_CHILD_PARAMS`` to
+    declare which children are "targets" and which target params are behavioral. The
+    ``get_eval_hash()`` convenience method uses these ClassVars to compute a behavioral
+    equivalence hash via ``ComponentIdentifier.compute_eval_hash()``.
     """
 
+    # Override in subclasses to declare which children are "targets" whose operational
+    # params (endpoint, max_requests_per_minute, etc.) should be stripped for eval hashing.
+    EVAL_TARGET_CHILD_KEYS: ClassVar[frozenset[str]] = frozenset()
+    # Override in subclasses to declare which target child params are behavioral
+    # (kept in eval hash). Only used when EVAL_TARGET_CHILD_KEYS is non-empty.
+    EVAL_BEHAVIORAL_CHILD_PARAMS: ClassVar[frozenset[str]] = frozenset()
+
     _identifier: Optional[ComponentIdentifier] = None
 
     @abstractmethod
@@ -488,3 +650,20 @@ def get_identifier(self) -> ComponentIdentifier:
         if self._identifier is None:
             self._identifier = self._build_identifier()
         return self._identifier
+
+    def get_eval_hash(self) -> str:
+        """
+        Compute a behavioral equivalence hash for evaluation grouping.
+
+        Uses the class-level ``EVAL_TARGET_CHILD_KEYS`` and
+        ``EVAL_BEHAVIORAL_CHILD_PARAMS`` to determine which children are targets
+        and which target params are behavioral. When ``EVAL_TARGET_CHILD_KEYS`` is
+        empty (the default), returns ``self.get_identifier().hash``, the full identity hash.
+
+        Returns:
+            str: A hex-encoded SHA256 hash suitable for eval registry keying.
+        """
+        return self.get_identifier().compute_eval_hash(
+            target_child_keys=self.EVAL_TARGET_CHILD_KEYS,
+            behavioral_child_params=self.EVAL_BEHAVIORAL_CHILD_PARAMS,
+        )
diff --git a/pyrit/models/scenario_result.py b/pyrit/models/scenario_result.py
index 52669fc033..5d8f1cb7d8 100644
--- a/pyrit/models/scenario_result.py
+++ b/pyrit/models/scenario_result.py
@@ -225,14 +225,12 @@ def get_scorer_evaluation_metrics(self) -> Optional["ScorerMetrics"]:
         """
         # import here to avoid circular imports
         from pyrit.score.scorer_evaluation.scorer_metrics_io import (
-            find_objective_metrics_by_hash,
+            find_objective_metrics_by_eval_hash,
         )
 
         if not self.objective_scorer_identifier:
             return None
 
-        scorer_hash = self.objective_scorer_identifier.hash
-        if not scorer_hash:
-            return None
+        eval_hash = self.objective_scorer_identifier.compute_eval_hash()
 
-        return find_objective_metrics_by_hash(hash=scorer_hash)
+        return find_objective_metrics_by_eval_hash(eval_hash=eval_hash)
diff --git a/pyrit/score/float_scale/float_scale_scorer.py b/pyrit/score/float_scale/float_scale_scorer.py
index 2fc9a732ed..d09290490b 100644
--- a/pyrit/score/float_scale/float_scale_scorer.py
+++ b/pyrit/score/float_scale/float_scale_scorer.py
@@ -52,14 +52,16 @@ def get_scorer_metrics(self) -> Optional["HarmScorerMetrics"]:
             HarmScorerMetrics: The metrics for this scorer, or None if not found or not configured.
         """
         from pyrit.score.scorer_evaluation.scorer_metrics_io import (
-            find_harm_metrics_by_hash,
+            find_harm_metrics_by_eval_hash,
         )
 
         if self.evaluation_file_mapping is None or self.evaluation_file_mapping.harm_category is None:
             return None
-        scorer_hash = self.get_identifier().hash
 
-        return find_harm_metrics_by_hash(hash=scorer_hash, harm_category=self.evaluation_file_mapping.harm_category)
+        return find_harm_metrics_by_eval_hash(
+            eval_hash=self.get_eval_hash(),
+            harm_category=self.evaluation_file_mapping.harm_category,
+        )
 
     async def _score_value_with_llm(
         self,
diff --git a/pyrit/score/printer/console_scorer_printer.py b/pyrit/score/printer/console_scorer_printer.py
index bce9566d77..05a3e18ded 100644
--- a/pyrit/score/printer/console_scorer_printer.py
+++ b/pyrit/score/printer/console_scorer_printer.py
@@ -101,7 +101,7 @@ def print_objective_scorer(self, *, scorer_identifier: ComponentIdentifier) -> N
             scorer_identifier (ComponentIdentifier): The scorer identifier to print information for.
         """
         from pyrit.score.scorer_evaluation.scorer_metrics_io import (
-            find_objective_metrics_by_hash,
+            find_objective_metrics_by_eval_hash,
         )
 
         print()
@@ -109,9 +109,9 @@ def print_objective_scorer(self, *, scorer_identifier: ComponentIdentifier) -> N
         self._print_colored(f"{self._indent * 2}▸ Scorer Identifier", Fore.WHITE)
         self._print_scorer_info(scorer_identifier, indent_level=3)
 
-        # Look up metrics by hash
-        scorer_hash = scorer_identifier.hash
-        metrics = find_objective_metrics_by_hash(hash=scorer_hash)
+        # Look up metrics by eval hash
+        eval_hash = scorer_identifier.compute_eval_hash()
+        metrics = find_objective_metrics_by_eval_hash(eval_hash=eval_hash)
         self._print_objective_metrics(metrics)
 
     def print_harm_scorer(self, scorer_identifier: ComponentIdentifier, *, harm_category: str) -> None:
@@ -128,7 +128,7 @@ def print_harm_scorer(self, scorer_identifier: ComponentIdentifier, *, harm_cate
             harm_category (str): The harm category for looking up metrics (e.g., "hate_speech", "violence").
         """
         from pyrit.score.scorer_evaluation.scorer_metrics_io import (
-            find_harm_metrics_by_hash,
+            find_harm_metrics_by_eval_hash,
         )
 
         print()
@@ -136,9 +136,9 @@ def print_harm_scorer(self, scorer_identifier: ComponentIdentifier, *, harm_cate
         self._print_colored(f"{self._indent * 2}▸ Scorer Identifier", Fore.WHITE)
         self._print_scorer_info(scorer_identifier, indent_level=3)
 
-        # Look up metrics by hash and harm category
-        scorer_hash = scorer_identifier.hash
-        metrics = find_harm_metrics_by_hash(hash=scorer_hash, harm_category=harm_category)
+        # Look up metrics by eval hash and harm category
+        eval_hash = scorer_identifier.compute_eval_hash()
+        metrics = find_harm_metrics_by_eval_hash(eval_hash=eval_hash, harm_category=harm_category)
         self._print_harm_metrics(metrics)
 
     def _print_scorer_info(self, scorer_identifier: ComponentIdentifier, *, indent_level: int = 2) -> None:
diff --git a/pyrit/score/scorer.py b/pyrit/score/scorer.py
index bd5a1b2294..f6196350a3 100644
--- a/pyrit/score/scorer.py
+++ b/pyrit/score/scorer.py
@@ -13,6 +13,7 @@
 from typing import (
     TYPE_CHECKING,
     Any,
+    ClassVar,
     Optional,
     Union,
     cast,
@@ -54,8 +55,13 @@ class Scorer(Identifiable, abc.ABC):
     Abstract base class for scorers.
     """
 
-    # Evaluation configuration - maps input dataset files to a result file
-    # Specifies glob patterns for datasets and a result file name
+    # Eval-hash configuration: which children are "targets" (operational params stripped)
+    # and which target params are behavioral (kept in eval hash).
+    EVAL_TARGET_CHILD_KEYS: ClassVar[frozenset[str]] = frozenset({"prompt_target", "converter_target"})
+    EVAL_BEHAVIORAL_CHILD_PARAMS: ClassVar[frozenset[str]] = frozenset({"model_name", "temperature", "top_p"})
+
+    # Evaluation configuration - maps input dataset files to a result file.
+    # Specifies glob patterns for datasets and a result file name.
     evaluation_file_mapping: Optional[ScorerEvalDatasetFiles] = None
 
     _identifier: Optional[ComponentIdentifier] = None
diff --git a/pyrit/score/scorer_evaluation/scorer_evaluator.py b/pyrit/score/scorer_evaluation/scorer_evaluator.py
index 2ff8022d65..a611b9c68c 100644
--- a/pyrit/score/scorer_evaluation/scorer_evaluator.py
+++ b/pyrit/score/scorer_evaluation/scorer_evaluator.py
@@ -32,8 +32,8 @@
     ScorerMetrics,
 )
 from pyrit.score.scorer_evaluation.scorer_metrics_io import (
-    find_harm_metrics_by_hash,
-    find_objective_metrics_by_hash,
+    find_harm_metrics_by_eval_hash,
+    find_objective_metrics_by_eval_hash,
     replace_evaluation_results,
 )
 from pyrit.score.true_false.true_false_scorer import TrueFalseScorer
@@ -272,7 +272,7 @@ def _should_skip_evaluation(
                 - (False, None) if should run evaluation
         """
         try:
-            scorer_hash = self.scorer.get_identifier().hash
+            scorer_hash = self.scorer.get_eval_hash()
 
             # Determine if this is a harm or objective evaluation
             metrics_type = MetricsType.OBJECTIVE if isinstance(self.scorer, TrueFalseScorer) else MetricsType.HARM
@@ -282,14 +282,14 @@ def _should_skip_evaluation(
                 if harm_category is None:
                     logger.warning("harm_category must be provided for harm scorer evaluations")
                     return (False, None)
-                existing = find_harm_metrics_by_hash(
-                    hash=scorer_hash,
+                existing = find_harm_metrics_by_eval_hash(
+                    eval_hash=scorer_hash,
                     harm_category=harm_category,
                 )
             else:
-                existing = find_objective_metrics_by_hash(
+                existing = find_objective_metrics_by_eval_hash(
                     file_path=result_file_path,
-                    hash=scorer_hash,
+                    eval_hash=scorer_hash,
                 )
 
             if not existing:
@@ -484,6 +484,7 @@ def _write_metrics_to_registry(
             replace_evaluation_results(
                 file_path=result_file_path,
                 scorer_identifier=self.scorer.get_identifier(),
+                eval_hash=self.scorer.get_eval_hash(),
                 metrics=metrics,
             )
         except Exception as e:
diff --git a/pyrit/score/scorer_evaluation/scorer_metrics_io.py b/pyrit/score/scorer_evaluation/scorer_metrics_io.py
index 07c9f83bd9..71bcc01e7c 100644
--- a/pyrit/score/scorer_evaluation/scorer_metrics_io.py
+++ b/pyrit/score/scorer_evaluation/scorer_metrics_io.py
@@ -16,7 +16,7 @@
 from pyrit.common.path import (
     SCORER_EVALS_PATH,
 )
-from pyrit.identifiers import ComponentIdentifier, config_hash
+from pyrit.identifiers import ComponentIdentifier
 from pyrit.score.scorer_evaluation.scorer_metrics import (
     HarmScorerMetrics,
     ObjectiveScorerMetrics,
@@ -32,79 +32,6 @@
 
 M = TypeVar("M", bound=ScorerMetrics)
 
-# Child component params that affect scoring behavior.
-# Operational params (endpoint, max_requests_per_minute, etc.) are excluded
-# so that the same model on different deployments shares cached eval results.
-_BEHAVIORAL_CHILD_PARAMS = frozenset({"model_name", "temperature", "top_p"})
-_TARGET_CHILD_KEYS = frozenset({"prompt_target", "converter_target"})
-
-
-def _build_eval_dict(
-    identifier: ComponentIdentifier,
-    *,
-    param_allowlist: Optional[frozenset[str]] = None,
-) -> dict[str, Any]:
-    """
-    Build a dictionary for eval hashing.
-
-    This function creates a filtered representation of a component's configuration,
-    including only behavioral parameters. For child components that are targets,
-    only behavioral params are included. For non-target children, full evaluation
-    treatment is applied recursively.
-
-    Args:
-        identifier (ComponentIdentifier): The component identity to process.
-        param_allowlist (Optional[frozenset[str]]): If provided, only include
-            params whose keys are in the allowlist. If None, include all params.
-            Target children are filtered to _BEHAVIORAL_CHILD_PARAMS, while
-            non-target children receive full eval treatment without param filtering.
-
-    Returns:
-        Dict[str, Any]: The filtered dictionary suitable for hashing.
-    """
-    eval_dict: dict[str, Any] = {
-        ComponentIdentifier.KEY_CLASS_NAME: identifier.class_name,
-        ComponentIdentifier.KEY_CLASS_MODULE: identifier.class_module,
-    }
-
-    for key, value in sorted(identifier.params.items()):
-        if value is not None and (param_allowlist is None or key in param_allowlist):
-            eval_dict[key] = value
-
-    if identifier.children:
-        eval_children: dict[str, Any] = {}
-        for name in sorted(identifier.children):
-            child_list = identifier.get_child_list(name)
-            if name in _TARGET_CHILD_KEYS:
-                # Targets: filter to behavioral params only
-                hashes = [
-                    config_hash(_build_eval_dict(c, param_allowlist=_BEHAVIORAL_CHILD_PARAMS)) for c in child_list
-                ]
-            else:
-                # Non-targets (e.g., sub-scorers): full eval treatment, recurse without param filtering
-                hashes = [config_hash(_build_eval_dict(c)) for c in child_list]
-            eval_children[name] = hashes[0] if len(hashes) == 1 else hashes
-        if eval_children:
-            eval_dict["children"] = eval_children
-
-    return eval_dict
-
-
-def compute_eval_hash(identifier: ComponentIdentifier) -> str:
-    """
-    Compute a behavioral equivalence hash for scorer evaluation grouping.
-
-    Includes all of the scorer's own params but projects child components
-    down to only behavioral params (model_name, temperature, top_p).
-
-    Args:
-        identifier (ComponentIdentifier): The scorer's full identity.
-
-    Returns:
-        str: A hash suitable for eval registry keying.
-    """
-    return config_hash(_build_eval_dict(identifier))
-
 
 def _metrics_to_registry_dict(metrics: ScorerMetrics) -> dict[str, Any]:
     """
@@ -221,16 +148,16 @@ def _load_metrics_from_file(
     return results
 
 
-def find_objective_metrics_by_hash(
+def find_objective_metrics_by_eval_hash(
     *,
-    hash: str,
+    eval_hash: str,
     file_path: Optional[Path] = None,
 ) -> Optional[ObjectiveScorerMetrics]:
     """
-    Find objective scorer metrics by configuration hash.
+    Find objective scorer metrics by evaluation hash.
 
     Args:
-        hash (str): The scorer configuration hash to search for.
+        eval_hash (str): The scorer evaluation hash to search for.
         file_path (Optional[Path]): Path to the JSONL file to search.
             If not provided, uses the default path:
             SCORER_EVALS_PATH / "objective" / "objective_achieved_metrics.jsonl"
@@ -241,42 +168,43 @@ def find_objective_metrics_by_hash(
     if file_path is None:
         file_path = SCORER_EVALS_PATH / "objective" / "objective_achieved_metrics.jsonl"
 
-    return _find_metrics_by_hash(file_path=file_path, hash=hash, metrics_class=ObjectiveScorerMetrics)
+    return _find_metrics_by_eval_hash(file_path=file_path, eval_hash=eval_hash, metrics_class=ObjectiveScorerMetrics)
 
 
-def find_harm_metrics_by_hash(
+def find_harm_metrics_by_eval_hash(
     *,
-    hash: str,
+    eval_hash: str,
     harm_category: str,
 ) -> Optional[HarmScorerMetrics]:
     """
-    Find harm scorer metrics by configuration hash.
+    Find harm scorer metrics by evaluation hash.
 
     Args:
-        hash (str): The scorer configuration hash to search for.
+        eval_hash (str): The scorer evaluation hash to search for.
         harm_category (str): The harm category to search in (e.g., "hate_speech", "violence").
 
     Returns:
         HarmScorerMetrics if found, else None.
     """
     file_path = SCORER_EVALS_PATH / "harm" / f"{harm_category}_metrics.jsonl"
-    return _find_metrics_by_hash(file_path=file_path, hash=hash, metrics_class=HarmScorerMetrics)
+    return _find_metrics_by_eval_hash(file_path=file_path, eval_hash=eval_hash, metrics_class=HarmScorerMetrics)
 
 
-def _find_metrics_by_hash(
+def _find_metrics_by_eval_hash(
     *,
     file_path: Path,
-    hash: str,
+    eval_hash: str,
     metrics_class: type[M],
 ) -> Optional[M]:
     """
-    Find scorer metrics by configuration hash in a specific file.
+    Find scorer metrics by evaluation hash in a specific file.
 
-    This is a private helper function used by find_objective_metrics_by_hash and find_harm_metrics_by_hash.
+    This is a private helper function used by find_objective_metrics_by_eval_hash
+    and find_harm_metrics_by_eval_hash.
 
     Args:
         file_path (Path): Path to the JSONL file to search.
-        hash (str): The scorer configuration hash to search for.
+        eval_hash (str): The scorer evaluation hash to search for.
         metrics_class (Type[M]): The metrics class to instantiate.
 
     Returns:
@@ -285,14 +213,14 @@ def _find_metrics_by_hash(
     entries = _load_jsonl(file_path)
 
     for entry in entries:
-        if entry.get("hash") == hash:
+        if entry.get("eval_hash") == eval_hash:
             metrics_dict = entry.get("metrics", {})
             # Filter out internal fields that have init=False (e.g., _harm_definition_obj)
             metrics_dict = {k: v for k, v in metrics_dict.items() if not k.startswith("_")}
             try:
                 return metrics_class(**metrics_dict)
             except Exception as e:
-                logger.warning(f"Failed to parse metrics for hash {hash}: {e}")
+                logger.warning(f"Failed to parse metrics for eval_hash {eval_hash}: {e}")
                 return None
 
     return None
@@ -302,6 +230,7 @@ def add_evaluation_results(
     *,
     file_path: Path,
     scorer_identifier: ComponentIdentifier,
+    eval_hash: str,
     metrics: "ScorerMetrics",
 ) -> None:
     """
@@ -313,6 +242,7 @@ def add_evaluation_results(
     Args:
         file_path (Path): The full path to the JSONL file to append to.
         scorer_identifier (ComponentIdentifier): The scorer's configuration identifier.
+        eval_hash (str): The pre-computed evaluation hash for grouping.
         metrics (ScorerMetrics): The computed metrics (ObjectiveScorerMetrics or HarmScorerMetrics).
     """
     # Get or create lock for this file path
@@ -320,8 +250,6 @@ def add_evaluation_results(
     if file_path_str not in _file_write_locks:
         _file_write_locks[file_path_str] = threading.Lock()
 
-    eval_hash = compute_eval_hash(scorer_identifier)
-
     # Build entry dictionary
     entry = scorer_identifier.to_dict()
     entry["eval_hash"] = eval_hash
@@ -390,18 +318,20 @@ def replace_evaluation_results(
     *,
     file_path: Path,
     scorer_identifier: ComponentIdentifier,
+    eval_hash: str,
     metrics: "ScorerMetrics",
 ) -> None:
     """
-    Replace existing scorer metrics entry (by hash) with new metrics, or add if not exists.
+    Replace existing scorer metrics entry (by eval_hash) with new metrics, or add if not exists.
 
-    This is an atomic operation that removes any existing entry with the same scorer hash
-    and adds the new entry. Only one entry per scorer hash is maintained in the registry,
+    This is an atomic operation that removes any existing entry with the same eval_hash
+    and adds the new entry. Only one entry per eval_hash is maintained in the registry,
     ensuring we always track the highest-fidelity evaluation.
 
     Args:
         file_path (Path): The full path to the JSONL file.
         scorer_identifier (ComponentIdentifier): The scorer's configuration identifier.
+        eval_hash (str): The pre-computed evaluation hash for grouping.
         metrics (ScorerMetrics): The computed metrics (ObjectiveScorerMetrics or HarmScorerMetrics).
     """
     # Get or create lock for this file path
@@ -409,8 +339,6 @@ def replace_evaluation_results(
     if file_path_str not in _file_write_locks:
         _file_write_locks[file_path_str] = threading.Lock()
 
-    eval_hash = compute_eval_hash(scorer_identifier)
-
     # Build new entry dictionary
     new_entry = scorer_identifier.to_dict()
     new_entry["eval_hash"] = eval_hash
diff --git a/pyrit/score/true_false/true_false_scorer.py b/pyrit/score/true_false/true_false_scorer.py
index 1d61be4977..671dd57973 100644
--- a/pyrit/score/true_false/true_false_scorer.py
+++ b/pyrit/score/true_false/true_false_scorer.py
@@ -83,7 +83,7 @@ def get_scorer_metrics(self) -> Optional["ObjectiveScorerMetrics"]:
         """
         from pyrit.common.path import SCORER_EVALS_PATH
         from pyrit.score.scorer_evaluation.scorer_metrics_io import (
-            find_objective_metrics_by_hash,
+            find_objective_metrics_by_eval_hash,
         )
 
         if self.evaluation_file_mapping is None:
@@ -94,7 +94,7 @@ def get_scorer_metrics(self) -> Optional["ObjectiveScorerMetrics"]:
         if not result_file.exists():
             return None
 
-        return find_objective_metrics_by_hash(hash=self.get_identifier().hash, file_path=result_file)
+        return find_objective_metrics_by_eval_hash(eval_hash=self.get_eval_hash(), file_path=result_file)
 
     async def _score_async(self, message: Message, *, objective: Optional[str] = None) -> list[Score]:
         """
diff --git a/tests/unit/identifiers/test_component_identifier.py b/tests/unit/identifiers/test_component_identifier.py
index cb8dbe858e..a727c3e212 100644
--- a/tests/unit/identifiers/test_component_identifier.py
+++ b/tests/unit/identifiers/test_component_identifier.py
@@ -2,10 +2,17 @@
 # Licensed under the MIT license.
 
 
+from typing import ClassVar
+
 import pytest
 
 import pyrit
 from pyrit.identifiers import ComponentIdentifier, Identifiable, config_hash
+from pyrit.identifiers.component_identifier import _build_eval_dict
+
+# Test constants mirroring Scorer's ClassVars — keeps tests decoupled from pyrit.score
+_TARGET_CHILD_KEYS = frozenset({"prompt_target", "converter_target"})
+_BEHAVIORAL_CHILD_PARAMS = frozenset({"model_name", "temperature", "top_p"})
 
 
 class TestComponentIdentifierCreation:
@@ -714,3 +721,708 @@ def _build_identifier(self) -> ComponentIdentifier:
         assert isinstance(identifier, ComponentIdentifier)
         assert identifier.class_name == "MyComponent"
         assert identifier.params["key"] == "val"
+
+
+class TestResolveEvalConfig:
+    """Tests for ComponentIdentifier._resolve_eval_config dynamic class lookup."""
+
+    def test_resolves_scorer_class_vars(self):
+        """Test that _resolve_eval_config returns Scorer's ClassVars for a Scorer identifier."""
+        from pyrit.score import Scorer
+
+        identifier = ComponentIdentifier(
+            class_name="Scorer",
+            class_module="pyrit.score.scorer",
+        )
+        keys, params = identifier._resolve_eval_config()
+
+        assert keys == Scorer.EVAL_TARGET_CHILD_KEYS
+        assert params == Scorer.EVAL_BEHAVIORAL_CHILD_PARAMS
+        # Verify specific members to catch accidental ClassVar drift
+        assert "prompt_target" in keys
+        assert "converter_target" in keys
+        assert "model_name" in params
+        assert "temperature" in params
+        assert "top_p" in params
+
+    def test_returns_empty_for_unresolvable_module(self):
+        """Test that _resolve_eval_config returns empty frozensets for a module that cannot be imported."""
+        identifier = ComponentIdentifier(
+            class_name="DoesNotExist",
+            class_module="no.such.module.exists",
+        )
+        keys, params = identifier._resolve_eval_config()
+
+        assert keys == frozenset()
+        assert params == frozenset()
+
+    def test_returns_empty_for_nonexistent_class(self):
+        """Test that _resolve_eval_config returns empty frozensets when the class name doesn't exist in the module."""
+        identifier = ComponentIdentifier(
+            class_name="NoSuchClassName",
+            class_module="pyrit.score.scorer",
+        )
+        keys, params = identifier._resolve_eval_config()
+
+        assert keys == frozenset()
+        assert params == frozenset()
+
+    def test_returns_empty_for_class_without_classvars(self):
+        """Test that _resolve_eval_config returns empty frozensets when the class has no eval ClassVars."""
+        identifier = ComponentIdentifier(
+            class_name="ComponentIdentifier",
+            class_module="pyrit.identifiers.component_identifier",
+        )
+        keys, params = identifier._resolve_eval_config()
+
+        assert keys == frozenset()
+        assert params == frozenset()
+
+    def test_returns_identifiable_defaults_for_base_class(self):
+        """Test that _resolve_eval_config returns Identifiable's default empty frozensets."""
+        identifier = ComponentIdentifier(
+            class_name="Identifiable",
+            class_module="pyrit.identifiers.component_identifier",
+        )
+        keys, params = identifier._resolve_eval_config()
+
+        assert keys == frozenset()
+        assert params == frozenset()
+
+
+class TestBuildEvalDict:
+    """Tests for the _build_eval_dict function."""
+
+    def test_basic_identifier_without_params_or_children(self):
+        """Test _build_eval_dict with a simple identifier with no params or children."""
+        identifier = ComponentIdentifier(
+            class_name="SimpleScorer",
+            class_module="pyrit.score",
+        )
+        result = _build_eval_dict(
+            identifier,
+            target_child_keys=_TARGET_CHILD_KEYS,
+            behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS,
+        )
+
+        assert result["class_name"] == "SimpleScorer"
+        assert result["class_module"] == "pyrit.score"
+        assert "children" not in result
+
+    def test_includes_all_non_none_params(self):
+        """Test that all non-None params are included in the eval dict."""
+        identifier = ComponentIdentifier(
+            class_name="ParamScorer",
+            class_module="pyrit.score",
+            params={"threshold": 0.5, "template": "prompt_text", "mode": "strict"},
+        )
+        result = _build_eval_dict(
+            identifier,
+            target_child_keys=_TARGET_CHILD_KEYS,
+            behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS,
+        )
+
+        assert result["threshold"] == 0.5
+        assert result["template"] == "prompt_text"
+        assert result["mode"] == "strict"
+
+    def test_param_allowlist_filters_params(self):
+        """Test that param_allowlist restricts which params are included."""
+        identifier = ComponentIdentifier(
+            class_name="FilteredScorer",
+            class_module="pyrit.score",
+            params={"threshold": 0.5, "template": "prompt_text", "mode": "strict"},
+        )
+        result = _build_eval_dict(
+            identifier,
+            target_child_keys=_TARGET_CHILD_KEYS,
+            behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS,
+            param_allowlist=frozenset({"threshold", "mode"}),
+        )
+
+        assert result["threshold"] == 0.5
+        assert result["mode"] == "strict"
+        assert "template" not in result
+
+    def test_none_params_are_excluded(self):
+        """Test that None-valued params are excluded from the eval dict."""
+        identifier = ComponentIdentifier(
+            class_name="NoneScorer",
+            class_module="pyrit.score",
+            params={"threshold": 0.5, "optional_field": None},
+        )
+        result = _build_eval_dict(
+            identifier,
+            target_child_keys=_TARGET_CHILD_KEYS,
+            behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS,
+        )
+
+        assert result["threshold"] == 0.5
+        assert "optional_field" not in result
+
+    def test_target_children_hashed_with_behavioral_params_only(self):
+        """Test that a target child (reduced to behavioral params) appears in the eval dict as a hash string."""
+        child = ComponentIdentifier(
+            class_name="ChildTarget",
+            class_module="pyrit.target",
+            params={
+                "model_name": "gpt-4",
+                "temperature": 0.7,
+                "top_p": 0.9,
+                "max_requests_per_minute": 100,
+                "endpoint": "https://example.com",
+            },
+        )
+        identifier = ComponentIdentifier(
+            class_name="ParentScorer",
+            class_module="pyrit.score",
+            children={"prompt_target": child},
+        )
+        result = _build_eval_dict(
+            identifier,
+            target_child_keys=_TARGET_CHILD_KEYS,
+            behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS,
+        )
+
+        assert "children" in result
+        assert isinstance(result["children"]["prompt_target"], str)
+
+    def test_target_children_same_behavioral_different_operational_produce_same_hash(self):
+        """Test that target children differing only in operational params produce the same child hash."""
+        child1 = ComponentIdentifier(
+            class_name="ChildTarget",
+            class_module="pyrit.target",
+            params={
+                "model_name": "gpt-4",
+                "temperature": 0.7,
+                "endpoint": "https://endpoint-a.com",
+                "max_requests_per_minute": 50,
+            },
+        )
+        child2 = ComponentIdentifier(
+            class_name="ChildTarget",
+            class_module="pyrit.target",
+            params={
+                "model_name": "gpt-4",
+                "temperature": 0.7,
+                "endpoint": "https://endpoint-b.com",
+                "max_requests_per_minute": 200,
+            },
+        )
+        id1 = ComponentIdentifier(
+            class_name="Scorer",
+            class_module="pyrit.score",
+            children={"prompt_target": child1},
+        )
+        id2 = ComponentIdentifier(
+            class_name="Scorer",
+            class_module="pyrit.score",
+            children={"prompt_target": child2},
+        )
+        result1 = _build_eval_dict(
+            id1, target_child_keys=_TARGET_CHILD_KEYS, behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS
+        )
+        result2 = _build_eval_dict(
+            id2, target_child_keys=_TARGET_CHILD_KEYS, behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS
+        )
+
+        assert result1["children"]["prompt_target"] == result2["children"]["prompt_target"]
+
+    def test_target_children_different_behavioral_produce_different_hash(self):
+        """Test that target children differing in behavioral params produce different child hashes."""
+        child1 = ComponentIdentifier(
+            class_name="ChildTarget",
+            class_module="pyrit.target",
+            params={"model_name": "gpt-4", "temperature": 0.7},
+        )
+        child2 = ComponentIdentifier(
+            class_name="ChildTarget",
+            class_module="pyrit.target",
+            params={"model_name": "gpt-3.5-turbo", "temperature": 0.7},
+        )
+        id1 = ComponentIdentifier(
+            class_name="Scorer",
+            class_module="pyrit.score",
+            children={"prompt_target": child1},
+        )
+        id2 = ComponentIdentifier(
+            class_name="Scorer",
+            class_module="pyrit.score",
+            children={"prompt_target": child2},
+        )
+        result1 = _build_eval_dict(
+            id1, target_child_keys=_TARGET_CHILD_KEYS, behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS
+        )
+        result2 = _build_eval_dict(
+            id2, target_child_keys=_TARGET_CHILD_KEYS, behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS
+        )
+
+        assert result1["children"]["prompt_target"] != result2["children"]["prompt_target"]
+
+    def test_multiple_children_as_list(self):
+        """Test that list-valued children produce a list of hashes."""
+        child_a = ComponentIdentifier(
+            class_name="ChildA",
+            class_module="pyrit.target",
+            params={"model_name": "gpt-4"},
+        )
+        child_b = ComponentIdentifier(
+            class_name="ChildB",
+            class_module="pyrit.target",
+            params={"model_name": "gpt-3.5-turbo"},
+        )
+        identifier = ComponentIdentifier(
+            class_name="MultiChildScorer",
+            class_module="pyrit.score",
+            children={"targets": [child_a, child_b]},
+        )
+        result = _build_eval_dict(
+            identifier,
+            target_child_keys=_TARGET_CHILD_KEYS,
+            behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS,
+        )
+
+        assert "children" in result
+        assert isinstance(result["children"]["targets"], list)
+        assert len(result["children"]["targets"]) == 2
+
+    def test_single_child_unwrapped(self):
+        """Test that a single child is a scalar hash, not a list."""
+        child = ComponentIdentifier(
+            class_name="OnlyChild",
+            class_module="pyrit.target",
+            params={"model_name": "gpt-4"},
+        )
+        identifier = ComponentIdentifier(
+            class_name="SingleChildScorer",
+            class_module="pyrit.score",
+            children={"target": child},
+        )
+        result = _build_eval_dict(
+            identifier,
+            target_child_keys=_TARGET_CHILD_KEYS,
+            behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS,
+        )
+
+        assert isinstance(result["children"]["target"], str)
+
+    def test_no_children_key_when_empty(self):
+        """Test that 'children' key is absent when there are no children."""
+        identifier = ComponentIdentifier(
+            class_name="NoChildScorer",
+            class_module="pyrit.score",
+            params={"threshold": 0.5},
+        )
+        result = _build_eval_dict(
+            identifier,
+            target_child_keys=_TARGET_CHILD_KEYS,
+            behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS,
+        )
+
+        assert "children" not in result
+
+    def test_non_target_children_with_different_params_produce_different_hash(self):
+        """Test that non-target children differing in any param (including operational) produce different hashes."""
+        child1 = ComponentIdentifier(
+            class_name="SubScorer",
+            class_module="pyrit.score",
+            params={"system_prompt_template": "prompt_a", "endpoint": "https://a.com"},
+        )
+        child2 = ComponentIdentifier(
+            class_name="SubScorer",
+            class_module="pyrit.score",
+            params={"system_prompt_template": "prompt_a", "endpoint": "https://b.com"},
+        )
+        id1 = ComponentIdentifier(
+            class_name="Scorer",
+            class_module="pyrit.score",
+            children={"sub_scorer": child1},
+        )
+        id2 = ComponentIdentifier(
+            class_name="Scorer",
+            class_module="pyrit.score",
+            children={"sub_scorer": child2},
+        )
+        result1 = _build_eval_dict(
+            id1, target_child_keys=_TARGET_CHILD_KEYS, behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS
+        )
+        result2 = _build_eval_dict(
+            id2, target_child_keys=_TARGET_CHILD_KEYS, behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS
+        )
+
+        assert result1["children"]["sub_scorer"] != result2["children"]["sub_scorer"]
+
+    def test_target_vs_non_target_children_handled_differently(self):
+        """Test that target children filter params while non-target children keep all params."""
+        child = ComponentIdentifier(
+            class_name="SomeComponent",
+            class_module="pyrit.target",
+            params={"model_name": "gpt-4", "endpoint": "https://example.com"},
+        )
+
+        id_as_target = ComponentIdentifier(
+            class_name="Scorer",
+            class_module="pyrit.score",
+            children={"prompt_target": child},
+        )
+        id_as_non_target = ComponentIdentifier(
+            class_name="Scorer",
+            class_module="pyrit.score",
+            children={"sub_scorer": child},
+        )
+
+        result_target = _build_eval_dict(
+            id_as_target, target_child_keys=_TARGET_CHILD_KEYS, behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS
+        )
+        result_non_target = _build_eval_dict(
+            id_as_non_target, target_child_keys=_TARGET_CHILD_KEYS, behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS
+        )
+
+        assert result_target["children"]["prompt_target"] != result_non_target["children"]["sub_scorer"]
+
+    def test_converter_target_filtered_like_prompt_target(self):
+        """Test that converter_target children are also filtered to behavioral params only."""
+        child1 = ComponentIdentifier(
+            class_name="ConverterTarget",
+            class_module="pyrit.target",
+            params={"model_name": "gpt-4", "temperature": 0.7, "endpoint": "https://endpoint-a.com"},
+        )
+        child2 = ComponentIdentifier(
+            class_name="ConverterTarget",
+            class_module="pyrit.target",
+            params={"model_name": "gpt-4", "temperature": 0.7, "endpoint": "https://endpoint-b.com"},
+        )
+        id1 = ComponentIdentifier(
+            class_name="Scorer",
+            class_module="pyrit.score",
+            children={"converter_target": child1},
+        )
+        id2 = ComponentIdentifier(
+            class_name="Scorer",
+            class_module="pyrit.score",
+            children={"converter_target": child2},
+        )
+        result1 = _build_eval_dict(
+            id1, target_child_keys=_TARGET_CHILD_KEYS, behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS
+        )
+        result2 = _build_eval_dict(
+            id2, target_child_keys=_TARGET_CHILD_KEYS, behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS
+        )
+
+        assert result1["children"]["converter_target"] == result2["children"]["converter_target"]
+
+
+class TestComputeEvalHash:
+    """Tests for ComponentIdentifier.compute_eval_hash (explicit and zero-arg paths)."""
+
+    # --- Explicit-arg tests ---
+
+    def test_deterministic_for_same_identifier(self):
+        """Test that compute_eval_hash returns the same hash for the same identifier."""
+        identifier = ComponentIdentifier(
+            class_name="StableScorer",
+            class_module="pyrit.score",
+            params={"threshold": 0.5},
+        )
+        hash1 = identifier.compute_eval_hash(
+            target_child_keys=_TARGET_CHILD_KEYS,
+            behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS,
+        )
+        hash2 = identifier.compute_eval_hash(
+            target_child_keys=_TARGET_CHILD_KEYS,
+            behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS,
+        )
+
+        assert hash1 == hash2
+
+    def test_returns_hex_string(self):
+        """Test that compute_eval_hash returns a valid hex string."""
+        identifier = ComponentIdentifier(
+            class_name="HexScorer",
+            class_module="pyrit.score",
+        )
+        result = identifier.compute_eval_hash(
+            target_child_keys=_TARGET_CHILD_KEYS,
+            behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS,
+        )
+
+        assert isinstance(result, str)
+        assert len(result) == 64  # SHA-256 hex digest
+        assert all(c in "0123456789abcdef" for c in result)
+
+    def test_different_class_names_produce_different_hashes(self):
+        """Test that different class names produce different eval hashes."""
+        id1 = ComponentIdentifier(class_name="ScorerA", class_module="pyrit.score")
+        id2 = ComponentIdentifier(class_name="ScorerB", class_module="pyrit.score")
+
+        assert id1.compute_eval_hash(
+            target_child_keys=_TARGET_CHILD_KEYS,
+            behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS,
+        ) != id2.compute_eval_hash(
+            target_child_keys=_TARGET_CHILD_KEYS,
+            behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS,
+        )
+
+    def test_different_params_produce_different_hashes(self):
+        """Test that different params produce different eval hashes."""
+        id1 = ComponentIdentifier(class_name="Scorer", class_module="pyrit.score", params={"threshold": 0.5})
+        id2 = ComponentIdentifier(class_name="Scorer", class_module="pyrit.score", params={"threshold": 0.8})
+
+        assert id1.compute_eval_hash(
+            target_child_keys=_TARGET_CHILD_KEYS,
+            behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS,
+        ) != id2.compute_eval_hash(
+            target_child_keys=_TARGET_CHILD_KEYS,
+            behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS,
+        )
+
+    def test_eval_hash_differs_from_component_hash(self):
+        """Test that the eval hash differs from the component hash when a target child has operational params."""
+        child = ComponentIdentifier(
+            class_name="Target",
+            class_module="pyrit.target",
+            params={"model_name": "gpt-4", "endpoint": "https://example.com"},
+        )
+        identifier = ComponentIdentifier(
+            class_name="Scorer",
+            class_module="pyrit.score",
+            children={"prompt_target": child},
+        )
+
+        eval_hash = identifier.compute_eval_hash(
+            target_child_keys=_TARGET_CHILD_KEYS,
+            behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS,
+        )
+        assert eval_hash != identifier.hash
+
+    def test_operational_child_params_ignored(self):
+        """Test that operational params on target children don't affect eval hash."""
+        child1 = ComponentIdentifier(
+            class_name="Target",
+            class_module="pyrit.target",
+            params={
+                "model_name": "gpt-4",
+                "temperature": 0.7,
+                "endpoint": "https://endpoint-a.com",
+                "max_requests_per_minute": 50,
+            },
+        )
+        child2 = ComponentIdentifier(
+            class_name="Target",
+            class_module="pyrit.target",
+            params={
+                "model_name": "gpt-4",
+                "temperature": 0.7,
+                "endpoint": "https://endpoint-b.com",
+                "max_requests_per_minute": 200,
+            },
+        )
+        id1 = ComponentIdentifier(class_name="Scorer", class_module="pyrit.score", children={"prompt_target": child1})
+        id2 = ComponentIdentifier(class_name="Scorer", class_module="pyrit.score", children={"prompt_target": child2})
+
+        assert id1.compute_eval_hash(
+            target_child_keys=_TARGET_CHILD_KEYS,
+            behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS,
+        ) == id2.compute_eval_hash(
+            target_child_keys=_TARGET_CHILD_KEYS,
+            behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS,
+        )
+
+    def test_behavioral_child_params_affect_eval_hash(self):
+        """Test that behavioral params on target children do affect eval hash."""
+        child1 = ComponentIdentifier(
+            class_name="Target",
+            class_module="pyrit.target",
+            params={"model_name": "gpt-4", "temperature": 0.7},
+        )
+        child2 = ComponentIdentifier(
+            class_name="Target",
+            class_module="pyrit.target",
+            params={"model_name": "gpt-4", "temperature": 0.0},
+        )
+        id1 = ComponentIdentifier(class_name="Scorer", class_module="pyrit.score", children={"prompt_target": child1})
+        id2 = ComponentIdentifier(class_name="Scorer", class_module="pyrit.score", children={"prompt_target": child2})
+
+        assert id1.compute_eval_hash(
+            target_child_keys=_TARGET_CHILD_KEYS,
+            behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS,
+        ) != id2.compute_eval_hash(
+            target_child_keys=_TARGET_CHILD_KEYS,
+            behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS,
+        )
+
+    def test_scorer_own_params_all_included(self):
+        """Test that all of the scorer's own params (not just behavioral) are included."""
+        id1 = ComponentIdentifier(
+            class_name="Scorer", class_module="pyrit.score", params={"system_prompt_template": "template_a"}
+        )
+        id2 = ComponentIdentifier(
+            class_name="Scorer", class_module="pyrit.score", params={"system_prompt_template": "template_b"}
+        )
+
+        assert id1.compute_eval_hash(
+            target_child_keys=_TARGET_CHILD_KEYS,
+            behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS,
+        ) != id2.compute_eval_hash(
+            target_child_keys=_TARGET_CHILD_KEYS,
+            behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS,
+        )
+
+    # --- Zero-arg / dynamic-lookup tests ---
+
+    def test_zero_arg_matches_explicit_args_for_scorer(self):
+        """Test that zero-arg compute_eval_hash matches explicit-arg version for Scorer class."""
+        from pyrit.score import Scorer
+
+        child = ComponentIdentifier(
+            class_name="OpenAIChatTarget",
+            class_module="pyrit.prompt_target.openai.openai_chat_target",
+            params={"model_name": "gpt-4", "endpoint": "https://api.example.com"},
+        )
+        identifier = ComponentIdentifier(
+            class_name="Scorer",
+            class_module="pyrit.score.scorer",
+            children={"prompt_target": child},
+        )
+
+        zero_arg_hash = identifier.compute_eval_hash()
+        explicit_hash = identifier.compute_eval_hash(
+            target_child_keys=Scorer.EVAL_TARGET_CHILD_KEYS,
+            behavioral_child_params=Scorer.EVAL_BEHAVIORAL_CHILD_PARAMS,
+        )
+
+        assert zero_arg_hash == explicit_hash
+
+    def test_zero_arg_returns_self_hash_for_unresolvable_class(self):
+        """Test that zero-arg compute_eval_hash falls back to self.hash when class is unresolvable."""
+        identifier = ComponentIdentifier(
+            class_name="NonExistentScorer",
+            class_module="no.such.module",
+            params={"threshold": 0.5},
+        )
+
+        assert identifier.compute_eval_hash() == identifier.hash
+
+    def test_zero_arg_returns_self_hash_for_class_without_classvars(self):
+        """Test that zero-arg compute_eval_hash returns self.hash when class has no eval ClassVars."""
+        identifier = ComponentIdentifier(
+            class_name="ComponentIdentifier",
+            class_module="pyrit.identifiers.component_identifier",
+            params={"some_param": "value"},
+        )
+
+        assert identifier.compute_eval_hash() == identifier.hash
+
+    def test_partial_args_target_keys_only(self):
+        """Test compute_eval_hash with only target_child_keys provided (behavioral_child_params resolved)."""
+        from pyrit.score import Scorer
+
+        child = ComponentIdentifier(
+            class_name="Target",
+            class_module="pyrit.target",
+            params={"model_name": "gpt-4", "endpoint": "https://api.example.com"},
+        )
+        identifier = ComponentIdentifier(
+            class_name="Scorer",
+            class_module="pyrit.score.scorer",
+            children={"prompt_target": child},
+        )
+
+        partial_hash = identifier.compute_eval_hash(
+            target_child_keys=Scorer.EVAL_TARGET_CHILD_KEYS,
+        )
+        full_hash = identifier.compute_eval_hash(
+            target_child_keys=Scorer.EVAL_TARGET_CHILD_KEYS,
+            behavioral_child_params=Scorer.EVAL_BEHAVIORAL_CHILD_PARAMS,
+        )
+
+        assert partial_hash == full_hash
+
+    def test_partial_args_behavioral_params_only(self):
+        """Test compute_eval_hash with only behavioral_child_params provided (target_child_keys resolved)."""
+        from pyrit.score import Scorer
+
+        child = ComponentIdentifier(
+            class_name="Target",
+            class_module="pyrit.target",
+            params={"model_name": "gpt-4", "endpoint": "https://api.example.com"},
+        )
+        identifier = ComponentIdentifier(
+            class_name="Scorer",
+            class_module="pyrit.score.scorer",
+            children={"prompt_target": child},
+        )
+
+        partial_hash = identifier.compute_eval_hash(
+            behavioral_child_params=Scorer.EVAL_BEHAVIORAL_CHILD_PARAMS,
+        )
+        full_hash = identifier.compute_eval_hash(
+            target_child_keys=Scorer.EVAL_TARGET_CHILD_KEYS,
+            behavioral_child_params=Scorer.EVAL_BEHAVIORAL_CHILD_PARAMS,
+        )
+
+        assert partial_hash == full_hash
+
+
+class TestGetEvalHash:
+    """Tests for Identifiable.get_eval_hash convenience method."""
+
+    def test_get_eval_hash_uses_classvars(self):
+        """Test that get_eval_hash passes ClassVar overrides to compute_eval_hash."""
+
+        class FakeScorer(Identifiable):
+            EVAL_TARGET_CHILD_KEYS: ClassVar[frozenset[str]] = frozenset({"my_target"})
+            EVAL_BEHAVIORAL_CHILD_PARAMS: ClassVar[frozenset[str]] = frozenset({"model_name"})
+
+            def _build_identifier(self) -> ComponentIdentifier:
+                child = ComponentIdentifier(
+                    class_name="Target",
+                    class_module="pyrit.target",
+                    params={"model_name": "gpt-4", "endpoint": "https://example.com"},
+                )
+                return ComponentIdentifier.of(self, children={"my_target": child})
+
+        scorer = FakeScorer()
+        eval_hash = scorer.get_eval_hash()
+
+        expected = scorer.get_identifier().compute_eval_hash(
+            target_child_keys=frozenset({"my_target"}),
+            behavioral_child_params=frozenset({"model_name"}),
+        )
+        assert eval_hash == expected
+
+    def test_get_eval_hash_equals_component_hash_when_no_classvars(self):
+        """Test that get_eval_hash returns the component hash when the class defines no eval ClassVar overrides."""
+
+        class SimpleComponent(Identifiable):
+            def _build_identifier(self) -> ComponentIdentifier:
+                return ComponentIdentifier.of(self, params={"key": "value"})
+
+        component = SimpleComponent()
+        assert component.get_eval_hash() == component.get_identifier().hash
+
+    def test_get_eval_hash_filters_operational_params(self):
+        """Test that get_eval_hash filters operational params from target children."""
+
+        class ScorerLike(Identifiable):
+            EVAL_TARGET_CHILD_KEYS: ClassVar[frozenset[str]] = frozenset({"target"})
+            EVAL_BEHAVIORAL_CHILD_PARAMS: ClassVar[frozenset[str]] = frozenset({"model_name"})
+
+            def __init__(self, *, endpoint: str):
+                self._endpoint = endpoint
+
+            def _build_identifier(self) -> ComponentIdentifier:
+                child = ComponentIdentifier(
+                    class_name="Target",
+                    class_module="pyrit.target",
+                    params={"model_name": "gpt-4", "endpoint": self._endpoint},
+                )
+                return ComponentIdentifier.of(self, children={"target": child})
+
+        scorer_a = ScorerLike(endpoint="https://endpoint-a.com")
+        scorer_b = ScorerLike(endpoint="https://endpoint-b.com")
+
+        # Different endpoints should produce same eval hash (operational param filtered)
+        assert scorer_a.get_eval_hash() == scorer_b.get_eval_hash()
+        # But different component hashes (endpoint is part of the full identity)
+        assert scorer_a.get_identifier().hash != scorer_b.get_identifier().hash
diff --git a/tests/unit/score/test_scorer_evaluator.py b/tests/unit/score/test_scorer_evaluator.py
index 46cccaa543..017023292b 100644
--- a/tests/unit/score/test_scorer_evaluator.py
+++ b/tests/unit/score/test_scorer_evaluator.py
@@ -33,6 +33,7 @@ def mock_harm_scorer():
     mock_identifier.hash = "test_hash_456"
     mock_identifier.system_prompt_template = "test_system_prompt"
     scorer.get_identifier = MagicMock(return_value=mock_identifier)
+    scorer.get_eval_hash = MagicMock(return_value="test_hash_456")
     return scorer
 
 
@@ -46,6 +47,7 @@ def mock_objective_scorer():
     mock_identifier.hash = "test_hash_123"
     mock_identifier.user_prompt_template = "test_user_prompt"
     scorer.get_identifier = MagicMock(return_value=mock_identifier)
+    scorer.get_eval_hash = MagicMock(return_value="test_hash_123")
     return scorer
 
 
@@ -187,7 +189,7 @@ def test_compute_harm_metrics_partial_agreement(mock_harm_scorer):
     assert np.isclose(metrics.mean_absolute_error, 0.1)
 
 
-@patch("pyrit.score.scorer_evaluation.scorer_evaluator.find_objective_metrics_by_hash")
+@patch("pyrit.score.scorer_evaluation.scorer_evaluator.find_objective_metrics_by_eval_hash")
 def test_should_skip_evaluation_objective_found(mock_find, mock_objective_scorer, tmp_path):
     """Test skipping evaluation when existing objective metrics have sufficient trials."""
     evaluator = ObjectiveScorerEvaluator(scorer=mock_objective_scorer)
@@ -220,11 +222,11 @@ def test_should_skip_evaluation_objective_found(mock_find, mock_objective_scorer
     assert result == expected_metrics
     mock_find.assert_called_once_with(
         file_path=result_file,
-        hash="test_hash_123",
+        eval_hash="test_hash_123",
     )
 
 
-@patch("pyrit.score.scorer_evaluation.scorer_evaluator.find_objective_metrics_by_hash")
+@patch("pyrit.score.scorer_evaluation.scorer_evaluator.find_objective_metrics_by_eval_hash")
 def test_should_skip_evaluation_objective_not_found(mock_find, mock_objective_scorer, tmp_path):
     """Test when no existing objective metrics are found in registry."""
     evaluator = ObjectiveScorerEvaluator(scorer=mock_objective_scorer)
@@ -243,11 +245,11 @@ def test_should_skip_evaluation_objective_not_found(mock_find, mock_objective_sc
     assert result is None
     mock_find.assert_called_once_with(
         file_path=result_file,
-        hash="test_hash_123",
+        eval_hash="test_hash_123",
     )
 
 
-@patch("pyrit.score.scorer_evaluation.scorer_evaluator.find_objective_metrics_by_hash")
+@patch("pyrit.score.scorer_evaluation.scorer_evaluator.find_objective_metrics_by_eval_hash")
 def test_should_skip_evaluation_version_changed_runs_evaluation(mock_find, mock_objective_scorer, tmp_path):
     """Test that different dataset_version triggers re-evaluation (replace existing)."""
     evaluator = ObjectiveScorerEvaluator(scorer=mock_objective_scorer)
@@ -280,7 +282,7 @@ def test_should_skip_evaluation_version_changed_runs_evaluation(mock_find, mock_
     assert result is None
 
 
-@patch("pyrit.score.scorer_evaluation.scorer_evaluator.find_objective_metrics_by_hash")
+@patch("pyrit.score.scorer_evaluation.scorer_evaluator.find_objective_metrics_by_eval_hash")
 def test_should_skip_evaluation_fewer_trials_requested_skips(mock_find, mock_objective_scorer, tmp_path):
     """Test that requesting fewer trials than existing skips evaluation."""
     evaluator = ObjectiveScorerEvaluator(scorer=mock_objective_scorer)
@@ -313,7 +315,7 @@ def test_should_skip_evaluation_fewer_trials_requested_skips(mock_find, mock_obj
     assert result == existing_metrics
 
 
-@patch("pyrit.score.scorer_evaluation.scorer_evaluator.find_objective_metrics_by_hash")
+@patch("pyrit.score.scorer_evaluation.scorer_evaluator.find_objective_metrics_by_eval_hash")
 def test_should_skip_evaluation_more_trials_requested_runs(mock_find, mock_objective_scorer, tmp_path):
     """Test that requesting more trials than existing triggers re-evaluation."""
     evaluator = ObjectiveScorerEvaluator(scorer=mock_objective_scorer)
@@ -346,7 +348,7 @@ def test_should_skip_evaluation_more_trials_requested_runs(mock_find, mock_objec
     assert result is None
 
 
-@patch("pyrit.score.scorer_evaluation.scorer_evaluator.find_harm_metrics_by_hash")
+@patch("pyrit.score.scorer_evaluation.scorer_evaluator.find_harm_metrics_by_eval_hash")
 def test_should_skip_evaluation_harm_found(mock_find, mock_harm_scorer, tmp_path):
     """Test skipping evaluation when existing harm metrics have sufficient trials."""
     evaluator = HarmScorerEvaluator(scorer=mock_harm_scorer)
@@ -380,12 +382,12 @@ def test_should_skip_evaluation_harm_found(mock_find, mock_harm_scorer, tmp_path
     assert should_skip is True
     assert result == expected_metrics
     mock_find.assert_called_once_with(
-        hash="test_hash_456",
+        eval_hash="test_hash_456",
         harm_category="hate_speech",
     )
 
 
-@patch("pyrit.score.scorer_evaluation.scorer_evaluator.find_harm_metrics_by_hash")
+@patch("pyrit.score.scorer_evaluation.scorer_evaluator.find_harm_metrics_by_eval_hash")
 def test_should_skip_evaluation_harm_missing_category(mock_find, mock_harm_scorer, tmp_path):
     """Test that missing harm_category returns should not skip."""
     evaluator = HarmScorerEvaluator(scorer=mock_harm_scorer)
@@ -404,14 +406,14 @@ def test_should_skip_evaluation_harm_missing_category(mock_find, mock_harm_score
     mock_find.assert_not_called()
 
 
-@patch("pyrit.score.scorer_evaluation.scorer_evaluator.find_objective_metrics_by_hash")
+@patch("pyrit.score.scorer_evaluation.scorer_evaluator.find_objective_metrics_by_eval_hash")
 def test_should_skip_evaluation_exception_handling(mock_find, mock_objective_scorer, tmp_path):
     """Test that exceptions are caught and returns (False, None)."""
     evaluator = ObjectiveScorerEvaluator(scorer=mock_objective_scorer)
     result_file = tmp_path / "test_results.jsonl"
 
-    # Make get_identifier() raise an exception
-    mock_objective_scorer.get_identifier = MagicMock(side_effect=Exception("Identifier computation failed"))
+    # Make get_eval_hash() raise an exception
+    mock_objective_scorer.get_eval_hash = MagicMock(side_effect=Exception("Identifier computation failed"))
 
     should_skip, result = evaluator._should_skip_evaluation(
         dataset_version="1.0",
@@ -424,13 +426,11 @@ def test_should_skip_evaluation_exception_handling(mock_find, mock_objective_sco
     assert result is None
     mock_find.assert_not_called()
 
-    # Restore get_identifier for other tests
-    mock_identifier = MagicMock()
-    mock_identifier.hash = "test_hash_123"
-    mock_objective_scorer.get_identifier = MagicMock(return_value=mock_identifier)
+    # Restore get_eval_hash for other tests
+    mock_objective_scorer.get_eval_hash = MagicMock(return_value="test_hash_123")
 
 
-@patch("pyrit.score.scorer_evaluation.scorer_evaluator.find_harm_metrics_by_hash")
+@patch("pyrit.score.scorer_evaluation.scorer_evaluator.find_harm_metrics_by_eval_hash")
 def test_should_skip_evaluation_harm_definition_version_changed_runs_evaluation(mock_find, mock_harm_scorer, tmp_path):
     """Test that harm_definition_version change triggers re-evaluation."""
     evaluator = HarmScorerEvaluator(scorer=mock_harm_scorer)
@@ -469,7 +469,7 @@ def test_should_skip_evaluation_harm_definition_version_changed_runs_evaluation(
     assert result is None
 
 
-@patch("pyrit.score.scorer_evaluation.scorer_evaluator.find_harm_metrics_by_hash")
+@patch("pyrit.score.scorer_evaluation.scorer_evaluator.find_harm_metrics_by_eval_hash")
 def test_should_skip_evaluation_harm_definition_version_same_skips(mock_find, mock_harm_scorer, tmp_path):
     """Test that matching harm_definition_version allows skip when other conditions met."""
     evaluator = HarmScorerEvaluator(scorer=mock_harm_scorer)
@@ -508,7 +508,7 @@ def test_should_skip_evaluation_harm_definition_version_same_skips(mock_find, mo
     assert result == existing_metrics
 
 
-@patch("pyrit.score.scorer_evaluation.scorer_evaluator.find_harm_metrics_by_hash")
+@patch("pyrit.score.scorer_evaluation.scorer_evaluator.find_harm_metrics_by_eval_hash")
 def test_should_skip_evaluation_harm_definition_version_none_in_existing_runs_evaluation(
     mock_find, mock_harm_scorer, tmp_path
 ):
diff --git a/tests/unit/score/test_scorer_metrics.py b/tests/unit/score/test_scorer_metrics.py
index 26c4971aac..7000a9e123 100644
--- a/tests/unit/score/test_scorer_metrics.py
+++ b/tests/unit/score/test_scorer_metrics.py
@@ -12,8 +12,6 @@
     ScorerMetricsWithIdentity,
 )
 from pyrit.score.scorer_evaluation.scorer_metrics_io import (
-    _build_eval_dict,
-    compute_eval_hash,
     get_all_harm_metrics,
     get_all_objective_metrics,
     replace_evaluation_results,
@@ -440,6 +438,7 @@ def test_replace_adds_new_entry(self, tmp_path):
         replace_evaluation_results(
             file_path=result_file,
             scorer_identifier=scorer_identifier,
+            eval_hash=scorer_identifier.hash,
             metrics=metrics,
         )
 
@@ -449,7 +448,7 @@ def test_replace_adds_new_entry(self, tmp_path):
 
         assert len(lines) == 1
         entry = json.loads(lines[0])
-        assert entry["hash"] == scorer_identifier.hash
+        assert entry["eval_hash"] == scorer_identifier.hash
         assert entry["metrics"]["accuracy"] == 0.9
 
     def test_replace_replaces_existing_entry(self, tmp_path):
@@ -477,6 +476,7 @@ def test_replace_replaces_existing_entry(self, tmp_path):
         replace_evaluation_results(
             file_path=result_file,
             scorer_identifier=scorer_identifier,
+            eval_hash=scorer_identifier.hash,
             metrics=initial_metrics,
         )
 
@@ -496,6 +496,7 @@ def test_replace_replaces_existing_entry(self, tmp_path):
         replace_evaluation_results(
             file_path=result_file,
             scorer_identifier=scorer_identifier,
+            eval_hash=scorer_identifier.hash,
             metrics=updated_metrics,
         )
 
@@ -505,7 +506,7 @@ def test_replace_replaces_existing_entry(self, tmp_path):
 
         assert len(lines) == 1
         entry = json.loads(lines[0])
-        assert entry["hash"] == scorer_identifier.hash
+        assert entry["eval_hash"] == scorer_identifier.hash
         assert entry["metrics"]["accuracy"] == 0.9
         assert entry["metrics"]["num_scorer_trials"] == 5
 
@@ -532,6 +533,7 @@ def test_replace_preserves_other_entries(self, tmp_path):
         replace_evaluation_results(
             file_path=result_file,
             scorer_identifier=scorer1,
+            eval_hash=scorer1.hash,
             metrics=metrics1,
         )
 
@@ -554,6 +556,7 @@ def test_replace_preserves_other_entries(self, tmp_path):
         replace_evaluation_results(
             file_path=result_file,
             scorer_identifier=scorer2,
+            eval_hash=scorer2.hash,
             metrics=metrics2,
         )
 
@@ -572,6 +575,7 @@ def test_replace_preserves_other_entries(self, tmp_path):
         replace_evaluation_results(
             file_path=result_file,
             scorer_identifier=scorer1,
+            eval_hash=scorer1.hash,
             metrics=updated_metrics1,
         )
 
@@ -581,466 +585,9 @@ def test_replace_preserves_other_entries(self, tmp_path):
 
         assert len(lines) == 2
         entries = [json.loads(line) for line in lines]
-        hashes = {e["hash"]: e for e in entries}
+        hashes = {e["eval_hash"]: e for e in entries}
 
         assert scorer1.hash in hashes
         assert scorer2.hash in hashes
         assert hashes[scorer1.hash]["metrics"]["accuracy"] == 0.95
         assert hashes[scorer2.hash]["metrics"]["accuracy"] == 0.85
-
-
-class TestBuildEvalDict:
-    """Tests for the _build_eval_dict function."""
-
-    def test_basic_identifier_without_params_or_children(self):
-        """Test _build_eval_dict with a simple identifier with no params or children."""
-        identifier = ComponentIdentifier(
-            class_name="SimpleScorer",
-            class_module="pyrit.score",
-        )
-        result = _build_eval_dict(identifier)
-
-        assert result["class_name"] == "SimpleScorer"
-        assert result["class_module"] == "pyrit.score"
-        assert "children" not in result
-
-    def test_includes_all_non_none_params(self):
-        """Test that all non-None params are included in the eval dict."""
-        identifier = ComponentIdentifier(
-            class_name="ParamScorer",
-            class_module="pyrit.score",
-            params={"threshold": 0.5, "template": "prompt_text", "mode": "strict"},
-        )
-        result = _build_eval_dict(identifier)
-
-        assert result["threshold"] == 0.5
-        assert result["template"] == "prompt_text"
-        assert result["mode"] == "strict"
-
-    def test_param_allowlist_filters_params(self):
-        """Test that param_allowlist restricts which params are included."""
-        identifier = ComponentIdentifier(
-            class_name="FilteredScorer",
-            class_module="pyrit.score",
-            params={"threshold": 0.5, "template": "prompt_text", "mode": "strict"},
-        )
-        result = _build_eval_dict(identifier, param_allowlist=frozenset({"threshold", "mode"}))
-
-        assert result["threshold"] == 0.5
-        assert result["mode"] == "strict"
-        assert "template" not in result
-
-    def test_none_params_are_excluded(self):
-        """Test that None-valued params are excluded from the eval dict."""
-        identifier = ComponentIdentifier(
-            class_name="NoneScorer",
-            class_module="pyrit.score",
-            params={"threshold": 0.5, "optional_field": None},
-        )
-        # Note: ComponentIdentifier filters None in .of(), but direct construction allows it
-        result = _build_eval_dict(identifier)
-
-        assert result["threshold"] == 0.5
-        assert "optional_field" not in result
-
-    def test_children_hashed_with_behavioral_params_only(self):
-        """Test that target children are projected to behavioral params only."""
-        child = ComponentIdentifier(
-            class_name="ChildTarget",
-            class_module="pyrit.target",
-            params={
-                "model_name": "gpt-4",
-                "temperature": 0.7,
-                "top_p": 0.9,
-                "max_requests_per_minute": 100,
-                "endpoint": "https://example.com",
-            },
-        )
-        identifier = ComponentIdentifier(
-            class_name="ParentScorer",
-            class_module="pyrit.score",
-            children={"prompt_target": child},
-        )
-        result = _build_eval_dict(identifier)
-
-        assert "children" in result
-        # The child hash should be a string (hashed), not the full child dict
-        assert isinstance(result["children"]["prompt_target"], str)
-
-    def test_children_with_different_operational_params_produce_same_hash(self):
-        """Test that target children differing only in operational params produce the same child hash."""
-        child1 = ComponentIdentifier(
-            class_name="ChildTarget",
-            class_module="pyrit.target",
-            params={
-                "model_name": "gpt-4",
-                "temperature": 0.7,
-                "endpoint": "https://endpoint-a.com",
-                "max_requests_per_minute": 50,
-            },
-        )
-        child2 = ComponentIdentifier(
-            class_name="ChildTarget",
-            class_module="pyrit.target",
-            params={
-                "model_name": "gpt-4",
-                "temperature": 0.7,
-                "endpoint": "https://endpoint-b.com",
-                "max_requests_per_minute": 200,
-            },
-        )
-        id1 = ComponentIdentifier(
-            class_name="Scorer",
-            class_module="pyrit.score",
-            children={"prompt_target": child1},
-        )
-        id2 = ComponentIdentifier(
-            class_name="Scorer",
-            class_module="pyrit.score",
-            children={"prompt_target": child2},
-        )
-        result1 = _build_eval_dict(id1)
-        result2 = _build_eval_dict(id2)
-
-        assert result1["children"]["prompt_target"] == result2["children"]["prompt_target"]
-
-    def test_children_with_different_behavioral_params_produce_different_hash(self):
-        """Test that target children differing in behavioral params produce different child hashes."""
-        child1 = ComponentIdentifier(
-            class_name="ChildTarget",
-            class_module="pyrit.target",
-            params={"model_name": "gpt-4", "temperature": 0.7},
-        )
-        child2 = ComponentIdentifier(
-            class_name="ChildTarget",
-            class_module="pyrit.target",
-            params={"model_name": "gpt-3.5-turbo", "temperature": 0.7},
-        )
-        id1 = ComponentIdentifier(
-            class_name="Scorer",
-            class_module="pyrit.score",
-            children={"prompt_target": child1},
-        )
-        id2 = ComponentIdentifier(
-            class_name="Scorer",
-            class_module="pyrit.score",
-            children={"prompt_target": child2},
-        )
-        result1 = _build_eval_dict(id1)
-        result2 = _build_eval_dict(id2)
-
-        assert result1["children"]["prompt_target"] != result2["children"]["prompt_target"]
-
-    def test_multiple_children_as_list(self):
-        """Test that list-valued children produce a list of hashes."""
-        child_a = ComponentIdentifier(
-            class_name="ChildA",
-            class_module="pyrit.target",
-            params={"model_name": "gpt-4"},
-        )
-        child_b = ComponentIdentifier(
-            class_name="ChildB",
-            class_module="pyrit.target",
-            params={"model_name": "gpt-3.5-turbo"},
-        )
-        identifier = ComponentIdentifier(
-            class_name="MultiChildScorer",
-            class_module="pyrit.score",
-            children={"targets": [child_a, child_b]},
-        )
-        result = _build_eval_dict(identifier)
-
-        assert "children" in result
-        assert isinstance(result["children"]["targets"], list)
-        assert len(result["children"]["targets"]) == 2
-
-    def test_single_child_list_unwrapped(self):
-        """Test that a single-element child list is unwrapped to a scalar hash."""
-        child = ComponentIdentifier(
-            class_name="OnlyChild",
-            class_module="pyrit.target",
-            params={"model_name": "gpt-4"},
-        )
-        identifier = ComponentIdentifier(
-            class_name="SingleChildScorer",
-            class_module="pyrit.score",
-            children={"target": child},
-        )
-        result = _build_eval_dict(identifier)
-
-        # Single child should be a scalar string, not a list
-        assert isinstance(result["children"]["target"], str)
-
-    def test_no_children_key_when_empty(self):
-        """Test that 'children' key is absent when there are no children."""
-        identifier = ComponentIdentifier(
-            class_name="NoChildScorer",
-            class_module="pyrit.score",
-            params={"threshold": 0.5},
-        )
-        result = _build_eval_dict(identifier)
-
-        assert "children" not in result
-
-    def test_non_target_children_include_all_params(self):
-        """Test that non-target children (e.g., sub-scorers) include all params, not just behavioral ones."""
-        child = ComponentIdentifier(
-            class_name="SubScorer",
-            class_module="pyrit.score",
-            params={
-                "model_name": "gpt-4",
-                "temperature": 0.7,
-                "system_prompt_template": "custom_prompt",
-                "threshold": 0.8,
-            },
-        )
-        identifier = ComponentIdentifier(
-            class_name="ParentScorer",
-            class_module="pyrit.score",
-            children={"sub_scorer": child},
-        )
-        result = _build_eval_dict(identifier)
-
-        assert "children" in result
-        assert isinstance(result["children"]["sub_scorer"], str)
-
-    def test_non_target_children_with_different_params_produce_different_hash(self):
-        """Test that non-target children differing in any param produce different hashes."""
-        child1 = ComponentIdentifier(
-            class_name="SubScorer",
-            class_module="pyrit.score",
-            params={"system_prompt_template": "prompt_a", "endpoint": "https://a.com"},
-        )
-        child2 = ComponentIdentifier(
-            class_name="SubScorer",
-            class_module="pyrit.score",
-            params={"system_prompt_template": "prompt_a", "endpoint": "https://b.com"},
-        )
-        id1 = ComponentIdentifier(
-            class_name="Scorer",
-            class_module="pyrit.score",
-            children={"sub_scorer": child1},
-        )
-        id2 = ComponentIdentifier(
-            class_name="Scorer",
-            class_module="pyrit.score",
-            children={"sub_scorer": child2},
-        )
-        result1 = _build_eval_dict(id1)
-        result2 = _build_eval_dict(id2)
-
-        # Non-target children use full eval treatment, so all params matter
-        assert result1["children"]["sub_scorer"] != result2["children"]["sub_scorer"]
-
-    def test_target_vs_non_target_children_handled_differently(self):
-        """Test that target children filter params while non-target children keep all params."""
-        child = ComponentIdentifier(
-            class_name="SomeComponent",
-            class_module="pyrit.target",
-            params={
-                "model_name": "gpt-4",
-                "endpoint": "https://example.com",
-            },
-        )
-
-        # Same child as a target child (behavioral filtering applies)
-        id_as_target = ComponentIdentifier(
-            class_name="Scorer",
-            class_module="pyrit.score",
-            children={"prompt_target": child},
-        )
-        # Same child as a non-target child (full eval treatment)
-        id_as_non_target = ComponentIdentifier(
-            class_name="Scorer",
-            class_module="pyrit.score",
-            children={"sub_scorer": child},
-        )
-
-        result_target = _build_eval_dict(id_as_target)
-        result_non_target = _build_eval_dict(id_as_non_target)
-
-        # The child hashes should differ because target filtering drops "endpoint"
-        assert result_target["children"]["prompt_target"] != result_non_target["children"]["sub_scorer"]
-
-    def test_converter_target_children_filtered_like_prompt_target(self):
-        """Test that converter_target children are also filtered to behavioral params only."""
-        child1 = ComponentIdentifier(
-            class_name="ConverterTarget",
-            class_module="pyrit.target",
-            params={
-                "model_name": "gpt-4",
-                "temperature": 0.7,
-                "endpoint": "https://endpoint-a.com",
-            },
-        )
-        child2 = ComponentIdentifier(
-            class_name="ConverterTarget",
-            class_module="pyrit.target",
-            params={
-                "model_name": "gpt-4",
-                "temperature": 0.7,
-                "endpoint": "https://endpoint-b.com",
-            },
-        )
-        id1 = ComponentIdentifier(
-            class_name="Scorer",
-            class_module="pyrit.score",
-            children={"converter_target": child1},
-        )
-        id2 = ComponentIdentifier(
-            class_name="Scorer",
-            class_module="pyrit.score",
-            children={"converter_target": child2},
-        )
-        result1 = _build_eval_dict(id1)
-        result2 = _build_eval_dict(id2)
-
-        # Operational param "endpoint" should be filtered, so hashes match
-        assert result1["children"]["converter_target"] == result2["children"]["converter_target"]
-
-
-class TestComputeEvalHash:
-    """Tests for the compute_eval_hash function."""
-
-    def test_deterministic_for_same_identifier(self):
-        """Test that compute_eval_hash returns the same hash for the same identifier."""
-        identifier = ComponentIdentifier(
-            class_name="StableScorer",
-            class_module="pyrit.score",
-            params={"threshold": 0.5},
-        )
-        hash1 = compute_eval_hash(identifier)
-        hash2 = compute_eval_hash(identifier)
-
-        assert hash1 == hash2
-
-    def test_returns_hex_string(self):
-        """Test that compute_eval_hash returns a valid hex string."""
-        identifier = ComponentIdentifier(
-            class_name="HexScorer",
-            class_module="pyrit.score",
-        )
-        result = compute_eval_hash(identifier)
-
-        assert isinstance(result, str)
-        assert len(result) == 64  # SHA-256 hex digest
-        assert all(c in "0123456789abcdef" for c in result)
-
-    def test_different_class_names_produce_different_hashes(self):
-        """Test that different class names produce different eval hashes."""
-        id1 = ComponentIdentifier(class_name="ScorerA", class_module="pyrit.score")
-        id2 = ComponentIdentifier(class_name="ScorerB", class_module="pyrit.score")
-
-        assert compute_eval_hash(id1) != compute_eval_hash(id2)
-
-    def test_different_params_produce_different_hashes(self):
-        """Test that different params produce different eval hashes."""
-        id1 = ComponentIdentifier(
-            class_name="Scorer",
-            class_module="pyrit.score",
-            params={"threshold": 0.5},
-        )
-        id2 = ComponentIdentifier(
-            class_name="Scorer",
-            class_module="pyrit.score",
-            params={"threshold": 0.8},
-        )
-
-        assert compute_eval_hash(id1) != compute_eval_hash(id2)
-
-    def test_eval_hash_differs_from_component_hash(self):
-        """Test that eval hash differs from the ComponentIdentifier.hash for target children with operational params."""
-        child = ComponentIdentifier(
-            class_name="Target",
-            class_module="pyrit.target",
-            params={
-                "model_name": "gpt-4",
-                "endpoint": "https://example.com",
-            },
-        )
-        identifier = ComponentIdentifier(
-            class_name="Scorer",
-            class_module="pyrit.score",
-            children={"prompt_target": child},
-        )
-
-        eval_hash = compute_eval_hash(identifier)
-        component_hash = identifier.hash
-
-        # They should differ because eval hash filters operational params from target children
-        assert eval_hash != component_hash
-
-    def test_operational_child_params_ignored_in_eval_hash(self):
-        """Test that operational params on target children don't affect eval hash."""
-        child1 = ComponentIdentifier(
-            class_name="Target",
-            class_module="pyrit.target",
-            params={
-                "model_name": "gpt-4",
-                "temperature": 0.7,
-                "endpoint": "https://endpoint-a.com",
-                "max_requests_per_minute": 50,
-            },
-        )
-        child2 = ComponentIdentifier(
-            class_name="Target",
-            class_module="pyrit.target",
-            params={
-                "model_name": "gpt-4",
-                "temperature": 0.7,
-                "endpoint": "https://endpoint-b.com",
-                "max_requests_per_minute": 200,
-            },
-        )
-        id1 = ComponentIdentifier(
-            class_name="Scorer",
-            class_module="pyrit.score",
-            children={"prompt_target": child1},
-        )
-        id2 = ComponentIdentifier(
-            class_name="Scorer",
-            class_module="pyrit.score",
-            children={"prompt_target": child2},
-        )
-
-        assert compute_eval_hash(id1) == compute_eval_hash(id2)
-
-    def test_behavioral_child_params_affect_eval_hash(self):
-        """Test that behavioral params on target children do affect eval hash."""
-        child1 = ComponentIdentifier(
-            class_name="Target",
-            class_module="pyrit.target",
-            params={"model_name": "gpt-4", "temperature": 0.7},
-        )
-        child2 = ComponentIdentifier(
-            class_name="Target",
-            class_module="pyrit.target",
-            params={"model_name": "gpt-4", "temperature": 0.0},
-        )
-        id1 = ComponentIdentifier(
-            class_name="Scorer",
-            class_module="pyrit.score",
-            children={"prompt_target": child1},
-        )
-        id2 = ComponentIdentifier(
-            class_name="Scorer",
-            class_module="pyrit.score",
-            children={"prompt_target": child2},
-        )
-
-        assert compute_eval_hash(id1) != compute_eval_hash(id2)
-
-    def test_scorer_own_params_all_included(self):
-        """Test that all of the scorer's own params (not just behavioral) are included."""
-        id1 = ComponentIdentifier(
-            class_name="Scorer",
-            class_module="pyrit.score",
-            params={"system_prompt_template": "template_a"},
-        )
-        id2 = ComponentIdentifier(
-            class_name="Scorer",
-            class_module="pyrit.score",
-            params={"system_prompt_template": "template_b"},
-        )
-
-        assert compute_eval_hash(id1) != compute_eval_hash(id2)

From 14c887f35b66acc650528f9996042e92c1365f52 Mon Sep 17 00:00:00 2001
From: Richard Lundeen <rlundeen@microsoft.com>
Date: Mon, 2 Mar 2026 16:49:39 -0800
Subject: [PATCH 2/5] fixing flaky test

---
 tests/unit/target/test_prompt_target_text.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/unit/target/test_prompt_target_text.py b/tests/unit/target/test_prompt_target_text.py
index ceee9eaa00..d6cbb4140a 100644
--- a/tests/unit/target/test_prompt_target_text.py
+++ b/tests/unit/target/test_prompt_target_text.py
@@ -20,6 +20,7 @@ def sample_entries() -> MutableSequence[MessagePiece]:
 
 
 @pytest.mark.asyncio
+@pytest.mark.usefixtures("patch_central_database")
 async def test_send_prompt_user_no_system(sample_entries: MutableSequence[MessagePiece]):
     output_stream = io.StringIO()
     no_op = TextTarget(text_stream=output_stream)
@@ -37,6 +38,7 @@ async def test_send_prompt_user_no_system(sample_entries: MutableSequence[Messag
 
 
 @pytest.mark.asyncio
+@pytest.mark.usefixtures("patch_central_database")
 async def test_send_prompt_stream(sample_entries: MutableSequence[MessagePiece]):
     with NamedTemporaryFile(mode="w+", delete=False) as tmp_file:
         prompt = "hi, I am a victim chatbot, how can I help?"

From a7303c034806895786bf5e3bcf4cb452993b4db9 Mon Sep 17 00:00:00 2001
From: Richard Lundeen <rlundeen@microsoft.com>
Date: Tue, 3 Mar 2026 13:02:58 -0800
Subject: [PATCH 3/5] pr feedback

---
 doc/api.rst                                   |   2 +
 pyrit/identifiers/__init__.py                 |   3 +
 pyrit/identifiers/component_identifier.py     | 179 -----------
 pyrit/identifiers/evaluation_identity.py      | 173 ++++++++++
 pyrit/models/scenario_result.py               |   3 +-
 pyrit/score/printer/console_scorer_printer.py |   6 +-
 pyrit/score/scorer.py                         |  22 +-
 .../scorer_evaluation_identity.py             |  28 ++
 .../identifiers/test_component_identifier.py  | 298 +++---------------
 .../identifiers/test_evaluation_identity.py   | 219 +++++++++++++
 .../score/test_scorer_evaluation_identity.py  | 156 +++++++++
 11 files changed, 644 insertions(+), 445 deletions(-)
 create mode 100644 pyrit/identifiers/evaluation_identity.py
 create mode 100644 pyrit/score/scorer_evaluation/scorer_evaluation_identity.py
 create mode 100644 tests/unit/identifiers/test_evaluation_identity.py
 create mode 100644 tests/unit/score/test_scorer_evaluation_identity.py

diff --git a/doc/api.rst b/doc/api.rst
index 8cc7a73ce0..0af9385233 100644
--- a/doc/api.rst
+++ b/doc/api.rst
@@ -273,7 +273,9 @@ API Reference
 
     class_name_to_snake_case
     ComponentIdentifier
+    compute_eval_hash
     config_hash
+    EvaluationIdentity
     Identifiable
     snake_case_to_class_name
 
diff --git a/pyrit/identifiers/__init__.py b/pyrit/identifiers/__init__.py
index 04fae18195..4c2de95bcb 100644
--- a/pyrit/identifiers/__init__.py
+++ b/pyrit/identifiers/__init__.py
@@ -8,10 +8,13 @@
     snake_case_to_class_name,
 )
 from pyrit.identifiers.component_identifier import ComponentIdentifier, Identifiable, config_hash
+from pyrit.identifiers.evaluation_identity import EvaluationIdentity, compute_eval_hash
 
 __all__ = [
     "class_name_to_snake_case",
     "ComponentIdentifier",
+    "compute_eval_hash",
+    "EvaluationIdentity",
     "Identifiable",
     "snake_case_to_class_name",
     "config_hash",
diff --git a/pyrit/identifiers/component_identifier.py b/pyrit/identifiers/component_identifier.py
index acf9bc57ee..fe306053ae 100644
--- a/pyrit/identifiers/component_identifier.py
+++ b/pyrit/identifiers/component_identifier.py
@@ -17,7 +17,6 @@
 from __future__ import annotations
 
 import hashlib
-import importlib
 import json
 import logging
 from abc import ABC, abstractmethod
@@ -99,78 +98,6 @@ def _build_hash_dict(
     return hash_dict
 
 
-def _build_eval_dict(
-    identifier: ComponentIdentifier,
-    *,
-    target_child_keys: frozenset[str],
-    behavioral_child_params: frozenset[str],
-    param_allowlist: Optional[frozenset[str]] = None,
-) -> dict[str, Any]:
-    """
-    Build a filtered dictionary for eval-hash computation.
-
-    Includes only behavioral parameters. For child components whose names appear
-    in ``target_child_keys``, only params in ``behavioral_child_params`` are kept
-    (stripping operational params like endpoint, max_requests_per_minute).
-    Non-target children receive full eval treatment recursively.
-
-    Args:
-        identifier (ComponentIdentifier): The component identity to process.
-        target_child_keys (frozenset[str]): Child names that are targets
-            (e.g., ``{"prompt_target", "converter_target"}``).
-        behavioral_child_params (frozenset[str]): Param allowlist applied to
-            target children (e.g., ``{"model_name", "temperature", "top_p"}``).
-        param_allowlist (Optional[frozenset[str]]): If provided, only include
-            params whose keys are in the allowlist. If None, include all params.
-
-    Returns:
-        dict[str, Any]: The filtered dictionary suitable for hashing.
-    """
-    eval_dict: dict[str, Any] = {
-        ComponentIdentifier.KEY_CLASS_NAME: identifier.class_name,
-        ComponentIdentifier.KEY_CLASS_MODULE: identifier.class_module,
-    }
-
-    for key, value in sorted(identifier.params.items()):
-        if value is not None and (param_allowlist is None or key in param_allowlist):
-            eval_dict[key] = value
-
-    if identifier.children:
-        eval_children: dict[str, Any] = {}
-        for name in sorted(identifier.children):
-            child_list = identifier.get_child_list(name)
-            if name in target_child_keys:
-                # Targets: filter to behavioral params only
-                hashes = [
-                    config_hash(
-                        _build_eval_dict(
-                            c,
-                            target_child_keys=target_child_keys,
-                            behavioral_child_params=behavioral_child_params,
-                            param_allowlist=behavioral_child_params,
-                        )
-                    )
-                    for c in child_list
-                ]
-            else:
-                # Non-targets (e.g., sub-scorers): full eval treatment, recurse without param filtering
-                hashes = [
-                    config_hash(
-                        _build_eval_dict(
-                            c,
-                            target_child_keys=target_child_keys,
-                            behavioral_child_params=behavioral_child_params,
-                        )
-                    )
-                    for c in child_list
-                ]
-            eval_children[name] = hashes[0] if len(hashes) == 1 else hashes
-        if eval_children:
-            eval_dict["children"] = eval_children
-
-    return eval_dict
-
-
 @dataclass(frozen=True)
 class ComponentIdentifier:
     """
@@ -238,82 +165,6 @@ def unique_name(self) -> str:
         """
         return f"{self.class_name}::{self.short_hash}"
 
-    def compute_eval_hash(
-        self,
-        *,
-        target_child_keys: Optional[frozenset[str]] = None,
-        behavioral_child_params: Optional[frozenset[str]] = None,
-    ) -> str:
-        """
-        Compute a behavioral equivalence hash for evaluation grouping.
-
-        Unlike ``hash`` (which includes all params of self and children), the eval
-        hash filters child components that are "targets" to only their behavioral
-        params (e.g., model_name, temperature, top_p), stripping operational params
-        like endpoint or max_requests_per_minute. This ensures the same logical
-        configuration on different deployments produces the same eval hash.
-
-        Non-target children (e.g., sub-scorers) receive full recursive eval treatment.
-
-        When ``target_child_keys`` and ``behavioral_child_params`` are not provided,
-        the method dynamically resolves the class from ``class_module`` / ``class_name``
-        and reads its ``EVAL_TARGET_CHILD_KEYS`` and ``EVAL_BEHAVIORAL_CHILD_PARAMS``
-        ClassVars. If the class cannot be resolved, returns ``self.hash``.
-
-        When ``target_child_keys`` is empty (explicitly or from the resolved class),
-        no child filtering occurs and the result equals ``self.hash``.
-
-        Args:
-            target_child_keys (Optional[frozenset[str]]): Child names that are targets.
-                If None, resolved dynamically from the component class.
-            behavioral_child_params (Optional[frozenset[str]]): Param allowlist for
-                target children. If None, resolved dynamically from the component class.
-
-        Returns:
-            str: A hex-encoded SHA256 hash suitable for eval registry keying.
-        """
-        if target_child_keys is None or behavioral_child_params is None:
-            resolved_keys, resolved_params = self._resolve_eval_config()
-            if target_child_keys is None:
-                target_child_keys = resolved_keys
-            if behavioral_child_params is None:
-                behavioral_child_params = resolved_params
-
-        if not target_child_keys:
-            return self.hash
-
-        eval_dict = _build_eval_dict(
-            self,
-            target_child_keys=target_child_keys,
-            behavioral_child_params=behavioral_child_params,
-        )
-        return config_hash(eval_dict)
-
-    def _resolve_eval_config(self) -> tuple[frozenset[str], frozenset[str]]:
-        """
-        Dynamically resolve eval-hash configuration from the component class.
-
-        Uses ``importlib`` to import ``self.class_module`` and look up
-        ``self.class_name`` to read ``EVAL_TARGET_CHILD_KEYS`` and
-        ``EVAL_BEHAVIORAL_CHILD_PARAMS`` ClassVars. Returns empty frozensets
-        if the class cannot be resolved.
-
-        Returns:
-            Tuple of (target_child_keys, behavioral_child_params) frozensets.
-        """
-        empty: tuple[frozenset[str], frozenset[str]] = (frozenset(), frozenset())
-        try:
-            module = importlib.import_module(self.class_module)
-            cls = getattr(module, self.class_name, None)
-            if cls is None:
-                return empty
-            return (
-                getattr(cls, "EVAL_TARGET_CHILD_KEYS", frozenset()),
-                getattr(cls, "EVAL_BEHAVIORAL_CHILD_PARAMS", frozenset()),
-            )
-        except Exception:
-            return empty
-
     @classmethod
     def of(
         cls,
@@ -601,21 +452,8 @@ class Identifiable(ABC):
     Components implement ``_build_identifier()`` to return a frozen ComponentIdentifier
     snapshot. The identifier is built lazily on first access and cached for the
     component's lifetime.
-
-    Subclasses that participate in evaluation grouping (e.g., scorers, attack strategies)
-    should override ``EVAL_TARGET_CHILD_KEYS`` and ``EVAL_BEHAVIORAL_CHILD_PARAMS`` to
-    declare which children are "targets" and which target params are behavioral. The
-    ``get_eval_hash()`` convenience method uses these ClassVars to compute a behavioral
-    equivalence hash via ``ComponentIdentifier.compute_eval_hash()``.
     """
 
-    # Override in subclasses to declare which children are "targets" whose operational
-    # params (endpoint, max_requests_per_minute, etc.) should be stripped for eval hashing.
-    EVAL_TARGET_CHILD_KEYS: ClassVar[frozenset[str]] = frozenset()
-    # Override in subclasses to declare which target child params are behavioral
-    # (kept in eval hash). Only used when EVAL_TARGET_CHILD_KEYS is non-empty.
-    EVAL_BEHAVIORAL_CHILD_PARAMS: ClassVar[frozenset[str]] = frozenset()
-
     _identifier: Optional[ComponentIdentifier] = None
 
     @abstractmethod
@@ -650,20 +488,3 @@ def get_identifier(self) -> ComponentIdentifier:
         if self._identifier is None:
             self._identifier = self._build_identifier()
         return self._identifier
-
-    def get_eval_hash(self) -> str:
-        """
-        Compute a behavioral equivalence hash for evaluation grouping.
-
-        Uses the class-level ``EVAL_TARGET_CHILD_KEYS`` and
-        ``EVAL_BEHAVIORAL_CHILD_PARAMS`` to determine which children are targets
-        and which target params are behavioral. When both are empty (the default),
-        returns ``self.get_identifier().hash`` — equivalent to the full identity hash.
-
-        Returns:
-            str: A hex-encoded SHA256 hash suitable for eval registry keying.
-        """
-        return self.get_identifier().compute_eval_hash(
-            target_child_keys=self.EVAL_TARGET_CHILD_KEYS,
-            behavioral_child_params=self.EVAL_BEHAVIORAL_CHILD_PARAMS,
-        )
diff --git a/pyrit/identifiers/evaluation_identity.py b/pyrit/identifiers/evaluation_identity.py
new file mode 100644
index 0000000000..64492a66b2
--- /dev/null
+++ b/pyrit/identifiers/evaluation_identity.py
@@ -0,0 +1,173 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+"""
+Evaluation identity and eval-hash computation.
+
+This module provides:
+
+* ``_build_eval_dict`` — builds a filtered dict for eval-hash computation.
+* ``compute_eval_hash`` — free function that computes a behavioral equivalence
+  hash from a ``ComponentIdentifier``.
+* ``EvaluationIdentity`` — abstract base that wraps a ``ComponentIdentifier``
+  with domain-specific eval-hash configuration.  Concrete subclasses declare
+  *which* children are targets and *which* params are behavioral via two
+  ``ClassVar`` frozensets.
+"""
+
+from __future__ import annotations
+
+from abc import ABC
+from typing import Any, ClassVar, Optional
+
+from pyrit.identifiers.component_identifier import ComponentIdentifier, config_hash
+
+
+def _build_eval_dict(
+    identifier: ComponentIdentifier,
+    *,
+    target_child_keys: frozenset[str],
+    behavioral_child_params: frozenset[str],
+    param_allowlist: Optional[frozenset[str]] = None,
+) -> dict[str, Any]:
+    """
+    Build a filtered dictionary for eval-hash computation.
+
+    Keeps the class name/module and the non-None params of ``identifier`` (narrowed by
+    ``param_allowlist`` when given). Children named in ``target_child_keys`` keep only
+    params in ``behavioral_child_params`` (stripping operational params like endpoint,
+    max_requests_per_minute). Non-target children receive full eval treatment recursively.
+
+    Args:
+        identifier (ComponentIdentifier): The component identity to process.
+        target_child_keys (frozenset[str]): Child names that are targets
+            (e.g., ``{"prompt_target", "converter_target"}``).
+        behavioral_child_params (frozenset[str]): Param allowlist applied to
+            target children (e.g., ``{"model_name", "temperature", "top_p"}``).
+        param_allowlist (Optional[frozenset[str]]): If provided, only include params whose
+            keys are in the allowlist. If None, include all params. None-valued params are always omitted.
+
+    Returns:
+        dict[str, Any]: The filtered dictionary suitable for hashing.
+    """
+    eval_dict: dict[str, Any] = {
+        ComponentIdentifier.KEY_CLASS_NAME: identifier.class_name,
+        ComponentIdentifier.KEY_CLASS_MODULE: identifier.class_module,
+    }
+
+    for key, value in sorted(identifier.params.items()):
+        if value is not None and (param_allowlist is None or key in param_allowlist):
+            eval_dict[key] = value
+
+    if identifier.children:
+        eval_children: dict[str, Any] = {}
+        for name in sorted(identifier.children):
+            child_list = identifier.get_child_list(name)
+            if name in target_child_keys:
+                # Targets: filter to behavioral params only
+                hashes = [
+                    config_hash(
+                        _build_eval_dict(
+                            c,
+                            target_child_keys=target_child_keys,
+                            behavioral_child_params=behavioral_child_params,
+                            param_allowlist=behavioral_child_params,
+                        )
+                    )
+                    for c in child_list
+                ]
+            else:
+                # Non-targets (e.g., sub-scorers): full eval treatment, recurse without param filtering
+                hashes = [
+                    config_hash(
+                        _build_eval_dict(
+                            c,
+                            target_child_keys=target_child_keys,
+                            behavioral_child_params=behavioral_child_params,
+                        )
+                    )
+                    for c in child_list
+                ]
+            eval_children[name] = hashes[0] if len(hashes) == 1 else hashes
+        if eval_children:
+            eval_dict["children"] = eval_children
+
+    return eval_dict
+
+
+def compute_eval_hash(
+    identifier: ComponentIdentifier,
+    *,
+    target_child_keys: frozenset[str],
+    behavioral_child_params: frozenset[str],
+) -> str:
+    """
+    Compute a behavioral equivalence hash for evaluation grouping.
+
+    Unlike ``ComponentIdentifier.hash`` (which includes all params of self and
+    children), the eval hash filters child components that are "targets" to only
+    their behavioral params (e.g., model_name, temperature, top_p), stripping
+    operational params like endpoint or max_requests_per_minute. This ensures the
+    same logical configuration on different deployments produces the same eval hash.
+
+    Non-target children (e.g., sub-scorers) receive full recursive eval treatment.
+
+    When ``target_child_keys`` is empty, no child filtering occurs and the result
+    equals ``identifier.hash``.
+
+    Args:
+        identifier (ComponentIdentifier): The component identity to compute the hash for.
+        target_child_keys (frozenset[str]): Child names that are targets
+            (e.g., ``{"prompt_target", "converter_target"}``).
+        behavioral_child_params (frozenset[str]): Param allowlist for target children
+            (e.g., ``{"model_name", "temperature", "top_p"}``).
+
+    Returns:
+        str: A hex-encoded SHA256 hash suitable for eval registry keying.
+    """
+    if not target_child_keys:
+        return identifier.hash
+
+    eval_dict = _build_eval_dict(
+        identifier,
+        target_child_keys=target_child_keys,
+        behavioral_child_params=behavioral_child_params,
+    )
+    return config_hash(eval_dict)
+
+
+class EvaluationIdentity(ABC):
+    """
+    Wraps a ``ComponentIdentifier`` with domain-specific eval-hash configuration.
+
+    Subclasses must set the two ``ClassVar`` frozensets:
+
+    * ``TARGET_CHILD_KEYS`` — child names whose operational params should be
+      stripped (e.g., ``{"prompt_target", "converter_target"}``).
+    * ``BEHAVIORAL_CHILD_PARAMS`` — param allowlist applied to those target
+      children (e.g., ``{"model_name", "temperature", "top_p"}``).
+
+    The ``eval_hash`` property returns the hash computed once in ``__init__``
+    by the module-level ``compute_eval_hash`` free function.
+    """
+
+    TARGET_CHILD_KEYS: ClassVar[frozenset[str]]
+    BEHAVIORAL_CHILD_PARAMS: ClassVar[frozenset[str]]
+
+    def __init__(self, identifier: ComponentIdentifier) -> None:
+        self._identifier = identifier
+        self._eval_hash = compute_eval_hash(
+            identifier,
+            target_child_keys=self.TARGET_CHILD_KEYS,
+            behavioral_child_params=self.BEHAVIORAL_CHILD_PARAMS,
+        )
+
+    @property
+    def identifier(self) -> ComponentIdentifier:
+        """The underlying component identity."""
+        return self._identifier
+
+    @property
+    def eval_hash(self) -> str:
+        """Behavioral equivalence hash for evaluation grouping."""
+        return self._eval_hash
diff --git a/pyrit/models/scenario_result.py b/pyrit/models/scenario_result.py
index 5d8f1cb7d8..ebb3eddc9a 100644
--- a/pyrit/models/scenario_result.py
+++ b/pyrit/models/scenario_result.py
@@ -224,6 +224,7 @@ def get_scorer_evaluation_metrics(self) -> Optional["ScorerMetrics"]:
 
         """
         # import here to avoid circular imports
+        from pyrit.score.scorer_evaluation.scorer_evaluation_identity import ScorerEvaluationIdentity
         from pyrit.score.scorer_evaluation.scorer_metrics_io import (
             find_objective_metrics_by_eval_hash,
         )
@@ -231,6 +232,6 @@ def get_scorer_evaluation_metrics(self) -> Optional["ScorerMetrics"]:
         if not self.objective_scorer_identifier:
             return None
 
-        eval_hash = self.objective_scorer_identifier.compute_eval_hash()
+        eval_hash = ScorerEvaluationIdentity(self.objective_scorer_identifier).eval_hash
 
         return find_objective_metrics_by_eval_hash(eval_hash=eval_hash)
diff --git a/pyrit/score/printer/console_scorer_printer.py b/pyrit/score/printer/console_scorer_printer.py
index 05a3e18ded..108070318c 100644
--- a/pyrit/score/printer/console_scorer_printer.py
+++ b/pyrit/score/printer/console_scorer_printer.py
@@ -100,6 +100,7 @@ def print_objective_scorer(self, *, scorer_identifier: ComponentIdentifier) -> N
         Args:
             scorer_identifier (ComponentIdentifier): The scorer identifier to print information for.
         """
+        from pyrit.score.scorer_evaluation.scorer_evaluation_identity import ScorerEvaluationIdentity
         from pyrit.score.scorer_evaluation.scorer_metrics_io import (
             find_objective_metrics_by_eval_hash,
         )
@@ -110,7 +111,7 @@ def print_objective_scorer(self, *, scorer_identifier: ComponentIdentifier) -> N
         self._print_scorer_info(scorer_identifier, indent_level=3)
 
         # Look up metrics by eval hash
-        eval_hash = scorer_identifier.compute_eval_hash()
+        eval_hash = ScorerEvaluationIdentity(scorer_identifier).eval_hash
         metrics = find_objective_metrics_by_eval_hash(eval_hash=eval_hash)
         self._print_objective_metrics(metrics)
 
@@ -127,6 +128,7 @@ def print_harm_scorer(self, scorer_identifier: ComponentIdentifier, *, harm_cate
             scorer_identifier (ComponentIdentifier): The scorer identifier to print information for.
             harm_category (str): The harm category for looking up metrics (e.g., "hate_speech", "violence").
         """
+        from pyrit.score.scorer_evaluation.scorer_evaluation_identity import ScorerEvaluationIdentity
         from pyrit.score.scorer_evaluation.scorer_metrics_io import (
             find_harm_metrics_by_eval_hash,
         )
@@ -137,7 +139,7 @@ def print_harm_scorer(self, scorer_identifier: ComponentIdentifier, *, harm_cate
         self._print_scorer_info(scorer_identifier, indent_level=3)
 
         # Look up metrics by eval hash and harm category
-        eval_hash = scorer_identifier.compute_eval_hash()
+        eval_hash = ScorerEvaluationIdentity(scorer_identifier).eval_hash
         metrics = find_harm_metrics_by_eval_hash(eval_hash=eval_hash, harm_category=harm_category)
         self._print_harm_metrics(metrics)
 
diff --git a/pyrit/score/scorer.py b/pyrit/score/scorer.py
index f6196350a3..9dec55a8f1 100644
--- a/pyrit/score/scorer.py
+++ b/pyrit/score/scorer.py
@@ -13,7 +13,6 @@
 from typing import (
     TYPE_CHECKING,
     Any,
-    ClassVar,
     Optional,
     Union,
     cast,
@@ -55,11 +54,6 @@ class Scorer(Identifiable, abc.ABC):
     Abstract base class for scorers.
     """
 
-    # Eval-hash configuration: which children are "targets" (operational params stripped)
-    # and which target params are behavioral (kept in eval hash).
-    EVAL_TARGET_CHILD_KEYS: ClassVar[frozenset[str]] = frozenset({"prompt_target", "converter_target"})
-    EVAL_BEHAVIORAL_CHILD_PARAMS: ClassVar[frozenset[str]] = frozenset({"model_name", "temperature", "top_p"})
-
     # Evaluation configuration - maps input dataset files to a result file.
     # Specifies glob patterns for datasets and a result file name.
     evaluation_file_mapping: Optional[ScorerEvalDatasetFiles] = None
@@ -75,6 +69,22 @@ def __init__(self, *, validator: ScorerPromptValidator):
         """
         self._validator = validator
 
+    def get_eval_hash(self) -> str:
+        """
+        Compute a behavioral equivalence hash for evaluation grouping.
+
+        Delegates to ``ScorerEvaluationIdentity`` which filters target children
+        (prompt_target, converter_target) to behavioral params only, so the same
+        scorer configuration on different deployments produces the same eval hash.
+
+        Returns:
+            str: A hex-encoded SHA256 hash suitable for eval registry keying.
+        """
+        # Deferred import to avoid circular dependency (scorer_evaluation_identity → identifiers → …)
+        from pyrit.score.scorer_evaluation.scorer_evaluation_identity import ScorerEvaluationIdentity
+
+        return ScorerEvaluationIdentity(self.get_identifier()).eval_hash
+
     @property
     def scorer_type(self) -> ScoreType:
         """
diff --git a/pyrit/score/scorer_evaluation/scorer_evaluation_identity.py b/pyrit/score/scorer_evaluation/scorer_evaluation_identity.py
new file mode 100644
index 0000000000..184109bfe6
--- /dev/null
+++ b/pyrit/score/scorer_evaluation/scorer_evaluation_identity.py
@@ -0,0 +1,28 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+"""
+Scorer-specific evaluation identity.
+
+``ScorerEvaluationIdentity`` declares which children are "targets" and which
+target params are behavioral for the scorer evaluation domain.
+"""
+
+from __future__ import annotations
+
+from typing import ClassVar
+
+from pyrit.identifiers.evaluation_identity import EvaluationIdentity
+
+
+class ScorerEvaluationIdentity(EvaluationIdentity):
+    """
+    Evaluation identity for scorers.
+
+    Target children (``prompt_target``, ``converter_target``) are filtered to
+    behavioral params only (``model_name``, ``temperature``, ``top_p``), so the
+    same scorer configuration on different deployments produces the same eval hash.
+    """
+
+    TARGET_CHILD_KEYS: ClassVar[frozenset[str]] = frozenset({"prompt_target", "converter_target"})
+    BEHAVIORAL_CHILD_PARAMS: ClassVar[frozenset[str]] = frozenset({"model_name", "temperature", "top_p"})
diff --git a/tests/unit/identifiers/test_component_identifier.py b/tests/unit/identifiers/test_component_identifier.py
index a727c3e212..7dfd9c9871 100644
--- a/tests/unit/identifiers/test_component_identifier.py
+++ b/tests/unit/identifiers/test_component_identifier.py
@@ -2,13 +2,11 @@
 # Licensed under the MIT license.
 
 
-from typing import ClassVar
-
 import pytest
 
 import pyrit
-from pyrit.identifiers import ComponentIdentifier, Identifiable, config_hash
-from pyrit.identifiers.component_identifier import _build_eval_dict
+from pyrit.identifiers import ComponentIdentifier, Identifiable, compute_eval_hash, config_hash
+from pyrit.identifiers.evaluation_identity import _build_eval_dict
 
 # Test constants mirroring Scorer's ClassVars — keeps tests decoupled from pyrit.score
 _TARGET_CHILD_KEYS = frozenset({"prompt_target", "converter_target"})
@@ -723,73 +721,6 @@ def _build_identifier(self) -> ComponentIdentifier:
         assert identifier.params["key"] == "val"
 
 
-class TestResolveEvalConfig:
-    """Tests for ComponentIdentifier._resolve_eval_config dynamic class lookup."""
-
-    def test_resolves_scorer_class_vars(self):
-        """Test that _resolve_eval_config returns Scorer's ClassVars for a Scorer identifier."""
-        from pyrit.score import Scorer
-
-        identifier = ComponentIdentifier(
-            class_name="Scorer",
-            class_module="pyrit.score.scorer",
-        )
-        keys, params = identifier._resolve_eval_config()
-
-        assert keys == Scorer.EVAL_TARGET_CHILD_KEYS
-        assert params == Scorer.EVAL_BEHAVIORAL_CHILD_PARAMS
-        # Verify specific members to catch accidental ClassVar drift
-        assert "prompt_target" in keys
-        assert "converter_target" in keys
-        assert "model_name" in params
-        assert "temperature" in params
-        assert "top_p" in params
-
-    def test_returns_empty_for_unresolvable_module(self):
-        """Test that _resolve_eval_config returns empty frozensets for a module that cannot be imported."""
-        identifier = ComponentIdentifier(
-            class_name="DoesNotExist",
-            class_module="no.such.module.exists",
-        )
-        keys, params = identifier._resolve_eval_config()
-
-        assert keys == frozenset()
-        assert params == frozenset()
-
-    def test_returns_empty_for_nonexistent_class(self):
-        """Test that _resolve_eval_config returns empty frozensets when the class name doesn't exist in the module."""
-        identifier = ComponentIdentifier(
-            class_name="NoSuchClassName",
-            class_module="pyrit.score.scorer",
-        )
-        keys, params = identifier._resolve_eval_config()
-
-        assert keys == frozenset()
-        assert params == frozenset()
-
-    def test_returns_empty_for_class_without_classvars(self):
-        """Test that _resolve_eval_config returns empty frozensets when the class has no eval ClassVars."""
-        identifier = ComponentIdentifier(
-            class_name="ComponentIdentifier",
-            class_module="pyrit.identifiers.component_identifier",
-        )
-        keys, params = identifier._resolve_eval_config()
-
-        assert keys == frozenset()
-        assert params == frozenset()
-
-    def test_returns_identifiable_defaults_for_base_class(self):
-        """Test that _resolve_eval_config returns Identifiable's default empty frozensets."""
-        identifier = ComponentIdentifier(
-            class_name="Identifiable",
-            class_module="pyrit.identifiers.component_identifier",
-        )
-        keys, params = identifier._resolve_eval_config()
-
-        assert keys == frozenset()
-        assert params == frozenset()
-
-
 class TestBuildEvalDict:
     """Tests for the _build_eval_dict function."""
 
@@ -1113,9 +1044,7 @@ def test_converter_target_filtered_like_prompt_target(self):
 
 
 class TestComputeEvalHash:
-    """Tests for ComponentIdentifier.compute_eval_hash (explicit and zero-arg paths)."""
-
-    # --- Explicit-arg tests ---
+    """Tests for the compute_eval_hash free function."""
 
     def test_deterministic_for_same_identifier(self):
         """Test that compute_eval_hash returns the same hash for the same identifier."""
@@ -1124,11 +1053,13 @@ def test_deterministic_for_same_identifier(self):
             class_module="pyrit.score",
             params={"threshold": 0.5},
         )
-        hash1 = identifier.compute_eval_hash(
+        hash1 = compute_eval_hash(
+            identifier,
             target_child_keys=_TARGET_CHILD_KEYS,
             behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS,
         )
-        hash2 = identifier.compute_eval_hash(
+        hash2 = compute_eval_hash(
+            identifier,
             target_child_keys=_TARGET_CHILD_KEYS,
             behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS,
         )
@@ -1141,7 +1072,8 @@ def test_returns_hex_string(self):
             class_name="HexScorer",
             class_module="pyrit.score",
         )
-        result = identifier.compute_eval_hash(
+        result = compute_eval_hash(
+            identifier,
             target_child_keys=_TARGET_CHILD_KEYS,
             behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS,
         )
@@ -1155,12 +1087,10 @@ def test_different_class_names_produce_different_hashes(self):
         id1 = ComponentIdentifier(class_name="ScorerA", class_module="pyrit.score")
         id2 = ComponentIdentifier(class_name="ScorerB", class_module="pyrit.score")
 
-        assert id1.compute_eval_hash(
-            target_child_keys=_TARGET_CHILD_KEYS,
-            behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS,
-        ) != id2.compute_eval_hash(
-            target_child_keys=_TARGET_CHILD_KEYS,
-            behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS,
+        assert compute_eval_hash(
+            id1, target_child_keys=_TARGET_CHILD_KEYS, behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS
+        ) != compute_eval_hash(
+            id2, target_child_keys=_TARGET_CHILD_KEYS, behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS
         )
 
     def test_different_params_produce_different_hashes(self):
@@ -1168,16 +1098,14 @@ def test_different_params_produce_different_hashes(self):
         id1 = ComponentIdentifier(class_name="Scorer", class_module="pyrit.score", params={"threshold": 0.5})
         id2 = ComponentIdentifier(class_name="Scorer", class_module="pyrit.score", params={"threshold": 0.8})
 
-        assert id1.compute_eval_hash(
-            target_child_keys=_TARGET_CHILD_KEYS,
-            behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS,
-        ) != id2.compute_eval_hash(
-            target_child_keys=_TARGET_CHILD_KEYS,
-            behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS,
+        assert compute_eval_hash(
+            id1, target_child_keys=_TARGET_CHILD_KEYS, behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS
+        ) != compute_eval_hash(
+            id2, target_child_keys=_TARGET_CHILD_KEYS, behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS
         )
 
     def test_eval_hash_differs_from_component_hash(self):
-        """Test that eval hash differs from hash when target children have operational params."""
+        """Test that eval hash differs from component hash when target children have operational params."""
         child = ComponentIdentifier(
             class_name="Target",
             class_module="pyrit.target",
@@ -1189,7 +1117,8 @@ def test_eval_hash_differs_from_component_hash(self):
             children={"prompt_target": child},
         )
 
-        eval_hash = identifier.compute_eval_hash(
+        eval_hash = compute_eval_hash(
+            identifier,
             target_child_keys=_TARGET_CHILD_KEYS,
             behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS,
         )
@@ -1220,12 +1149,10 @@ def test_operational_child_params_ignored(self):
         id1 = ComponentIdentifier(class_name="Scorer", class_module="pyrit.score", children={"prompt_target": child1})
         id2 = ComponentIdentifier(class_name="Scorer", class_module="pyrit.score", children={"prompt_target": child2})
 
-        assert id1.compute_eval_hash(
-            target_child_keys=_TARGET_CHILD_KEYS,
-            behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS,
-        ) == id2.compute_eval_hash(
-            target_child_keys=_TARGET_CHILD_KEYS,
-            behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS,
+        assert compute_eval_hash(
+            id1, target_child_keys=_TARGET_CHILD_KEYS, behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS
+        ) == compute_eval_hash(
+            id2, target_child_keys=_TARGET_CHILD_KEYS, behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS
         )
 
     def test_behavioral_child_params_affect_eval_hash(self):
@@ -1243,12 +1170,10 @@ def test_behavioral_child_params_affect_eval_hash(self):
         id1 = ComponentIdentifier(class_name="Scorer", class_module="pyrit.score", children={"prompt_target": child1})
         id2 = ComponentIdentifier(class_name="Scorer", class_module="pyrit.score", children={"prompt_target": child2})
 
-        assert id1.compute_eval_hash(
-            target_child_keys=_TARGET_CHILD_KEYS,
-            behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS,
-        ) != id2.compute_eval_hash(
-            target_child_keys=_TARGET_CHILD_KEYS,
-            behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS,
+        assert compute_eval_hash(
+            id1, target_child_keys=_TARGET_CHILD_KEYS, behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS
+        ) != compute_eval_hash(
+            id2, target_child_keys=_TARGET_CHILD_KEYS, behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS
         )
 
     def test_scorer_own_params_all_included(self):
@@ -1260,169 +1185,28 @@ def test_scorer_own_params_all_included(self):
             class_name="Scorer", class_module="pyrit.score", params={"system_prompt_template": "template_b"}
         )
 
-        assert id1.compute_eval_hash(
-            target_child_keys=_TARGET_CHILD_KEYS,
-            behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS,
-        ) != id2.compute_eval_hash(
-            target_child_keys=_TARGET_CHILD_KEYS,
-            behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS,
-        )
-
-    # --- Zero-arg / dynamic-lookup tests ---
-
-    def test_zero_arg_matches_explicit_args_for_scorer(self):
-        """Test that zero-arg compute_eval_hash matches explicit-arg version for Scorer class."""
-        from pyrit.score import Scorer
-
-        child = ComponentIdentifier(
-            class_name="OpenAIChatTarget",
-            class_module="pyrit.prompt_target.openai.openai_chat_target",
-            params={"model_name": "gpt-4", "endpoint": "https://api.example.com"},
-        )
-        identifier = ComponentIdentifier(
-            class_name="Scorer",
-            class_module="pyrit.score.scorer",
-            children={"prompt_target": child},
-        )
-
-        zero_arg_hash = identifier.compute_eval_hash()
-        explicit_hash = identifier.compute_eval_hash(
-            target_child_keys=Scorer.EVAL_TARGET_CHILD_KEYS,
-            behavioral_child_params=Scorer.EVAL_BEHAVIORAL_CHILD_PARAMS,
-        )
-
-        assert zero_arg_hash == explicit_hash
-
-    def test_zero_arg_returns_self_hash_for_unresolvable_class(self):
-        """Test that zero-arg compute_eval_hash falls back to self.hash when class is unresolvable."""
-        identifier = ComponentIdentifier(
-            class_name="NonExistentScorer",
-            class_module="no.such.module",
-            params={"threshold": 0.5},
-        )
-
-        assert identifier.compute_eval_hash() == identifier.hash
-
-    def test_zero_arg_returns_self_hash_for_class_without_classvars(self):
-        """Test that zero-arg compute_eval_hash returns self.hash when class has no eval ClassVars."""
-        identifier = ComponentIdentifier(
-            class_name="ComponentIdentifier",
-            class_module="pyrit.identifiers.component_identifier",
-            params={"some_param": "value"},
-        )
-
-        assert identifier.compute_eval_hash() == identifier.hash
-
-    def test_partial_args_target_keys_only(self):
-        """Test compute_eval_hash with only target_child_keys provided (behavioral_child_params resolved)."""
-        from pyrit.score import Scorer
-
-        child = ComponentIdentifier(
-            class_name="Target",
-            class_module="pyrit.target",
-            params={"model_name": "gpt-4", "endpoint": "https://api.example.com"},
-        )
-        identifier = ComponentIdentifier(
-            class_name="Scorer",
-            class_module="pyrit.score.scorer",
-            children={"prompt_target": child},
-        )
-
-        partial_hash = identifier.compute_eval_hash(
-            target_child_keys=Scorer.EVAL_TARGET_CHILD_KEYS,
-        )
-        full_hash = identifier.compute_eval_hash(
-            target_child_keys=Scorer.EVAL_TARGET_CHILD_KEYS,
-            behavioral_child_params=Scorer.EVAL_BEHAVIORAL_CHILD_PARAMS,
+        assert compute_eval_hash(
+            id1, target_child_keys=_TARGET_CHILD_KEYS, behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS
+        ) != compute_eval_hash(
+            id2, target_child_keys=_TARGET_CHILD_KEYS, behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS
         )
 
-        assert partial_hash == full_hash
-
-    def test_partial_args_behavioral_params_only(self):
-        """Test compute_eval_hash with only behavioral_child_params provided (target_child_keys resolved)."""
-        from pyrit.score import Scorer
-
+    def test_empty_target_child_keys_returns_component_hash(self):
+        """Test that empty target_child_keys means no filtering — returns component hash."""
         child = ComponentIdentifier(
             class_name="Target",
             class_module="pyrit.target",
-            params={"model_name": "gpt-4", "endpoint": "https://api.example.com"},
+            params={"model_name": "gpt-4", "endpoint": "https://example.com"},
         )
         identifier = ComponentIdentifier(
             class_name="Scorer",
-            class_module="pyrit.score.scorer",
+            class_module="pyrit.score",
             children={"prompt_target": child},
         )
 
-        partial_hash = identifier.compute_eval_hash(
-            behavioral_child_params=Scorer.EVAL_BEHAVIORAL_CHILD_PARAMS,
-        )
-        full_hash = identifier.compute_eval_hash(
-            target_child_keys=Scorer.EVAL_TARGET_CHILD_KEYS,
-            behavioral_child_params=Scorer.EVAL_BEHAVIORAL_CHILD_PARAMS,
-        )
-
-        assert partial_hash == full_hash
-
-
-class TestGetEvalHash:
-    """Tests for Identifiable.get_eval_hash convenience method."""
-
-    def test_get_eval_hash_uses_classvars(self):
-        """Test that get_eval_hash passes ClassVar overrides to compute_eval_hash."""
-
-        class FakeScorer(Identifiable):
-            EVAL_TARGET_CHILD_KEYS: ClassVar[frozenset[str]] = frozenset({"my_target"})
-            EVAL_BEHAVIORAL_CHILD_PARAMS: ClassVar[frozenset[str]] = frozenset({"model_name"})
-
-            def _build_identifier(self) -> ComponentIdentifier:
-                child = ComponentIdentifier(
-                    class_name="Target",
-                    class_module="pyrit.target",
-                    params={"model_name": "gpt-4", "endpoint": "https://example.com"},
-                )
-                return ComponentIdentifier.of(self, children={"my_target": child})
-
-        scorer = FakeScorer()
-        eval_hash = scorer.get_eval_hash()
-
-        expected = scorer.get_identifier().compute_eval_hash(
-            target_child_keys=frozenset({"my_target"}),
-            behavioral_child_params=frozenset({"model_name"}),
+        result = compute_eval_hash(
+            identifier,
+            target_child_keys=frozenset(),
+            behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS,
         )
-        assert eval_hash == expected
-
-    def test_get_eval_hash_equals_component_hash_when_no_classvars(self):
-        """Test that get_eval_hash returns component hash when no ClassVar overrides."""
-
-        class SimpleComponent(Identifiable):
-            def _build_identifier(self) -> ComponentIdentifier:
-                return ComponentIdentifier.of(self, params={"key": "value"})
-
-        component = SimpleComponent()
-        assert component.get_eval_hash() == component.get_identifier().hash
-
-    def test_get_eval_hash_filters_operational_params(self):
-        """Test that get_eval_hash filters operational params from target children."""
-
-        class ScorerLike(Identifiable):
-            EVAL_TARGET_CHILD_KEYS: ClassVar[frozenset[str]] = frozenset({"target"})
-            EVAL_BEHAVIORAL_CHILD_PARAMS: ClassVar[frozenset[str]] = frozenset({"model_name"})
-
-            def __init__(self, *, endpoint: str):
-                self._endpoint = endpoint
-
-            def _build_identifier(self) -> ComponentIdentifier:
-                child = ComponentIdentifier(
-                    class_name="Target",
-                    class_module="pyrit.target",
-                    params={"model_name": "gpt-4", "endpoint": self._endpoint},
-                )
-                return ComponentIdentifier.of(self, children={"target": child})
-
-        scorer_a = ScorerLike(endpoint="https://endpoint-a.com")
-        scorer_b = ScorerLike(endpoint="https://endpoint-b.com")
-
-        # Different endpoints should produce same eval hash (operational param filtered)
-        assert scorer_a.get_eval_hash() == scorer_b.get_eval_hash()
-        # But different component hashes (endpoint is in full identity)
-        assert scorer_a.get_identifier().hash != scorer_b.get_identifier().hash
+        assert result == identifier.hash
diff --git a/tests/unit/identifiers/test_evaluation_identity.py b/tests/unit/identifiers/test_evaluation_identity.py
new file mode 100644
index 0000000000..3bbbdd8759
--- /dev/null
+++ b/tests/unit/identifiers/test_evaluation_identity.py
@@ -0,0 +1,219 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+"""
+Tests for pyrit.identifiers.evaluation_identity.
+
+Covers the ``EvaluationIdentity`` abstract base class, the ``_build_eval_dict``
+helper, and the ``compute_eval_hash`` free function.
+"""
+
+from typing import ClassVar
+
+import pytest
+
+from pyrit.identifiers import ComponentIdentifier, compute_eval_hash
+from pyrit.identifiers.evaluation_identity import EvaluationIdentity, _build_eval_dict
+
+
+# ---------------------------------------------------------------------------
+# Concrete subclass for testing the ABC
+# ---------------------------------------------------------------------------
+
+class _StubEvaluationIdentity(EvaluationIdentity):
+    """Minimal concrete subclass for testing the abstract base class."""
+
+    TARGET_CHILD_KEYS: ClassVar[frozenset[str]] = frozenset({"my_target"})
+    BEHAVIORAL_CHILD_PARAMS: ClassVar[frozenset[str]] = frozenset({"model_name"})
+
+
+# ---------------------------------------------------------------------------
+# Test constants
+# ---------------------------------------------------------------------------
+
+_TARGET_CHILD_KEYS = frozenset({"prompt_target", "converter_target"})
+_BEHAVIORAL_CHILD_PARAMS = frozenset({"model_name", "temperature", "top_p"})
+
+
+class TestBuildEvalDict:
+    """Tests for _build_eval_dict filtering logic."""
+
+    def test_target_child_params_filtered(self):
+        """Test that target children only keep behavioral params."""
+        child = ComponentIdentifier(
+            class_name="Target",
+            class_module="pyrit.target",
+            params={"model_name": "gpt-4", "endpoint": "https://example.com"},
+        )
+        identifier = ComponentIdentifier(
+            class_name="Scorer",
+            class_module="pyrit.score",
+            children={"prompt_target": child},
+        )
+
+        result = _build_eval_dict(
+            identifier,
+            target_child_keys=_TARGET_CHILD_KEYS,
+            behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS,
+        )
+
+        # "endpoint" must not appear anywhere in the child sub-dict
+        assert "endpoint" not in str(result)
+        assert "children" in result
+
+    def test_non_target_child_params_kept(self):
+        """Test that non-target children keep all params (full recursive treatment)."""
+        child = ComponentIdentifier(
+            class_name="SubScorer",
+            class_module="pyrit.score",
+            params={"threshold": 0.5, "extra": "value"},
+        )
+        identifier = ComponentIdentifier(
+            class_name="Scorer",
+            class_module="pyrit.score",
+            children={"sub_scorer": child},
+        )
+
+        result = _build_eval_dict(
+            identifier,
+            target_child_keys=_TARGET_CHILD_KEYS,
+            behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS,
+        )
+
+        assert "children" in result
+
+    def test_no_children_produces_flat_dict(self):
+        """Test that an identifier with no children produces a dict without 'children' key."""
+        identifier = ComponentIdentifier(
+            class_name="Scorer",
+            class_module="pyrit.score",
+            params={"threshold": 0.5},
+        )
+
+        result = _build_eval_dict(
+            identifier,
+            target_child_keys=_TARGET_CHILD_KEYS,
+            behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS,
+        )
+
+        assert "children" not in result
+        assert result[ComponentIdentifier.KEY_CLASS_NAME] == "Scorer"
+
+
+class TestComputeEvalHash:
+    """Tests for the compute_eval_hash free function."""
+
+    def test_deterministic(self):
+        """Test that the same identifier + config produces the same hash."""
+        identifier = ComponentIdentifier(class_name="Scorer", class_module="pyrit.score")
+        h1 = compute_eval_hash(identifier, target_child_keys=_TARGET_CHILD_KEYS, behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS)
+        h2 = compute_eval_hash(identifier, target_child_keys=_TARGET_CHILD_KEYS, behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS)
+        assert h1 == h2
+
+    def test_empty_target_child_keys_returns_component_hash(self):
+        """Test that empty target_child_keys bypasses filtering and returns component hash."""
+        child = ComponentIdentifier(
+            class_name="Target",
+            class_module="pyrit.target",
+            params={"model_name": "gpt-4", "endpoint": "https://example.com"},
+        )
+        identifier = ComponentIdentifier(
+            class_name="Scorer",
+            class_module="pyrit.score",
+            children={"prompt_target": child},
+        )
+
+        result = compute_eval_hash(
+            identifier,
+            target_child_keys=frozenset(),
+            behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS,
+        )
+        assert result == identifier.hash
+
+    def test_returns_64_char_hex(self):
+        """Test that the hash is a 64-char lowercase hex string (SHA-256)."""
+        identifier = ComponentIdentifier(class_name="S", class_module="m")
+        result = compute_eval_hash(identifier, target_child_keys=_TARGET_CHILD_KEYS, behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS)
+        assert len(result) == 64
+        assert all(c in "0123456789abcdef" for c in result)
+
+
+class TestEvaluationIdentity:
+    """Tests for the EvaluationIdentity abstract base class."""
+
+    def test_identifier_property_returns_original(self):
+        """Test that .identifier returns the ComponentIdentifier passed at construction."""
+        cid = ComponentIdentifier(class_name="Scorer", class_module="pyrit.score")
+        identity = _StubEvaluationIdentity(cid)
+        assert identity.identifier is cid
+
+    def test_eval_hash_is_string(self):
+        """Test that .eval_hash is a valid hex string."""
+        cid = ComponentIdentifier(class_name="Scorer", class_module="pyrit.score")
+        identity = _StubEvaluationIdentity(cid)
+        assert isinstance(identity.eval_hash, str)
+        assert len(identity.eval_hash) == 64
+
+    def test_eval_hash_matches_free_function(self):
+        """Test that .eval_hash matches calling compute_eval_hash directly."""
+        cid = ComponentIdentifier(
+            class_name="Scorer",
+            class_module="pyrit.score",
+            params={"threshold": 0.5},
+        )
+        identity = _StubEvaluationIdentity(cid)
+
+        expected = compute_eval_hash(
+            cid,
+            target_child_keys=_StubEvaluationIdentity.TARGET_CHILD_KEYS,
+            behavioral_child_params=_StubEvaluationIdentity.BEHAVIORAL_CHILD_PARAMS,
+        )
+        assert identity.eval_hash == expected
+
+    def test_eval_hash_differs_from_component_hash_when_target_filtered(self):
+        """Test that eval hash differs from component hash when target children have operational params."""
+        child = ComponentIdentifier(
+            class_name="Target",
+            class_module="pyrit.target",
+            params={"model_name": "gpt-4", "endpoint": "https://example.com"},
+        )
+        cid = ComponentIdentifier(
+            class_name="Scorer",
+            class_module="pyrit.score",
+            children={"my_target": child},
+        )
+        identity = _StubEvaluationIdentity(cid)
+
+        # "endpoint" is operational, so eval hash should differ from full component hash
+        assert identity.eval_hash != cid.hash
+
+    def test_cannot_instantiate_abc_directly(self):
+        """Test that EvaluationIdentity cannot be instantiated without ClassVars."""
+        with pytest.raises(AttributeError):
+            EvaluationIdentity(ComponentIdentifier(class_name="X", class_module="m"))  # type: ignore[abstract]
+
+    def test_custom_classvars_produce_expected_hash(self):
+        """Test that a concrete subclass with custom ClassVars produces the correct eval hash."""
+
+        class CustomIdentity(EvaluationIdentity):
+            TARGET_CHILD_KEYS: ClassVar[frozenset[str]] = frozenset({"special_target"})
+            BEHAVIORAL_CHILD_PARAMS: ClassVar[frozenset[str]] = frozenset({"model_name", "temperature"})
+
+        child = ComponentIdentifier(
+            class_name="Target",
+            class_module="pyrit.target",
+            params={"model_name": "gpt-4", "temperature": 0.7, "endpoint": "https://example.com"},
+        )
+        cid = ComponentIdentifier(
+            class_name="Scorer",
+            class_module="pyrit.score",
+            children={"special_target": child},
+        )
+        identity = CustomIdentity(cid)
+
+        expected = compute_eval_hash(
+            cid,
+            target_child_keys=frozenset({"special_target"}),
+            behavioral_child_params=frozenset({"model_name", "temperature"}),
+        )
+        assert identity.eval_hash == expected
diff --git a/tests/unit/score/test_scorer_evaluation_identity.py b/tests/unit/score/test_scorer_evaluation_identity.py
new file mode 100644
index 0000000000..f1635fcb10
--- /dev/null
+++ b/tests/unit/score/test_scorer_evaluation_identity.py
@@ -0,0 +1,156 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+"""
+Tests for pyrit.score.scorer_evaluation.scorer_evaluation_identity.
+
+Covers ``ScorerEvaluationIdentity`` ClassVar values, eval-hash delegation, and
+the ``Scorer.get_eval_hash()`` convenience method.
+"""
+
+from typing import ClassVar
+
+import pytest
+
+from pyrit.identifiers import ComponentIdentifier, Identifiable, compute_eval_hash
+from pyrit.score.scorer_evaluation.scorer_evaluation_identity import ScorerEvaluationIdentity
+
+
+class TestScorerEvaluationIdentityConstants:
+    """Tests for the ClassVar constants on ScorerEvaluationIdentity."""
+
+    def test_target_child_keys(self):
+        """Test that TARGET_CHILD_KEYS contains the expected scorer target names."""
+        assert ScorerEvaluationIdentity.TARGET_CHILD_KEYS == frozenset({"prompt_target", "converter_target"})
+
+    def test_behavioral_child_params(self):
+        """Test that BEHAVIORAL_CHILD_PARAMS contains the expected behavioral params."""
+        assert ScorerEvaluationIdentity.BEHAVIORAL_CHILD_PARAMS == frozenset({"model_name", "temperature", "top_p"})
+
+
+class TestScorerEvaluationIdentityEvalHash:
+    """Tests for ScorerEvaluationIdentity eval hash computation."""
+
+    def test_deterministic(self):
+        """Test that the same identifier produces the same eval hash."""
+        cid = ComponentIdentifier(class_name="Scorer", class_module="pyrit.score", params={"threshold": 0.5})
+        h1 = ScorerEvaluationIdentity(cid).eval_hash
+        h2 = ScorerEvaluationIdentity(cid).eval_hash
+        assert h1 == h2
+
+    def test_operational_params_ignored(self):
+        """Test that operational target params don't affect the scorer eval hash."""
+        child1 = ComponentIdentifier(
+            class_name="Target",
+            class_module="pyrit.target",
+            params={"model_name": "gpt-4", "endpoint": "https://endpoint-a.com"},
+        )
+        child2 = ComponentIdentifier(
+            class_name="Target",
+            class_module="pyrit.target",
+            params={"model_name": "gpt-4", "endpoint": "https://endpoint-b.com"},
+        )
+        id1 = ComponentIdentifier(class_name="Scorer", class_module="pyrit.score", children={"prompt_target": child1})
+        id2 = ComponentIdentifier(class_name="Scorer", class_module="pyrit.score", children={"prompt_target": child2})
+
+        assert ScorerEvaluationIdentity(id1).eval_hash == ScorerEvaluationIdentity(id2).eval_hash
+
+    def test_behavioral_params_affect_hash(self):
+        """Test that behavioral target params do affect the scorer eval hash."""
+        child1 = ComponentIdentifier(
+            class_name="Target",
+            class_module="pyrit.target",
+            params={"model_name": "gpt-4", "temperature": 0.7},
+        )
+        child2 = ComponentIdentifier(
+            class_name="Target",
+            class_module="pyrit.target",
+            params={"model_name": "gpt-4", "temperature": 0.0},
+        )
+        id1 = ComponentIdentifier(class_name="Scorer", class_module="pyrit.score", children={"prompt_target": child1})
+        id2 = ComponentIdentifier(class_name="Scorer", class_module="pyrit.score", children={"prompt_target": child2})
+
+        assert ScorerEvaluationIdentity(id1).eval_hash != ScorerEvaluationIdentity(id2).eval_hash
+
+    def test_eval_hash_matches_free_function(self):
+        """Test that eval_hash matches calling compute_eval_hash with scorer constants."""
+        cid = ComponentIdentifier(class_name="MyScorer", class_module="pyrit.score", params={"k": "v"})
+        identity = ScorerEvaluationIdentity(cid)
+
+        expected = compute_eval_hash(
+            cid,
+            target_child_keys=ScorerEvaluationIdentity.TARGET_CHILD_KEYS,
+            behavioral_child_params=ScorerEvaluationIdentity.BEHAVIORAL_CHILD_PARAMS,
+        )
+        assert identity.eval_hash == expected
+
+
+@pytest.mark.usefixtures("patch_central_database")
+class TestScorerGetEvalHash:
+    """Tests for ScorerEvaluationIdentity on Identifiable-built identifiers (adapted from old TestGetEvalHash)."""
+
+    def test_get_eval_hash_uses_scorer_identity(self):
+        """Test that Scorer.get_eval_hash() delegates to ScorerEvaluationIdentity."""
+
+        class FakeScorer(Identifiable):
+
+            def _build_identifier(self) -> ComponentIdentifier:
+                child = ComponentIdentifier(
+                    class_name="Target",
+                    class_module="pyrit.target",
+                    params={"model_name": "gpt-4", "endpoint": "https://example.com"},
+                )
+                return ComponentIdentifier.of(self, children={"prompt_target": child})
+
+        scorer = FakeScorer()
+        identifier = scorer.get_identifier()
+        eval_hash = ScorerEvaluationIdentity(identifier).eval_hash
+
+        expected = compute_eval_hash(
+            identifier,
+            target_child_keys=ScorerEvaluationIdentity.TARGET_CHILD_KEYS,
+            behavioral_child_params=ScorerEvaluationIdentity.BEHAVIORAL_CHILD_PARAMS,
+        )
+        assert eval_hash == expected
+
+    def test_get_eval_hash_filters_operational_params(self):
+        """Test that Scorer.get_eval_hash() filters operational params from target children."""
+
+        class ScorerLike(Identifiable):
+
+            def __init__(self, *, endpoint: str):
+                self._endpoint = endpoint
+
+            def _build_identifier(self) -> ComponentIdentifier:
+                child = ComponentIdentifier(
+                    class_name="Target",
+                    class_module="pyrit.target",
+                    params={"model_name": "gpt-4", "endpoint": self._endpoint},
+                )
+                return ComponentIdentifier.of(self, children={"prompt_target": child})
+
+        scorer_a = ScorerLike(endpoint="https://endpoint-a.com")
+        scorer_b = ScorerLike(endpoint="https://endpoint-b.com")
+
+        hash_a = ScorerEvaluationIdentity(scorer_a.get_identifier()).eval_hash
+        hash_b = ScorerEvaluationIdentity(scorer_b.get_identifier()).eval_hash
+
+        # Different endpoints should produce same eval hash (operational param filtered)
+        assert hash_a == hash_b
+        # But different component hashes (endpoint is in full identity)
+        assert scorer_a.get_identifier().hash != scorer_b.get_identifier().hash
+
+    def test_get_eval_hash_no_target_children_equals_component_hash(self):
+        """Test that eval hash equals component hash when there are no target children."""
+
+        class SimpleScorer(Identifiable):
+
+            def _build_identifier(self) -> ComponentIdentifier:
+                return ComponentIdentifier.of(self, params={"key": "value"})
+
+        scorer = SimpleScorer()
+        identifier = scorer.get_identifier()
+        eval_hash = ScorerEvaluationIdentity(identifier).eval_hash
+
+        # No children named "prompt_target" or "converter_target", so no filtering occurs
+        assert eval_hash == identifier.hash

From b8f13cc8af266d9d09eacfb23585be5eb3fd956d Mon Sep 17 00:00:00 2001
From: Richard Lundeen <rlundeen@microsoft.com>
Date: Tue, 3 Mar 2026 13:13:51 -0800
Subject: [PATCH 4/5] pre-commit

---
 pyrit/identifiers/evaluation_identity.py           |  1 +
 tests/unit/identifiers/test_evaluation_identity.py | 14 ++++++++++----
 .../unit/score/test_scorer_evaluation_identity.py  |  8 ++------
 3 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/pyrit/identifiers/evaluation_identity.py b/pyrit/identifiers/evaluation_identity.py
index 64492a66b2..d618978beb 100644
--- a/pyrit/identifiers/evaluation_identity.py
+++ b/pyrit/identifiers/evaluation_identity.py
@@ -155,6 +155,7 @@ class EvaluationIdentity(ABC):
     BEHAVIORAL_CHILD_PARAMS: ClassVar[frozenset[str]]
 
     def __init__(self, identifier: ComponentIdentifier) -> None:
+        """Wrap a ComponentIdentifier and eagerly compute its eval hash."""
         self._identifier = identifier
         self._eval_hash = compute_eval_hash(
             identifier,
diff --git a/tests/unit/identifiers/test_evaluation_identity.py b/tests/unit/identifiers/test_evaluation_identity.py
index 3bbbdd8759..af572922b8 100644
--- a/tests/unit/identifiers/test_evaluation_identity.py
+++ b/tests/unit/identifiers/test_evaluation_identity.py
@@ -15,11 +15,11 @@
 from pyrit.identifiers import ComponentIdentifier, compute_eval_hash
 from pyrit.identifiers.evaluation_identity import EvaluationIdentity, _build_eval_dict
 
-
 # ---------------------------------------------------------------------------
 # Concrete subclass for testing the ABC
 # ---------------------------------------------------------------------------
 
+
 class _StubEvaluationIdentity(EvaluationIdentity):
     """Minimal concrete subclass for testing the abstract base class."""
 
@@ -106,8 +106,12 @@ class TestComputeEvalHash:
     def test_deterministic(self):
         """Test that the same identifier + config produces the same hash."""
         identifier = ComponentIdentifier(class_name="Scorer", class_module="pyrit.score")
-        h1 = compute_eval_hash(identifier, target_child_keys=_TARGET_CHILD_KEYS, behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS)
-        h2 = compute_eval_hash(identifier, target_child_keys=_TARGET_CHILD_KEYS, behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS)
+        h1 = compute_eval_hash(
+            identifier, target_child_keys=_TARGET_CHILD_KEYS, behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS
+        )
+        h2 = compute_eval_hash(
+            identifier, target_child_keys=_TARGET_CHILD_KEYS, behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS
+        )
         assert h1 == h2
 
     def test_empty_target_child_keys_returns_component_hash(self):
@@ -133,7 +137,9 @@ def test_empty_target_child_keys_returns_component_hash(self):
     def test_returns_64_char_hex(self):
         """Test that the hash is a 64-char lowercase hex string (SHA-256)."""
         identifier = ComponentIdentifier(class_name="S", class_module="m")
-        result = compute_eval_hash(identifier, target_child_keys=_TARGET_CHILD_KEYS, behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS)
+        result = compute_eval_hash(
+            identifier, target_child_keys=_TARGET_CHILD_KEYS, behavioral_child_params=_BEHAVIORAL_CHILD_PARAMS
+        )
         assert len(result) == 64
         assert all(c in "0123456789abcdef" for c in result)
 
diff --git a/tests/unit/score/test_scorer_evaluation_identity.py b/tests/unit/score/test_scorer_evaluation_identity.py
index f1635fcb10..208f9aecc0 100644
--- a/tests/unit/score/test_scorer_evaluation_identity.py
+++ b/tests/unit/score/test_scorer_evaluation_identity.py
@@ -8,7 +8,6 @@
 the ``Scorer.get_eval_hash()`` convenience method.
 """
 
-from typing import ClassVar
 
 import pytest
 
@@ -21,11 +20,11 @@ class TestScorerEvaluationIdentityConstants:
 
     def test_target_child_keys(self):
         """Test that TARGET_CHILD_KEYS contains the expected scorer target names."""
-        assert ScorerEvaluationIdentity.TARGET_CHILD_KEYS == frozenset({"prompt_target", "converter_target"})
+        assert frozenset({"prompt_target", "converter_target"}) == ScorerEvaluationIdentity.TARGET_CHILD_KEYS
 
     def test_behavioral_child_params(self):
         """Test that BEHAVIORAL_CHILD_PARAMS contains the expected behavioral params."""
-        assert ScorerEvaluationIdentity.BEHAVIORAL_CHILD_PARAMS == frozenset({"model_name", "temperature", "top_p"})
+        assert frozenset({"model_name", "temperature", "top_p"}) == ScorerEvaluationIdentity.BEHAVIORAL_CHILD_PARAMS
 
 
 class TestScorerEvaluationIdentityEvalHash:
@@ -93,7 +92,6 @@ def test_get_eval_hash_uses_scorer_identity(self):
         """Test that Scorer.get_eval_hash() delegates to ScorerEvaluationIdentity."""
 
         class FakeScorer(Identifiable):
-
             def _build_identifier(self) -> ComponentIdentifier:
                 child = ComponentIdentifier(
                     class_name="Target",
@@ -117,7 +115,6 @@ def test_get_eval_hash_filters_operational_params(self):
         """Test that Scorer.get_eval_hash() filters operational params from target children."""
 
         class ScorerLike(Identifiable):
-
             def __init__(self, *, endpoint: str):
                 self._endpoint = endpoint
 
@@ -144,7 +141,6 @@ def test_get_eval_hash_no_target_children_equals_component_hash(self):
         """Test that eval hash equals component hash when there are no target children."""
 
         class SimpleScorer(Identifiable):
-
             def _build_identifier(self) -> ComponentIdentifier:
                 return ComponentIdentifier.of(self, params={"key": "value"})
 

From 59423e226308aaee0cba1deb394c48b706cbf934 Mon Sep 17 00:00:00 2001
From: Richard Lundeen <rlundeen@microsoft.com>
Date: Tue, 3 Mar 2026 14:40:33 -0800
Subject: [PATCH 5/5] pre-commit

---
 pyrit/identifiers/evaluation_identity.py            | 10 +++++++---
 tests/unit/score/test_scorer_evaluation_identity.py |  1 -
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/pyrit/identifiers/evaluation_identity.py b/pyrit/identifiers/evaluation_identity.py
index d618978beb..9a0dd187d5 100644
--- a/pyrit/identifiers/evaluation_identity.py
+++ b/pyrit/identifiers/evaluation_identity.py
@@ -55,9 +55,13 @@ def _build_eval_dict(
         ComponentIdentifier.KEY_CLASS_MODULE: identifier.class_module,
     }
 
-    for key, value in sorted(identifier.params.items()):
-        if value is not None and (param_allowlist is None or key in param_allowlist):
-            eval_dict[key] = value
+    eval_dict.update(
+        {
+            key: value
+            for key, value in sorted(identifier.params.items())
+            if value is not None and (param_allowlist is None or key in param_allowlist)
+        }
+    )
 
     if identifier.children:
         eval_children: dict[str, Any] = {}
diff --git a/tests/unit/score/test_scorer_evaluation_identity.py b/tests/unit/score/test_scorer_evaluation_identity.py
index 208f9aecc0..cc61acdde6 100644
--- a/tests/unit/score/test_scorer_evaluation_identity.py
+++ b/tests/unit/score/test_scorer_evaluation_identity.py
@@ -8,7 +8,6 @@
 the ``Scorer.get_eval_hash()`` convenience method.
 """
 
-
 import pytest
 
 from pyrit.identifiers import ComponentIdentifier, Identifiable, compute_eval_hash