Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,9 @@ API Reference

class_name_to_snake_case
ComponentIdentifier
compute_eval_hash
config_hash
EvaluationIdentity
Identifiable
snake_case_to_class_name

Expand Down
3 changes: 3 additions & 0 deletions pyrit/identifiers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,13 @@
snake_case_to_class_name,
)
from pyrit.identifiers.component_identifier import ComponentIdentifier, Identifiable, config_hash
from pyrit.identifiers.evaluation_identity import EvaluationIdentity, compute_eval_hash

__all__ = [
"class_name_to_snake_case",
"ComponentIdentifier",
"compute_eval_hash",
"EvaluationIdentity",
"Identifiable",
"snake_case_to_class_name",
"config_hash",
Expand Down
178 changes: 178 additions & 0 deletions pyrit/identifiers/evaluation_identity.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

"""
Evaluation identity and eval-hash computation.

This module provides:

* ``_build_eval_dict`` — builds a filtered dict for eval-hash computation.
* ``compute_eval_hash`` — free function that computes a behavioral equivalence
hash from a ``ComponentIdentifier``.
* ``EvaluationIdentity`` — abstract base that wraps a ``ComponentIdentifier``
with domain-specific eval-hash configuration. Concrete subclasses declare
*which* children are targets and *which* params are behavioral via two
``ClassVar`` frozensets.
"""

from __future__ import annotations

from abc import ABC
from typing import Any, ClassVar, Optional

from pyrit.identifiers.component_identifier import ComponentIdentifier, config_hash


def _build_eval_dict(
    identifier: ComponentIdentifier,
    *,
    target_child_keys: frozenset[str],
    behavioral_child_params: frozenset[str],
    param_allowlist: Optional[frozenset[str]] = None,
) -> dict[str, Any]:
    """
    Build a filtered dictionary for eval-hash computation.

    Includes only behavioral parameters. For child components whose names appear
    in ``target_child_keys``, only params in ``behavioral_child_params`` are kept
    (stripping operational params like endpoint, max_requests_per_minute).
    Non-target children receive full eval treatment recursively.

    Args:
        identifier (ComponentIdentifier): The component identity to process.
        target_child_keys (frozenset[str]): Child names that are targets
            (e.g., ``{"prompt_target", "converter_target"}``).
        behavioral_child_params (frozenset[str]): Param allowlist applied to
            target children (e.g., ``{"model_name", "temperature", "top_p"}``).
        param_allowlist (Optional[frozenset[str]]): If provided, only include
            params whose keys are in the allowlist. If None, include all params.

    Returns:
        dict[str, Any]: The filtered dictionary suitable for hashing.
    """
    eval_dict: dict[str, Any] = {
        ComponentIdentifier.KEY_CLASS_NAME: identifier.class_name,
        ComponentIdentifier.KEY_CLASS_MODULE: identifier.class_module,
    }

    # Deterministic key order; drop None-valued params; honor the allowlist when given.
    eval_dict.update(
        {
            key: value
            for key, value in sorted(identifier.params.items())
            if value is not None and (param_allowlist is None or key in param_allowlist)
        }
    )

    if identifier.children:
        eval_children: dict[str, Any] = {}
        for name in sorted(identifier.children):
            # Target children are filtered to behavioral params only; non-target
            # children (e.g., sub-scorers) recurse with no param filtering. The
            # two cases differ only in the allowlist passed down, so a single
            # recursive call covers both.
            child_allowlist = behavioral_child_params if name in target_child_keys else None
            hashes = [
                config_hash(
                    _build_eval_dict(
                        child,
                        target_child_keys=target_child_keys,
                        behavioral_child_params=behavioral_child_params,
                        param_allowlist=child_allowlist,
                    )
                )
                for child in identifier.get_child_list(name)
            ]
            # A single child collapses to a scalar hash (matching identifier semantics);
            # multiple children stay a list to preserve order.
            eval_children[name] = hashes[0] if len(hashes) == 1 else hashes
        if eval_children:
            eval_dict["children"] = eval_children

    return eval_dict


def compute_eval_hash(
    identifier: ComponentIdentifier,
    *,
    target_child_keys: frozenset[str],
    behavioral_child_params: frozenset[str],
) -> str:
    """
    Compute a behavioral equivalence hash for evaluation grouping.

    Unlike ``ComponentIdentifier.hash`` (which includes all params of self and
    children), the eval hash filters child components that are "targets" to only
    their behavioral params (e.g., model_name, temperature, top_p), stripping
    operational params like endpoint or max_requests_per_minute. This ensures the
    same logical configuration on different deployments produces the same eval hash.

    Non-target children (e.g., sub-scorers) receive full recursive eval treatment.

    When ``target_child_keys`` is empty, no child filtering occurs and the result
    equals ``identifier.hash``.

    Args:
        identifier (ComponentIdentifier): The component identity to compute the hash for.
        target_child_keys (frozenset[str]): Child names that are targets
            (e.g., ``{"prompt_target", "converter_target"}``).
        behavioral_child_params (frozenset[str]): Param allowlist for target children
            (e.g., ``{"model_name", "temperature", "top_p"}``).

    Returns:
        str: A hex-encoded SHA256 hash suitable for eval registry keying.
    """
    # With no targets declared there is nothing to filter, so the eval hash
    # degenerates to the identifier's own full hash.
    if not target_child_keys:
        return identifier.hash

    return config_hash(
        _build_eval_dict(
            identifier,
            target_child_keys=target_child_keys,
            behavioral_child_params=behavioral_child_params,
        )
    )


class EvaluationIdentity(ABC):
    """
    Wraps a ``ComponentIdentifier`` with domain-specific eval-hash configuration.

    Concrete subclasses must define two ``ClassVar`` frozensets:

    * ``TARGET_CHILD_KEYS`` — names of child components whose operational params
      are stripped for eval hashing (e.g., ``{"prompt_target", "converter_target"}``).
    * ``BEHAVIORAL_CHILD_PARAMS`` — the param allowlist applied to those target
      children (e.g., ``{"model_name", "temperature", "top_p"}``).

    The ``eval_hash`` property delegates to the module-level ``compute_eval_hash``
    free function using those class-level settings.
    """

    TARGET_CHILD_KEYS: ClassVar[frozenset[str]]
    BEHAVIORAL_CHILD_PARAMS: ClassVar[frozenset[str]]

    def __init__(self, identifier: ComponentIdentifier) -> None:
        """Store the identifier and eagerly derive its eval hash."""
        # Hash is computed once up front so repeated property reads are free.
        self._eval_hash = compute_eval_hash(
            identifier,
            target_child_keys=self.TARGET_CHILD_KEYS,
            behavioral_child_params=self.BEHAVIORAL_CHILD_PARAMS,
        )
        self._identifier = identifier

    @property
    def identifier(self) -> ComponentIdentifier:
        """The underlying component identity."""
        return self._identifier

    @property
    def eval_hash(self) -> str:
        """Behavioral equivalence hash for evaluation grouping."""
        return self._eval_hash
9 changes: 4 additions & 5 deletions pyrit/models/scenario_result.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,15 +223,14 @@ def get_scorer_evaluation_metrics(self) -> Optional["ScorerMetrics"]:

"""
# import here to avoid circular imports
from pyrit.score.scorer_evaluation.scorer_evaluation_identity import ScorerEvaluationIdentity
from pyrit.score.scorer_evaluation.scorer_metrics_io import (
find_objective_metrics_by_hash,
find_objective_metrics_by_eval_hash,
)

if not self.objective_scorer_identifier:
return None

scorer_hash = self.objective_scorer_identifier.hash
if not scorer_hash:
return None
eval_hash = ScorerEvaluationIdentity(self.objective_scorer_identifier).eval_hash

return find_objective_metrics_by_hash(hash=scorer_hash)
return find_objective_metrics_by_eval_hash(eval_hash=eval_hash)
8 changes: 5 additions & 3 deletions pyrit/score/float_scale/float_scale_scorer.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,14 +52,16 @@ def get_scorer_metrics(self) -> Optional["HarmScorerMetrics"]:
HarmScorerMetrics: The metrics for this scorer, or None if not found or not configured.
"""
from pyrit.score.scorer_evaluation.scorer_metrics_io import (
find_harm_metrics_by_hash,
find_harm_metrics_by_eval_hash,
)

if self.evaluation_file_mapping is None or self.evaluation_file_mapping.harm_category is None:
return None
scorer_hash = self.get_identifier().hash

return find_harm_metrics_by_hash(hash=scorer_hash, harm_category=self.evaluation_file_mapping.harm_category)
return find_harm_metrics_by_eval_hash(
eval_hash=self.get_eval_hash(),
harm_category=self.evaluation_file_mapping.harm_category,
)

async def _score_value_with_llm(
self,
Expand Down
18 changes: 10 additions & 8 deletions pyrit/score/printer/console_scorer_printer.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,18 +100,19 @@ def print_objective_scorer(self, *, scorer_identifier: ComponentIdentifier) -> N
Args:
scorer_identifier (ComponentIdentifier): The scorer identifier to print information for.
"""
from pyrit.score.scorer_evaluation.scorer_evaluation_identity import ScorerEvaluationIdentity
from pyrit.score.scorer_evaluation.scorer_metrics_io import (
find_objective_metrics_by_hash,
find_objective_metrics_by_eval_hash,
)

print()
self._print_colored(f"{self._indent}📊 Scorer Information", Style.BRIGHT)
self._print_colored(f"{self._indent * 2}▸ Scorer Identifier", Fore.WHITE)
self._print_scorer_info(scorer_identifier, indent_level=3)

# Look up metrics by hash
scorer_hash = scorer_identifier.hash
metrics = find_objective_metrics_by_hash(hash=scorer_hash)
# Look up metrics by eval hash
eval_hash = ScorerEvaluationIdentity(scorer_identifier).eval_hash
metrics = find_objective_metrics_by_eval_hash(eval_hash=eval_hash)
self._print_objective_metrics(metrics)

def print_harm_scorer(self, scorer_identifier: ComponentIdentifier, *, harm_category: str) -> None:
Expand All @@ -127,18 +128,19 @@ def print_harm_scorer(self, scorer_identifier: ComponentIdentifier, *, harm_cate
scorer_identifier (ComponentIdentifier): The scorer identifier to print information for.
harm_category (str): The harm category for looking up metrics (e.g., "hate_speech", "violence").
"""
from pyrit.score.scorer_evaluation.scorer_evaluation_identity import ScorerEvaluationIdentity
from pyrit.score.scorer_evaluation.scorer_metrics_io import (
find_harm_metrics_by_hash,
find_harm_metrics_by_eval_hash,
)

print()
self._print_colored(f"{self._indent}📊 Scorer Information", Style.BRIGHT)
self._print_colored(f"{self._indent * 2}▸ Scorer Identifier", Fore.WHITE)
self._print_scorer_info(scorer_identifier, indent_level=3)

# Look up metrics by hash and harm category
scorer_hash = scorer_identifier.hash
metrics = find_harm_metrics_by_hash(hash=scorer_hash, harm_category=harm_category)
# Look up metrics by eval hash and harm category
eval_hash = ScorerEvaluationIdentity(scorer_identifier).eval_hash
metrics = find_harm_metrics_by_eval_hash(eval_hash=eval_hash, harm_category=harm_category)
self._print_harm_metrics(metrics)

def _print_scorer_info(self, scorer_identifier: ComponentIdentifier, *, indent_level: int = 2) -> None:
Expand Down
20 changes: 18 additions & 2 deletions pyrit/score/scorer.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,8 @@ class Scorer(Identifiable, abc.ABC):
Abstract base class for scorers.
"""

# Evaluation configuration - maps input dataset files to a result file
# Specifies glob patterns for datasets and a result file name
# Evaluation configuration - maps input dataset files to a result file.
# Specifies glob patterns for datasets and a result file name.
evaluation_file_mapping: Optional[ScorerEvalDatasetFiles] = None

_identifier: Optional[ComponentIdentifier] = None
Expand All @@ -70,6 +70,22 @@ def __init__(self, *, validator: ScorerPromptValidator):
"""
self._validator = validator

def get_eval_hash(self) -> str:
    """
    Compute a behavioral equivalence hash for evaluation grouping.

    Delegates to ``ScorerEvaluationIdentity`` which filters target children
    (prompt_target, converter_target) to behavioral params only, so the same
    scorer configuration on different deployments produces the same eval hash.

    Returns:
        str: A hex-encoded SHA256 hash suitable for eval registry keying.
    """
    # Import locally: scorer_evaluation_identity depends (transitively) on this
    # module, so a top-level import would create a cycle.
    from pyrit.score.scorer_evaluation.scorer_evaluation_identity import ScorerEvaluationIdentity

    identity = ScorerEvaluationIdentity(self.get_identifier())
    return identity.eval_hash

@property
def scorer_type(self) -> ScoreType:
"""
Expand Down
28 changes: 28 additions & 0 deletions pyrit/score/scorer_evaluation/scorer_evaluation_identity.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

"""
Scorer-specific evaluation identity.

``ScorerEvaluationIdentity`` declares which children are "targets" and which
target params are behavioral for the scorer evaluation domain.
"""

from __future__ import annotations

from typing import ClassVar

from pyrit.identifiers.evaluation_identity import EvaluationIdentity


class ScorerEvaluationIdentity(EvaluationIdentity):
    """
    Evaluation identity specialized for scorers.

    Children named ``prompt_target`` or ``converter_target`` are treated as
    targets and reduced to their behavioral params (``model_name``,
    ``temperature``, ``top_p``) when hashing, so the same scorer configuration
    deployed against different endpoints yields an identical eval hash.
    """

    # Child components whose operational params (endpoint, rate limits, ...) are stripped.
    TARGET_CHILD_KEYS: ClassVar[frozenset[str]] = frozenset({"prompt_target", "converter_target"})
    # The only target-child params that influence scoring behavior.
    BEHAVIORAL_CHILD_PARAMS: ClassVar[frozenset[str]] = frozenset({"model_name", "temperature", "top_p"})
Loading