From 00f748a1d7a27857399c2d67a18f8a5b7ca0711d Mon Sep 17 00:00:00 2001 From: Abhijeet Prasad Date: Tue, 7 Apr 2026 14:54:08 +0000 Subject: [PATCH 1/3] fix: allow eval filtering with EvalCase objects --- py/src/braintrust/framework.py | 7 ++- py/src/braintrust/test_framework.py | 80 +++++++++++++++++++++++++++++ 2 files changed, 85 insertions(+), 2 deletions(-) diff --git a/py/src/braintrust/framework.py b/py/src/braintrust/framework.py index 2eeb00de..378aa852 100644 --- a/py/src/braintrust/framework.py +++ b/py/src/braintrust/framework.py @@ -9,7 +9,7 @@ import traceback import warnings from collections import defaultdict -from collections.abc import Awaitable, Callable, Coroutine, Iterable, Iterator, Sequence +from collections.abc import Awaitable, Callable, Coroutine, Iterable, Iterator, Mapping, Sequence from concurrent.futures import ThreadPoolExecutor from contextlib import contextmanager from multiprocessing import cpu_count @@ -1154,7 +1154,10 @@ def parse_filters(filters: list[str]) -> list[Filter]: def evaluate_filter(object, filter: Filter): key = object for p in filter.path: - key = key.get(p) + if isinstance(key, Mapping): + key = key.get(p) + else: + key = getattr(key, p, None) if key is None: return False return filter.pattern.match(serialize_json_with_plain_string(key)) is not None diff --git a/py/src/braintrust/test_framework.py b/py/src/braintrust/test_framework.py index 7d33eda0..e0d5470d 100644 --- a/py/src/braintrust/test_framework.py +++ b/py/src/braintrust/test_framework.py @@ -1,4 +1,5 @@ import importlib.util +import re from typing import List from unittest.mock import MagicMock @@ -11,6 +12,9 @@ EvalHooks, EvalResultWithSummary, Evaluator, + Filter, + evaluate_filter, + parse_filters, run_evaluator, ) from .score import Score, Scorer @@ -626,3 +630,79 @@ async def test_run_evaluator_empty_dataset_warns(capsys): captured = capsys.readouterr() assert "Warning" in captured.err assert "empty" in captured.err.lower() + + +class TestEvaluateFilter: + """Tests for evaluate_filter with both dict and EvalCase inputs (issue #207).""" + + @pytest.mark.parametrize( + "datum", + [ + {"input": "hello", "metadata": {"name": "foo"}}, + EvalCase(input="hello", metadata={"name": "foo"}), + ], + ids=["dict", "evalcase"], + ) + def test_evaluate_filter_match(self, datum): + f = Filter(path=["metadata", "name"], pattern=re.compile("foo")) + assert evaluate_filter(datum, f) is True + + @pytest.mark.parametrize( + "datum", + [ + {"input": "hello", "metadata": {"name": "bar"}}, + EvalCase(input="hello", metadata={"name": "bar"}), + ], + ids=["dict", "evalcase"], + ) + def test_evaluate_filter_no_match(self, datum): + f = Filter(path=["metadata", "name"], pattern=re.compile("foo")) + assert evaluate_filter(datum, f) is False + + @pytest.mark.parametrize( + "datum", + [ + {"input": "hello"}, + EvalCase(input="hello"), + ], + ids=["dict", "evalcase"], + ) + def test_evaluate_filter_missing_key(self, datum): + f = Filter(path=["metadata", "name"], pattern=re.compile("foo")) + assert evaluate_filter(datum, f) is False + + def test_evaluate_filter_nested_metadata(self): + datum = EvalCase(input="hello", metadata={"priority": "P0", "owner": "alice"}) + f = Filter(path=["metadata", "priority"], pattern=re.compile("^P0$")) + assert evaluate_filter(datum, f) is True + + def test_evaluate_filter_input_field(self): + datum = EvalCase(input={"text": "hello world"}, metadata={"name": "foo"}) + f = Filter(path=["input", "text"], pattern=re.compile("hello")) + assert evaluate_filter(datum, f) is True + + +@pytest.mark.asyncio +async def test_run_evaluator_with_filter_and_evalcase(): + """Integration test: run_evaluator with filters and EvalCase data (issue #207).""" + data = [ + EvalCase(input="hello", metadata={"name": "foo"}), + EvalCase(input="world", metadata={"name": "bar"}), + ] + + evaluator = Evaluator( + project_name="test-project", + eval_name="test-filter-evalcase", + data=data, + task=lambda x: x, + scores=[], + experiment_name=None, + metadata=None, + ) + + filters = parse_filters(["metadata.name=foo"]) + result = await run_evaluator(experiment=None, evaluator=evaluator, position=None, filters=filters) + + # Only the "foo" case should pass the filter + assert len(result.results) == 1 + assert result.results[0].input == "hello" From 09aca32188e83088312b50ca226d9ae48500d04b Mon Sep 17 00:00:00 2001 From: Abhijeet Prasad Date: Tue, 7 Apr 2026 14:57:04 +0000 Subject: [PATCH 2/3] re-org test --- py/src/braintrust/test_framework.py | 45 ++++++++++++++--------------- 1 file changed, 22 insertions(+), 23 deletions(-) diff --git a/py/src/braintrust/test_framework.py b/py/src/braintrust/test_framework.py index e0d5470d..9c55817d 100644 --- a/py/src/braintrust/test_framework.py +++ b/py/src/braintrust/test_framework.py @@ -633,7 +633,7 @@ async def test_run_evaluator_empty_dataset_warns(capsys): class TestEvaluateFilter: - """Tests for evaluate_filter with both dict and EvalCase inputs (issue #207).""" + """Regression tests for https://github.com/braintrustdata/braintrust-sdk-python/issues/207.""" @pytest.mark.parametrize( "datum", @@ -682,27 +682,26 @@ def test_evaluate_filter_input_field(self): assert evaluate_filter(datum, f) is True -@pytest.mark.asyncio -async def test_run_evaluator_with_filter_and_evalcase(): - """Integration test: run_evaluator with filters and EvalCase data (issue #207).""" - data = [ - EvalCase(input="hello", metadata={"name": "foo"}), - EvalCase(input="world", metadata={"name": "bar"}), - ] - - evaluator = Evaluator( - project_name="test-project", - eval_name="test-filter-evalcase", - data=data, - task=lambda x: x, - scores=[], - experiment_name=None, - metadata=None, - ) + @pytest.mark.asyncio + async def test_run_evaluator_with_filter_and_evalcase(self): + data = [ + EvalCase(input="hello", metadata={"name": "foo"}), + EvalCase(input="world", metadata={"name": "bar"}), + ] + + evaluator = Evaluator( + project_name="test-project", + eval_name="test-filter-evalcase", + data=data, + task=lambda x: x, + scores=[], + experiment_name=None, + metadata=None, + ) - filters = parse_filters(["metadata.name=foo"]) - result = await run_evaluator(experiment=None, evaluator=evaluator, position=None, filters=filters) + filters = parse_filters(["metadata.name=foo"]) + result = await run_evaluator(experiment=None, evaluator=evaluator, position=None, filters=filters) - # Only the "foo" case should pass the filter - assert len(result.results) == 1 - assert result.results[0].input == "hello" + # Only the "foo" case should pass the filter + assert len(result.results) == 1 + assert result.results[0].input == "hello" From 9aed343ce1ce48559ab14fce8b9371e9d0c824ef Mon Sep 17 00:00:00 2001 From: Abhijeet Prasad Date: Tue, 7 Apr 2026 15:00:50 +0000 Subject: [PATCH 3/3] fixup --- py/src/braintrust/test_framework.py | 1 - 1 file changed, 1 deletion(-) diff --git a/py/src/braintrust/test_framework.py b/py/src/braintrust/test_framework.py index 9c55817d..4f6175d2 100644 --- a/py/src/braintrust/test_framework.py +++ b/py/src/braintrust/test_framework.py @@ -681,7 +681,6 @@ def test_evaluate_filter_input_field(self): f = Filter(path=["input", "text"], pattern=re.compile("hello")) assert evaluate_filter(datum, f) is True - @pytest.mark.asyncio async def test_run_evaluator_with_filter_and_evalcase(self): data = [