From 807131dccc79ac7396e5d547189ea44cda46d38f Mon Sep 17 00:00:00 2001 From: Juexin Wang Date: Mon, 11 May 2026 16:29:20 +0000 Subject: [PATCH 1/6] feat: Add console script entrypoint for uvx execution support --- evalbench/__init__.py | 5 +++++ evalbench/evalbench.py | 32 +++++++++++++------------------- pyproject.toml | 3 +++ 3 files changed, 21 insertions(+), 19 deletions(-) diff --git a/evalbench/__init__.py b/evalbench/__init__.py index 58a5b2b9..5a082783 100644 --- a/evalbench/__init__.py +++ b/evalbench/__init__.py @@ -1,3 +1,8 @@ +import sys +import os + +sys.path.insert(0, os.path.dirname(__file__)) + from . import reporting from . import util from . import dataset diff --git a/evalbench/evalbench.py b/evalbench/evalbench.py index 51ebddc2..41151896 100644 --- a/evalbench/evalbench.py +++ b/evalbench/evalbench.py @@ -58,9 +58,7 @@ def eval(experiment_config: str): set_session_configs(session, parsed_config) # Load the configs - config, db_configs, model_config, setup_config = load_session_configs( - session - ) + config, db_configs, model_config, setup_config = load_session_configs(session) logging.info("Loaded Configurations in %s", experiment_config) # Load the dataset @@ -74,9 +72,7 @@ def eval(experiment_config: str): reporting_config = config.get("reporting") or {} csv_config = reporting_config.get("csv") or {} base_output_dir = csv_config.get("output_directory", "results") - session_dir = os.path.abspath( - os.path.join(base_output_dir, evaluator.job_id) - ) + session_dir = os.path.abspath(os.path.join(base_output_dir, evaluator.job_id)) set_up_script = config.get("set_up_script") if set_up_script: @@ -96,12 +92,8 @@ def eval(experiment_config: str): # Create Dataframes for reporting if results_tf is not None and scores_tf is not None: - reporters = get_reporters( - parsed_config.get("reporting"), job_id, run_time - ) - config_df = config_to_df( - job_id, run_time, config, model_config, db_configs - ) + reporters = get_reporters(parsed_config.get("reporting"), job_id, run_time) + config_df = config_to_df(job_id, run_time, config, model_config, db_configs) results = load_json(results_tf) results_df = report.get_dataframe(results) report.quick_summary(results_df) @@ -141,8 +133,7 @@ def eval(experiment_config: str): tear_down_script = config.get("tear_down_script") if tear_down_script: if os.path.exists(tear_down_script): - logging.info("Executing tear_down_script '%s'", - tear_down_script) + logging.info("Executing tear_down_script '%s'", tear_down_script) run_script(tear_down_script, session_dir, "teardown") else: logging.error( @@ -169,9 +160,7 @@ def run_suite(suite_config_path: str) -> bool: logging.error("No runs defined in suite config.") return False - logging.info( - f"Starting EvalBench Suite: {suite_conf.get('name', 'Unnamed Suite')}" - ) + logging.info(f"Starting EvalBench Suite: {suite_conf.get('name', 'Unnamed Suite')}") logging.info(f"Total runs scheduled: {len(runs)}") results = [] @@ -180,8 +169,7 @@ def run_suite(suite_config_path: str) -> bool: config_path = run.get("config_path") if not config_path: - logging.error( - f"Run '{run_name}' is missing 'config_path'. Skipping.") + logging.error(f"Run '{run_name}' is missing 'config_path'. Skipping.") results.append((run_name, False)) continue @@ -218,6 +206,12 @@ def main(argv: Sequence[str]): return os._exit(exit_code) +def run(): + """Starting function for the uvx package entrypoint.""" + app.run(main) + + + if __name__ == "__main__": # Required for PyInstaller multiprocessing support multiprocessing.freeze_support() diff --git a/pyproject.toml b/pyproject.toml index a61e6e3f..1eb85c20 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -52,6 +52,9 @@ dependencies = [ "dbt-postgres", ] +[project.scripts] +evalbench = "evalbench.evalbench:run" + [tool.setuptools.packages.find] include = ["evalbench*"] From 0e0a09767dac53790fe94549684b6196d953c093 Mon Sep 17 00:00:00 2001 From: Juexin Wang Date: Mon, 11 May 2026 16:31:22 +0000 Subject: [PATCH 2/6] docs: Add explanatory comments for package path injection --- evalbench/__init__.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/evalbench/__init__.py b/evalbench/__init__.py index 5a082783..038bc8cd 100644 --- a/evalbench/__init__.py +++ b/evalbench/__init__.py @@ -1,8 +1,12 @@ -import sys import os +import sys +# Inject package root into sys.path at import time. +# This allows legacy absolute imports (e.g. 'from reporting.report import ...') +# to resolve correctly when evalbench is installed as a packaged global CLI tool. sys.path.insert(0, os.path.dirname(__file__)) + from . import reporting from . import util from . import dataset From 307db9b4dce676210384e56d3ac08ab032525907 Mon Sep 17 00:00:00 2001 From: Juexin Wang Date: Mon, 11 May 2026 16:45:09 +0000 Subject: [PATCH 3/6] docs: Simplify path resolution comments in package init --- evalbench/__init__.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/evalbench/__init__.py b/evalbench/__init__.py index 038bc8cd..e3876b4c 100644 --- a/evalbench/__init__.py +++ b/evalbench/__init__.py @@ -1,9 +1,8 @@ import os import sys -# Inject package root into sys.path at import time. -# This allows legacy absolute imports (e.g. 'from reporting.report import ...') -# to resolve correctly when evalbench is installed as a packaged global CLI tool. +# Expose internal subdirectories to sys.path so legacy absolute imports +# (e.g. 'from reporting.report import ...') resolve correctly when run globally. sys.path.insert(0, os.path.dirname(__file__)) From 9546cffeaf22d67b4265c5c149f90ac67a45b559 Mon Sep 17 00:00:00 2001 From: Juexin Wang Date: Mon, 11 May 2026 16:54:16 +0000 Subject: [PATCH 4/6] style: Fix pycodestyle blank line E303 and W391 warnings in evalbench.py --- evalbench/evalbench.py | 1 - 1 file changed, 1 deletion(-) diff --git a/evalbench/evalbench.py b/evalbench/evalbench.py index 41151896..943a3240 100644 --- a/evalbench/evalbench.py +++ b/evalbench/evalbench.py @@ -211,7 +211,6 @@ def run(): app.run(main) - if __name__ == "__main__": # Required for PyInstaller multiprocessing support multiprocessing.freeze_support() From 6fff9124e236d1cbfa0b01e63c98ff847478243b Mon Sep 17 00:00:00 2001 From: Juexin Wang Date: Mon, 11 May 2026 17:21:39 +0000 Subject: [PATCH 5/6] style: Rename CLI command to google-evalbench to resolve python namespace clash --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 1eb85c20..c4bc1c90 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -53,7 +53,7 @@ dependencies = [ ] [project.scripts] -evalbench = "evalbench.evalbench:run" +google-evalbench = "evalbench.evalbench:run" [tool.setuptools.packages.find] include = ["evalbench*"] From 1ac64bb5f91a44c7527e9845156c519426123a3e Mon Sep 17 00:00:00 2001 From: Juexin Wang Date: Mon, 11 May 2026 17:35:34 +0000 Subject: [PATCH 6/6] docs: Succinctly clarify path append resolution inside __init__.py --- evalbench/__init__.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/evalbench/__init__.py b/evalbench/__init__.py index e3876b4c..c846848d 100644 --- a/evalbench/__init__.py +++ b/evalbench/__init__.py @@ -1,9 +1,9 @@ import os import sys -# Expose internal subdirectories to sys.path so legacy absolute imports -# (e.g. 'from reporting.report import ...') resolve correctly when run globally. -sys.path.insert(0, os.path.dirname(__file__)) +# Append package root to sys.path for legacy absolute imports. +# Using append (rather than insert) prevents namespace collisions in spawned child processes. +sys.path.append(os.path.dirname(__file__)) from . import reporting