sensein · djarecka · Apr 13, 2026 · Apr 9, 2026 · Apr 9, 2026 · Apr 9, 2026
diff --git a/src/tests/__init__.py b/src/tests/__init__.py
diff --git a/src/tests/configs/ner-config_extractonly.yaml b/src/tests/configs/ner-config_extractonly.yaml
@@ -0,0 +1,29 @@
+# agent config
+agent_config:
+  extractor_agent:
+    role: >
+      Neuroscience NER Extractor Agent
+    goal: >
+      Extract named entities and key terms from neuroscience text {input_text}.  Return structured JSON.
+    backstory: >
+      You are an AI assistant for neuroscience NER. You do not hallucinate. Output strict JSON: entities and key_terms.
+    llm:
+      model: openrouter/openai/gpt-4o-mini
+      base_url: https://openrouter.ai/api/v1
+
+# task config: only the extraction stage is defined in this file (full pipeline variables: input_text → extracted_structured_information → aligned_structured_information → judged_structured_information_with_human_feedback)
+task_config:
+  extraction_task:
+    description: >
+      Extract entities and key_terms from the input text. Use the NER tool on {input_text}. Return JSON with entities (entity, label, sentence, start, end, paper_location, paper_title, doi) and key_terms.
+    expected_output: >
+      JSON: { "entities": [...], "key_terms": [...] }
+    agent_id: extractor_agent
+
+
+# embedding config
+embedder_config:
+  provider: ollama
+  config:
+    api_base: http://localhost:11434
+    model: nomic-embed-text
diff --git a/src/tests/conftest.py b/src/tests/conftest.py
@@ -0,0 +1,20 @@
+import os
+from pathlib import Path
+import logging
+
+import pytest
+from dotenv import load_dotenv
+
+
+ENV_PATH = Path(__file__).parent / "configs/.env_example"
+
+skip_if_no_openrouter = pytest.mark.skipif(
+    not os.environ.get("OPENROUTER_API_KEY"),
+    reason="OPENROUTER_API_KEY not set",
+)
+
+
+@pytest.fixture(scope="module")
+def load_env():
+    logging.info("Loading environment variables from: %s", ENV_PATH)
+    load_dotenv(ENV_PATH, override=True)
diff --git a/src/tests/ner_test.py b/src/tests/ner_test.py
@@ -1,44 +1,38 @@
 """Testing Simple NER examples."""
 
 from pathlib import Path
-import json
+import asyncio
+import yaml
 
 import pytest
-from click.testing import CliRunner
 
-from structsense.cli import cli
+from structsense.app import StructSenseFlow
+from .conftest import skip_if_no_openrouter
 
-pytestmark = pytest.mark.requires_openrouter
+pytestmark = [pytest.mark.usefixtures("load_env"), pytest.mark.requires_openrouter]
 
-CONFIG_PATH = str(Path(__file__).parent / "configs/ner-config_free.yaml")
-ENV_PATH = str(Path(__file__).parent / "configs/.env_example")
+CONFIG_PATH = Path(__file__).parent / "configs/ner-config_extractonly.yaml"
 SOURCE_TEXT = "Retinal ganglion cell (RGC) axons and synapses were genetically labeled via AAV transduction"
 
 
-def test_ner_1(tmp_path):
-    """Test the ENR extraction with a simple text input and a free model."""
-    runner = CliRunner()
-    # uses OPENROUTER_API_KEY set as environment variable (not provided in the env file) for authentication (the model is free)
-    result = runner.invoke(
-        cli,
-        [
-            "extract",
-            "--env_file",
-            ENV_PATH,
-            "--config",
-            CONFIG_PATH,
-            "--source_text",
-            SOURCE_TEXT,
-            "--save_file",
-            str(tmp_path / "enr_result.json"),
-        ],
-    )
+@skip_if_no_openrouter
+def test_ner_1():
+    """Test the NER extraction with a simple text input.
+    It only checks that at least one entity is extracted, because the exact entities returned by the model vary between runs.
+    """
+    with open(CONFIG_PATH) as f:
+        config = yaml.safe_load(f)
 
-    with open(tmp_path / "enr_result.json", "r") as f:
-        enr_result = json.load(f)
+    flow = StructSenseFlow(
+        agent_config=config["agent_config"],
+        task_config=config["task_config"],
+        embedder_config=config.get("embedder_config", {}),
+        source_text=SOURCE_TEXT,
+    )
+    enr_result = asyncio.run(flow.information_extraction_task())
 
-    # testing if we get any entities
-    assert result.exit_code == 0
+    # the run above authenticates with OPENROUTER_API_KEY from the environment; below we only check that some entities came back
+    assert enr_result is not None
     assert "entities" in enr_result
     assert len(enr_result["entities"]) > 0
     # print the extracted entities for visual inspection (hard to assert exact entities, at least with this model)

diff --git a/src/tests/task_detection_test.py b/src/tests/task_detection_test.py
@@ -1,19 +1,12 @@
 """Tests for task_type detection in StructSenseFlow."""
 
-from pathlib import Path
-import os
-
 import pytest
-from dotenv import load_dotenv
 
 from structsense.app import StructSenseFlow
+from .conftest import skip_if_no_openrouter
 
-skip_if_no_openrouter = pytest.mark.skipif(
-    not os.environ.get("OPENROUTER_API_KEY"),
-    reason="OPENROUTER_API_KEY not set",
-)
+pytestmark = pytest.mark.usefixtures("load_env")
 
-ENV_PATH = Path(__file__).parent / "configs/.env_example"
 SOURCE_TEXT_SHORT = "Retinal ganglion cell"
 
 LLM_CONFIG = {
@@ -37,11 +30,6 @@
 }
 
 
-@pytest.fixture(autouse=True)
-def load_env():
-    load_dotenv(ENV_PATH, override=True)
-
-
 def make_flow(agent_config=None, task_config=None):
     return StructSenseFlow(
         agent_config=agent_config or BASE_AGENT_CONFIG,