Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file added src/tests/__init__.py
Empty file.
29 changes: 29 additions & 0 deletions src/tests/configs/ner-config_extractonly.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# agent config
agent_config:
extractor_agent:
role: >
Neuroscience NER Extractor Agent
goal: >
Extract named entities and key terms from neuroscience text {input_text}. Return structured JSON.
backstory: >
You are an AI assistant for neuroscience NER. You do not hallucinate. Output strict JSON: entities and key_terms.
llm:
model: openrouter/openai/gpt-4o-mini
base_url: https://openrouter.ai/api/v1

# task config (pipeline variables: input_text → extracted_structured_information → aligned_structured_information → judged_structured_information_with_human_feedback)
task_config:
extraction_task:
description: >
Extract entities and key_terms from the input text. Use the NER tool on {input_text}. Return JSON with entities (entity, label, sentence, start, end, paper_location, paper_title, doi) and key_terms.
expected_output: >
JSON: { "entities": [...], "key_terms": [...] }
agent_id: extractor_agent


# embedding config
embedder_config:
provider: ollama
config:
api_base: http://localhost:11434
model: nomic-embed-text
20 changes: 20 additions & 0 deletions src/tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import os
from pathlib import Path
import logging

import pytest
from dotenv import load_dotenv


# Dotenv file loaded by the ``load_env`` fixture; resolved relative to this test directory.
ENV_PATH = Path(__file__).parent / "configs/.env_example"

# Marker that skips a test when OPENROUTER_API_KEY is unset or empty.
# NOTE: the condition is evaluated once, when this module is imported, so it
# reads the process environment as it is at import time -- not any values that
# the ``load_env`` fixture loads later during test setup.
skip_if_no_openrouter = pytest.mark.skipif(
not os.environ.get("OPENROUTER_API_KEY"),
reason="OPENROUTER_API_KEY not set",
)


@pytest.fixture(scope="module")
def load_env() -> None:
    """Load the variables from ``ENV_PATH`` into the process environment.

    The fixture is module-scoped. Values read from the file override
    variables that are already set (``override=True``).
    """
    logging.info("Loading environment variables from: %s", ENV_PATH)
    load_dotenv(dotenv_path=ENV_PATH, override=True)
50 changes: 22 additions & 28 deletions src/tests/ner_test.py
Original file line number Diff line number Diff line change
@@ -1,44 +1,38 @@
"""Testing Simple NER examples."""

from pathlib import Path
import json
import asyncio
import yaml

import pytest
from click.testing import CliRunner

from structsense.cli import cli
from structsense.app import StructSenseFlow
from .conftest import skip_if_no_openrouter

pytestmark = pytest.mark.requires_openrouter
pytestmark = [pytest.mark.usefixtures("load_env"), pytest.mark.requires_openrouter]

CONFIG_PATH = str(Path(__file__).parent / "configs/ner-config_free.yaml")
ENV_PATH = str(Path(__file__).parent / "configs/.env_example")
CONFIG_PATH = Path(__file__).parent / "configs/ner-config_extractonly.yaml"
SOURCE_TEXT = "Retinal ganglion cell (RGC) axons and synapses were genetically labeled via AAV transduction"


def test_ner_1(tmp_path):
"""Test the NER extraction with a simple text input and a free model."""
runner = CliRunner()
# uses OPENROUTER_API_KEY set as environment variable (not provided in the env file) for authentication (the model is free)
result = runner.invoke(
cli,
[
"extract",
"--env_file",
ENV_PATH,
"--config",
CONFIG_PATH,
"--source_text",
SOURCE_TEXT,
"--save_file",
str(tmp_path / "enr_result.json"),
],
)
@skip_if_no_openrouter
def test_ner_1():
Comment thread
djarecka marked this conversation as resolved.
"""Test the NER extraction with a simple text input.
It only checks that at least one entity is extracted, because the extracted entities were observed to be inconsistent.
"""
with open(CONFIG_PATH) as f:
config = yaml.safe_load(f)

with open(tmp_path / "enr_result.json", "r") as f:
enr_result = json.load(f)
flow = StructSenseFlow(
agent_config=config["agent_config"],
task_config=config["task_config"],
embedder_config=config.get("embedder_config", {}),
source_text=SOURCE_TEXT,
)
enr_result = asyncio.run(flow.information_extraction_task())

# testing if we get any entities
assert result.exit_code == 0
# uses OPENROUTER_API_KEY set as environment variable for authentication
assert enr_result is not None
assert "entities" in enr_result
assert len(enr_result["entities"]) > 0
# print the extracted entities for visual inspection (hard to assert exact entities, at least with this model)
Expand Down
16 changes: 2 additions & 14 deletions src/tests/task_detection_test.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,12 @@
"""Tests for task_type detection in StructSenseFlow."""

from pathlib import Path
import os

import pytest
from dotenv import load_dotenv

from structsense.app import StructSenseFlow
from .conftest import skip_if_no_openrouter

skip_if_no_openrouter = pytest.mark.skipif(
not os.environ.get("OPENROUTER_API_KEY"),
reason="OPENROUTER_API_KEY not set",
)
pytestmark = pytest.mark.usefixtures("load_env")
Comment thread
djarecka marked this conversation as resolved.

ENV_PATH = Path(__file__).parent / "configs/.env_example"
SOURCE_TEXT_SHORT = "Retinal ganglion cell"

LLM_CONFIG = {
Expand All @@ -37,11 +30,6 @@
}


@pytest.fixture(autouse=True)
def load_env() -> None:
    """Load ``ENV_PATH`` into the environment, overriding variables already set.

    Declared with ``autouse=True``, so tests do not need to request it.
    """
    load_dotenv(dotenv_path=ENV_PATH, override=True)


def make_flow(agent_config=None, task_config=None):
return StructSenseFlow(
agent_config=agent_config or BASE_AGENT_CONFIG,
Expand Down
Loading