Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 13 additions & 13 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,25 +9,25 @@ license = {text = "Apache"}

dependencies = [
# Core evaluation framework dependencies
"ragas>=0.3.0",
"deepeval>=1.3.0",
"litellm>=1.0.0",
"pydantic>=2.0.0",
"ragas>=0.4.0",
"deepeval>=3.9.0",
"litellm>=1.80.0",
"pydantic>=2.10.0",
"pyyaml>=6.0",
"pandas>=2.1.4",
"datasets>=2.0.0",
"pandas>=2.2.0",
"datasets>=3.0.0",
"matplotlib>=3.5.0",
"seaborn>=0.11.0",
"numpy>=1.23.0",
"scipy>=1.10.0",
"scipy>=1.14.0",
# Agent evaluation dependencies (for future integration)
"httpx>=0.27.2",
"httpx>=0.28.0",
"tqdm>=4.67.1",
# Generate answers dependencies
"click>=8.0.0",
"diskcache>=5.6.3",
"tenacity>=9.1.2",
"langchain[huggingface]>=0.3.27",
"langchain[huggingface]>=0.3.30",
"langchain-google-genai>=2.0.0",
]

Expand All @@ -39,8 +39,8 @@ dependencies = [
# CPU (default, ~2GB): uv sync --extra local-embeddings
# GPU (~6GB): cp uv-gpu.lock uv.lock && uv sync --extra local-embeddings --frozen
local-embeddings = [
"torch>=2.0.0",
"sentence-transformers>=5.1.0",
"torch>=2.5.0",
"sentence-transformers>=5.2.0",
]

# NLP metrics dependencies - required for nlp:bleu, nlp:rouge, nlp:semantic_similarity_distance
Expand All @@ -59,12 +59,12 @@ dev = [
"bandit>=1.7.0",
"black>=25.1.0",
"mypy>=1.15.0",
"ruff>=0.8.0",
"ruff>=0.9.0",
"pyright>=1.1.401",
"pydocstyle>=6.3.0",
"pylint-pydantic>=0.3.0",
"pytest>=8.3.2",
"pytest-cov>=5.0.0",
"pytest-cov>=6.0.0",
"pytest-mock>=3.15.1",
"pytest-timeout>=2.4.0",
]
Expand Down
3 changes: 1 addition & 2 deletions src/lightspeed_evaluation/core/constants.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
"""Common constants for evaluation framework."""

# Map similarity measure strings to Ragas DistanceMeasure enum
from ragas.metrics import DistanceMeasure
from ragas.metrics.collections import DistanceMeasure

# NLP Metrics Constants - BLEU
DEFAULT_BLEU_MAX_NGRAM = 4 # Standard BLEU uses up to 4-grams
Expand Down
68 changes: 40 additions & 28 deletions src/lightspeed_evaluation/core/embedding/ragas.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,9 @@
"""Ragas Embedding Manager - Ragas specific embedding wrapper."""
"""Ragas Embedding Manager - Ragas 0.4+ specific embedding wrapper."""

import logging
from typing import Any
from typing import Any, cast

from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_openai import OpenAIEmbeddings
from ragas.cache import DiskCacheBackend
from ragas.embeddings import LangchainEmbeddingsWrapper
from ragas.embeddings.base import BaseRagasEmbedding, embedding_factory

from lightspeed_evaluation.core.embedding.manager import EmbeddingManager
from lightspeed_evaluation.core.system.exceptions import ConfigurationError
Expand All @@ -16,38 +12,54 @@


class RagasEmbeddingManager: # pylint: disable=too-few-public-methods
"""Ragas Embedding Manager, modifies global ragas settings."""
"""Ragas Embedding Manager using embedding_factory for ragas 0.4+."""

def __init__(self, embedding_manager: EmbeddingManager):
"""Init RagasEmbeddingManager."""
"""Initialize RagasEmbeddingManager with embedding_factory.

Args:
embedding_manager: Pre-configured EmbeddingManager with validated parameters
"""
config = embedding_manager.config
self.config = config

embedding_class: Any
if config.provider == "openai":
embedding_class = OpenAIEmbeddings
elif config.provider == "huggingface":
# EmbeddingManager already validated sentence-transformers is available
embedding_class = HuggingFaceEmbeddings
elif config.provider == "gemini":
embedding_class = GoogleGenerativeAIEmbeddings
# Map provider names to litellm format
provider = config.provider.lower()
model = config.model

# Build the model string for litellm
# Only OpenAI, Gemini, and HuggingFace are supported
if provider == "openai":
model_str = model # OpenAI models don't need prefix
elif provider == "huggingface":
model_str = f"huggingface/{model}"
elif provider == "gemini":
model_str = f"gemini/{model}"
else:
logger.error("Unknown embedding provider: %s", config.provider)
raise ConfigurationError(f"Unknown embedding provider {config.provider}")

logger.debug(
"Using embedding provider: %s with model: %s",
config.provider,
config.model,
"Using embedding provider: %s with model: %s -> %s",
provider,
model,
model_str,
)

kwargs = config.provider_kwargs
if kwargs is None:
kwargs = {}
# Get additional provider kwargs
kwargs: dict[str, Any] = {}
if config.provider_kwargs:
kwargs.update(config.provider_kwargs)

cacher = None
if config.cache_enabled:
cacher = DiskCacheBackend(cache_dir=config.cache_dir)
self.embeddings = LangchainEmbeddingsWrapper(
embedding_class(model=config.model, **kwargs), cache=cacher
# Create embeddings using ragas 0.4+ embedding_factory with litellm
# Cast to BaseRagasEmbedding as embedding_factory returns union type
self.embeddings: BaseRagasEmbedding = cast(
BaseRagasEmbedding,
embedding_factory(
"litellm",
model=model_str,
**kwargs,
),
)

logger.info("Ragas Embedding Manager configured: %s/%s", provider, model)
15 changes: 2 additions & 13 deletions src/lightspeed_evaluation/core/llm/custom.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
"""Base Custom LLM class for evaluation framework."""

import logging
import os
from typing import Any, Optional, Union

import litellm
from litellm.exceptions import InternalServerError

from lightspeed_evaluation.core.llm.litellm_patch import setup_litellm_ssl
from lightspeed_evaluation.core.system.exceptions import LLMError

logger = logging.getLogger(__name__)
Expand All @@ -20,21 +20,10 @@ def __init__(self, model_name: str, llm_params: dict[str, Any]):
self.model_name = model_name
self.llm_params = llm_params

self.setup_ssl_verify()

# Always drop unsupported parameters for cross-provider compatibility
litellm.drop_params = True

def setup_ssl_verify(self) -> None:
"""Setup SSL verification based on LLM parameters."""
ssl_verify = self.llm_params.get("ssl_verify", True)

if ssl_verify:
# Use our combined certifi bundle (includes system + custom certs)
litellm.ssl_verify = os.environ.get("SSL_CERTIFI_BUNDLE", True)
else:
# Explicitly disable SSL verification
litellm.ssl_verify = False
setup_litellm_ssl(llm_params)

def call(
self,
Expand Down
102 changes: 97 additions & 5 deletions src/lightspeed_evaluation/core/llm/litellm_patch.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,96 @@
"""Global litellm patching for token tracking.
"""LiteLLM configuration for token tracking and Ragas 0.4 compatibility.

It patches litellm.completion and litellm.acompletion to automatically track tokens
for all LLM calls throughout the application.
This module configures litellm for two purposes:

1. TOKEN TRACKING: Wraps litellm.completion and litellm.acompletion to track
   token usage for all LLM calls (Judge LLM metrics). We use function wrapping
   rather than litellm's callback system because callbacks don't reliably
   capture tokens in all execution paths.

2. RAGAS 0.4 COMPATIBILITY: Ragas 0.4's score() method internally uses
asyncio.run() which creates a new event loop. LiteLLM's background
LoggingWorker task conflicts with this, causing:
"RuntimeError: Queue is bound to a different event loop"

We replace the LoggingWorker with a no-op implementation to avoid this.
This is safe because we don't use litellm's built-in observability features.
"""

import logging
import os
import warnings
from functools import wraps
from typing import Any

import litellm

from lightspeed_evaluation.core.llm.token_tracker import track_tokens
# Suppress coroutine warnings from litellm's async logging (cosmetic only)
warnings.filterwarnings(
"ignore",
message="coroutine.*was never awaited",
category=RuntimeWarning,
)

# pylint: disable=wrong-import-position
from lightspeed_evaluation.core.llm.token_tracker import track_tokens # noqa: E402

logger = logging.getLogger(__name__)


# Store original functions before patching
# =============================================================================
# RAGAS 0.4 COMPATIBILITY: No-op logging worker
# =============================================================================
# Replace litellm's LoggingWorker with a no-op to prevent event loop conflicts
# when Ragas creates new event loops via asyncio.run().


class _NoOpLoggingWorker:
"""No-op logging worker to prevent event loop conflicts with Ragas 0.4.

LiteLLM's LoggingWorker runs async tasks that conflict with Ragas's use of
asyncio.run(). This no-op replacement silently ignores all logging operations.

See: https://github.com/BerriAI/litellm/issues/17813
"""

def ensure_initialized_and_enqueue(self, *args: Any, **kwargs: Any) -> None:
"""No-op: silently ignore."""

def enqueue(self, *args: Any, **kwargs: Any) -> None:
"""No-op: silently ignore."""

def start(self) -> None:
"""No-op: nothing to start."""

def stop(self) -> None:
"""No-op: nothing to stop."""

def flush(self) -> None:
"""No-op: nothing to flush."""

def clear_queue(self) -> None:
"""No-op: nothing to clear."""


# Apply the no-op worker
try:
# pylint: disable=ungrouped-imports
import litellm.litellm_core_utils.logging_worker as logging_worker_module

logging_worker_module.GLOBAL_LOGGING_WORKER = _NoOpLoggingWorker() # type: ignore[assignment]
except (ImportError, AttributeError):
pass # Older versions of litellm may not have this

# Configure litellm to minimize async logging activity
litellm.suppress_debug_info = True


# =============================================================================
# TOKEN TRACKING: Wrap completion functions
# =============================================================================
# We wrap the completion functions rather than using callbacks because
# callbacks don't reliably capture tokens in all execution paths.

_original_completion = litellm.completion
_original_acompletion = litellm.acompletion

Expand Down Expand Up @@ -45,3 +120,20 @@ async def _acompletion_with_token_tracking(*args: Any, **kwargs: Any) -> Any:
# Patch litellm's completion functions to include token tracking
litellm.completion = _completion_with_token_tracking
litellm.acompletion = _acompletion_with_token_tracking


# =============================================================================
# SSL CONFIGURATION UTILITY
# =============================================================================
def setup_litellm_ssl(llm_params: dict[str, Any]) -> None:
    """Configure litellm SSL verification.

    Args:
        llm_params: Dictionary containing LLM parameters including 'ssl_verify'
    """
    if llm_params.get("ssl_verify", True):
        # Verification requested: prefer the combined certifi bundle path from
        # the environment when present, otherwise keep default verification.
        litellm.ssl_verify = os.environ.get("SSL_CERTIFI_BUNDLE", True)
    else:
        # Caller explicitly opted out of SSL verification.
        litellm.ssl_verify = False
Loading
Loading