Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 13 additions & 13 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,25 +9,25 @@ license = {text = "Apache"}

dependencies = [
# Core evaluation framework dependencies
"ragas>=0.3.0",
"deepeval>=1.3.0",
"litellm>=1.0.0",
"pydantic>=2.0.0",
"ragas>=0.4.0",
"deepeval>=3.9.0",
"litellm>=1.80.0",
"pydantic>=2.10.0",
"pyyaml>=6.0",
"pandas>=2.1.4",
"datasets>=2.0.0",
"pandas>=2.2.0",
"datasets>=3.0.0",
"matplotlib>=3.5.0",
"seaborn>=0.11.0",
"numpy>=1.23.0",
"scipy>=1.10.0",
"scipy>=1.14.0",
# Agent evaluation dependencies (for future integration)
"httpx>=0.27.2",
"httpx>=0.28.0",
"tqdm>=4.67.1",
# Generate answers dependencies
"click>=8.0.0",
"diskcache>=5.6.3",
"tenacity>=9.1.2",
"langchain[huggingface]>=0.3.27",
"langchain[huggingface]>=0.3.30",
"langchain-google-genai>=2.0.0",
]

Expand All @@ -39,8 +39,8 @@ dependencies = [
# CPU (default, ~2GB): uv sync --extra local-embeddings
# GPU (~6GB): cp uv-gpu.lock uv.lock && uv sync --extra local-embeddings --frozen
local-embeddings = [
"torch>=2.0.0",
"sentence-transformers>=5.1.0",
"torch>=2.5.0",
"sentence-transformers>=5.2.0",
]

# NLP metrics dependencies - required for nlp:bleu, nlp:rouge, nlp:semantic_similarity_distance
Expand All @@ -59,12 +59,12 @@ dev = [
"bandit>=1.7.0",
"black>=25.1.0",
"mypy>=1.15.0",
"ruff>=0.8.0",
"ruff>=0.9.0",
"pyright>=1.1.401",
"pydocstyle>=6.3.0",
"pylint-pydantic>=0.3.0",
"pytest>=8.3.2",
"pytest-cov>=5.0.0",
"pytest-cov>=6.0.0",
"pytest-mock>=3.15.1",
"pytest-timeout>=2.4.0",
]
Expand Down
3 changes: 1 addition & 2 deletions src/lightspeed_evaluation/core/constants.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
"""Common constants for evaluation framework."""

# Map similarity measure strings to Ragas DistanceMeasure enum
from ragas.metrics import DistanceMeasure
from ragas.metrics.collections import DistanceMeasure

# NLP Metrics Constants - BLEU
DEFAULT_BLEU_MAX_NGRAM = 4 # Standard BLEU uses up to 4-grams
Expand Down
68 changes: 40 additions & 28 deletions src/lightspeed_evaluation/core/embedding/ragas.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,9 @@
"""Ragas Embedding Manager - Ragas specific embedding wrapper."""
"""Ragas Embedding Manager - Ragas 0.4+ specific embedding wrapper."""

import logging
from typing import Any
from typing import Any, cast

from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_openai import OpenAIEmbeddings
from ragas.cache import DiskCacheBackend
from ragas.embeddings import LangchainEmbeddingsWrapper
from ragas.embeddings.base import BaseRagasEmbedding, embedding_factory

from lightspeed_evaluation.core.embedding.manager import EmbeddingManager
from lightspeed_evaluation.core.system.exceptions import ConfigurationError
Expand All @@ -16,38 +12,54 @@


class RagasEmbeddingManager: # pylint: disable=too-few-public-methods
"""Ragas Embedding Manager, modifies global ragas settings."""
"""Ragas Embedding Manager using embedding_factory for ragas 0.4+."""

def __init__(self, embedding_manager: EmbeddingManager):
"""Init RagasEmbeddingManager."""
"""Initialize RagasEmbeddingManager with embedding_factory.

Args:
embedding_manager: Pre-configured EmbeddingManager with validated parameters
"""
config = embedding_manager.config
self.config = config

embedding_class: Any
if config.provider == "openai":
embedding_class = OpenAIEmbeddings
elif config.provider == "huggingface":
# EmbeddingManager already validated sentence-transformers is available
embedding_class = HuggingFaceEmbeddings
elif config.provider == "gemini":
embedding_class = GoogleGenerativeAIEmbeddings
# Map provider names to litellm format
provider = config.provider.lower()
model = config.model

# Build the model string for litellm
# Only OpenAI, Gemini, and HuggingFace are supported
if provider == "openai":
model_str = model # OpenAI models don't need prefix
elif provider == "huggingface":
model_str = f"huggingface/{model}"
elif provider == "gemini":
model_str = f"gemini/{model}"
else:
logger.error("Unknown embedding provider: %s", config.provider)
raise ConfigurationError(f"Unknown embedding provider {config.provider}")

logger.debug(
"Using embedding provider: %s with model: %s",
config.provider,
config.model,
"Using embedding provider: %s with model: %s -> %s",
provider,
model,
model_str,
)

kwargs = config.provider_kwargs
if kwargs is None:
kwargs = {}
# Get additional provider kwargs
kwargs: dict[str, Any] = {}
if config.provider_kwargs:
kwargs.update(config.provider_kwargs)

cacher = None
if config.cache_enabled:
cacher = DiskCacheBackend(cache_dir=config.cache_dir)
self.embeddings = LangchainEmbeddingsWrapper(
embedding_class(model=config.model, **kwargs), cache=cacher
# Create embeddings using ragas 0.4+ embedding_factory with litellm
# Cast to BaseRagasEmbedding as embedding_factory returns union type
self.embeddings: BaseRagasEmbedding = cast(
BaseRagasEmbedding,
embedding_factory(
"litellm",
model=model_str,
**kwargs,
),
)

logger.info("Ragas Embedding Manager configured: %s/%s", provider, model)
15 changes: 2 additions & 13 deletions src/lightspeed_evaluation/core/llm/custom.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
"""Base Custom LLM class for evaluation framework."""

import logging
import os
from typing import Any, Optional, Union

import litellm
from litellm.exceptions import InternalServerError

from lightspeed_evaluation.core.llm.litellm_patch import setup_litellm_ssl
from lightspeed_evaluation.core.system.exceptions import LLMError

logger = logging.getLogger(__name__)
Expand All @@ -20,21 +20,10 @@ def __init__(self, model_name: str, llm_params: dict[str, Any]):
self.model_name = model_name
self.llm_params = llm_params

self.setup_ssl_verify()

# Always drop unsupported parameters for cross-provider compatibility
litellm.drop_params = True

def setup_ssl_verify(self) -> None:
"""Setup SSL verification based on LLM parameters."""
ssl_verify = self.llm_params.get("ssl_verify", True)

if ssl_verify:
# Use our combined certifi bundle (includes system + custom certs)
litellm.ssl_verify = os.environ.get("SSL_CERTIFI_BUNDLE", True)
else:
# Explicitly disable SSL verification
litellm.ssl_verify = False
setup_litellm_ssl(llm_params)

def call(
self,
Expand Down
102 changes: 97 additions & 5 deletions src/lightspeed_evaluation/core/llm/litellm_patch.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,96 @@
"""Global litellm patching for token tracking.
"""LiteLLM configuration for token tracking and Ragas 0.4 compatibility.

It patches litellm.completion and litellm.acompletion to automatically track tokens
for all LLM calls throughout the application.
This module configures litellm for two purposes:

1. TOKEN TRACKING: Wraps litellm.completion and litellm.acompletion to track
   token usage for all LLM calls (Judge LLM metrics). We use function wrapping
   rather than litellm's callback system because callbacks don't reliably
   capture tokens in all execution paths.

2. RAGAS 0.4 COMPATIBILITY: Ragas 0.4's score() method internally uses
asyncio.run() which creates a new event loop. LiteLLM's background
LoggingWorker task conflicts with this, causing:
"RuntimeError: Queue is bound to a different event loop"

We replace the LoggingWorker with a no-op implementation to avoid this.
This is safe because we don't use litellm's built-in observability features.
"""

import logging
import os
import warnings
from functools import wraps
from typing import Any

import litellm

from lightspeed_evaluation.core.llm.token_tracker import track_tokens
# Suppress coroutine warnings from litellm's async logging (cosmetic only)
warnings.filterwarnings(
"ignore",
message="coroutine.*was never awaited",
category=RuntimeWarning,
)

# pylint: disable=wrong-import-position
from lightspeed_evaluation.core.llm.token_tracker import track_tokens # noqa: E402

logger = logging.getLogger(__name__)


# Store original functions before patching
# =============================================================================
# RAGAS 0.4 COMPATIBILITY: No-op logging worker
# =============================================================================
# Replace litellm's LoggingWorker with a no-op to prevent event loop conflicts
# when Ragas creates new event loops via asyncio.run().


class _NoOpLoggingWorker:
"""No-op logging worker to prevent event loop conflicts with Ragas 0.4.

LiteLLM's LoggingWorker runs async tasks that conflict with Ragas's use of
asyncio.run(). This no-op replacement silently ignores all logging operations.

See: https://github.com/BerriAI/litellm/issues/17813
"""

def ensure_initialized_and_enqueue(self, *args: Any, **kwargs: Any) -> None:
"""No-op: silently ignore."""

def enqueue(self, *args: Any, **kwargs: Any) -> None:
"""No-op: silently ignore."""

def start(self) -> None:
"""No-op: nothing to start."""

def stop(self) -> None:
"""No-op: nothing to stop."""

def flush(self) -> None:
"""No-op: nothing to flush."""

def clear_queue(self) -> None:
"""No-op: nothing to clear."""


# Apply the no-op worker
try:
# pylint: disable=ungrouped-imports
import litellm.litellm_core_utils.logging_worker as logging_worker_module

logging_worker_module.GLOBAL_LOGGING_WORKER = _NoOpLoggingWorker() # type: ignore[assignment]
except (ImportError, AttributeError):
pass # Older versions of litellm may not have this

# Configure litellm to minimize async logging activity
litellm.suppress_debug_info = True


# =============================================================================
# TOKEN TRACKING: Wrap completion functions
# =============================================================================
# We wrap the completion functions rather than using callbacks because
# callbacks don't reliably capture tokens in all execution paths.

_original_completion = litellm.completion
_original_acompletion = litellm.acompletion

Expand Down Expand Up @@ -45,3 +120,20 @@ async def _acompletion_with_token_tracking(*args: Any, **kwargs: Any) -> Any:
# Patch litellm's completion functions to include token tracking
litellm.completion = _completion_with_token_tracking
litellm.acompletion = _acompletion_with_token_tracking


# =============================================================================
# SSL CONFIGURATION UTILITY
# =============================================================================
def setup_litellm_ssl(llm_params: dict[str, Any]) -> None:
    """Configure litellm SSL verification.

    Args:
        llm_params: Dictionary containing LLM parameters including 'ssl_verify'
    """
    if llm_params.get("ssl_verify", True):
        # Verification requested: prefer the combined certifi bundle path from
        # the environment when present, otherwise keep default verification.
        litellm.ssl_verify = os.environ.get("SSL_CERTIFI_BUNDLE", True)
    else:
        # Caller explicitly opted out of SSL verification.
        litellm.ssl_verify = False
Loading
Loading