# Vad #36

## `docs/noise_cancellation/krisp.md` (39 additions)
# Krisp Python SDK Installation

1. **Download SDK**
Download the latest **Python SDK v1.4.0** from the SDK Portal.
For VIVA, check the `krisp_audio_test.py` sample for reference.

> 💡 *Recommended:* Save the SDK inside `src/piopiy/audio/krisp` so all Krisp-related files stay together.

2. **Unzip & Locate Wheel**
Unzip the SDK. Inside the `dist/` folder, choose the `.whl` file matching your setup.
Example (Python 3.11, Linux x86_64):

```
src/piopiy/audio/krisp/krisp-audio-sdk-python-1.4.0/dist/krisp_audio-1.4.0-cp311-cp311-linux_x86_64.whl
```
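To find which `.whl` matches your interpreter, the CPython tag embedded in the wheel filename (e.g. `cp311`) can be derived from the running Python. A minimal stdlib sketch (`current_wheel_tag` is a hypothetical helper, not part of the SDK):

```python
import platform
import sys


def current_wheel_tag() -> str:
    """Return the CPython interpreter tag (e.g. "cp311") used in wheel filenames."""
    return f"cp{sys.version_info.major}{sys.version_info.minor}"


# Compare this output against the wheel filenames in dist/
print(current_wheel_tag(), platform.machine())
```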

3. **Install SDK**

```bash
cd src/piopiy/audio/krisp/krisp-audio-sdk-python-1.4.0/dist
pip install krisp_audio-1.4.0-cp311-cp311-linux_x86_64.whl
```

Verify installation:

```bash
pip show krisp_audio
```
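The same check can be done from Python using only the standard library (a sketch; it assumes the distribution is named `krisp_audio`, matching the wheel above):

```python
from importlib.metadata import PackageNotFoundError, version


def installed_version(dist_name: str):
    """Return the installed version of a distribution, or None if it is absent."""
    try:
        return version(dist_name)
    except PackageNotFoundError:
        return None


# Prints the version string (e.g. "1.4.0") once installed, otherwise None
print(installed_version("krisp_audio"))
```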

4. **Download Model**
Download the **VIVA model package** (`krisp-viva-models-9.9`) from the same SDK Portal.
   Inside it, you'll find several `.kef` model files. Choose the **`krisp-viva-tel-v2.kef`** model, as it performs best for real-time and WebRTC/WebSocket-based audio.

5. **Set Environment Variable**
In your `.env` file, add:

```bash
KRISP_MODEL_PATH=/home/user/voice/agents/src/piopiy/audio/krisp/krisp-viva-models-9.9/krisp-viva-tel-v2.kef
```
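After setting the variable, a quick stdlib check can confirm the path resolves to an existing file before starting the agent (a hedged sketch; `resolve_model_path` is a hypothetical helper, not part of the SDK):

```python
import os
from pathlib import Path


def resolve_model_path(env_var: str = "KRISP_MODEL_PATH") -> Path:
    """Read the model path from the environment and verify the file exists."""
    raw = os.environ.get(env_var)
    if not raw:
        raise RuntimeError(f"{env_var} is not set")
    path = Path(raw)
    if not path.is_file():
        raise FileNotFoundError(f"Model file not found: {path}")
    return path
```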
## `docs/noise_cancellation/smart_turn_v3.md` (29 additions)
# Smart Turn V3 Installation Guide

Install the required Python dependencies:

```bash
pip install numpy loguru onnxruntime transformers
```

**For GPU support (recommended for better performance):**

```bash
pip install numpy loguru onnxruntime-gpu transformers
```

## Download Model File

Download the Smart Turn V3 ONNX model from Hugging Face:

```bash
# Create directory structure
mkdir -p piopiy/audio/turn/smart_turn/data

# Download the model file
wget https://huggingface.co/pipecat-ai/smart-turn-v3/resolve/main/smart-turn-v3.0.onnx -O piopiy/audio/turn/smart_turn/data/smart-turn-v3.0.onnx
```
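If `wget` is unavailable, the file can be fetched with Python's standard library. Hugging Face serves raw files under a `resolve/<revision>/<filename>` URL (a sketch; `build_model_url` and `download_model` are hypothetical helpers):

```python
import urllib.request
from pathlib import Path


def build_model_url(repo: str, filename: str, revision: str = "main") -> str:
    """Build a direct-download URL for a file in a Hugging Face repository."""
    return f"https://huggingface.co/{repo}/resolve/{revision}/{filename}"


def download_model(repo: str, filename: str, dest_dir: str) -> Path:
    """Download the model file into dest_dir, creating the directory if needed."""
    dest = Path(dest_dir)
    dest.mkdir(parents=True, exist_ok=True)
    target = dest / filename
    urllib.request.urlretrieve(build_model_url(repo, filename), target)
    return target


# download_model("pipecat-ai/smart-turn-v3", "smart-turn-v3.0.onnx",
#                "piopiy/audio/turn/smart_turn/data")
```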

**Or download manually:**

1. Go to: https://huggingface.co/pipecat-ai/smart-turn-v3/blob/main/smart-turn-v3.0.onnx
2. Click the download button
3. Place the file in: `piopiy/audio/turn/smart_turn/data/smart-turn-v3.0.onnx`
## `src/piopiy/audio/turn/smart_turn/local_smart_turn_v3.py` (124 additions)
```python
#
# Copyright (c) 2025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#

"""Local turn analyzer for on-device ML inference using the smart-turn-v3 model.

This module provides a smart turn analyzer that uses an ONNX model for
local end-of-turn detection without requiring network connectivity.
"""

from typing import Any, Dict, Optional

import numpy as np
from loguru import logger

from piopiy.audio.turn.smart_turn.base_smart_turn import BaseSmartTurn

try:
    import onnxruntime as ort
    from transformers import WhisperFeatureExtractor
except ModuleNotFoundError as e:
    logger.error(f"Exception: {e}")
    logger.error(
        "In order to use LocalSmartTurnAnalyzerV3, you need to `pip install pipecat-ai[local-smart-turn-v3]`."
    )
    raise Exception(f"Missing module: {e}")


class LocalSmartTurnAnalyzerV3(BaseSmartTurn):
    """Local turn analyzer using the smart-turn-v3 ONNX model.

    Provides end-of-turn detection using a locally stored ONNX model,
    enabling offline operation without network dependencies.
    """

    def __init__(self, *, smart_turn_model_path: Optional[str] = None, **kwargs):
        """Initialize the local ONNX smart-turn-v3 analyzer.

        Args:
            smart_turn_model_path: Path to the ONNX model file. If this is not
                set, the bundled smart-turn-v3.0 model will be used.
            **kwargs: Additional arguments passed to BaseSmartTurn.
        """
        super().__init__(**kwargs)

        logger.debug("Loading Local Smart Turn v3 model...")

        if not smart_turn_model_path:
            # Load the bundled model from package data
            model_name = "smart-turn-v3.0.onnx"
            package_path = "piopiy.audio.turn.smart_turn.data"

            try:
                import importlib_resources as impresources

                smart_turn_model_path = str(impresources.files(package_path).joinpath(model_name))
            except BaseException:
                from importlib import resources as impresources

                try:
                    with impresources.path(package_path, model_name) as f:
                        smart_turn_model_path = str(f)
                except BaseException:
                    smart_turn_model_path = str(
                        impresources.files(package_path).joinpath(model_name)
                    )

        so = ort.SessionOptions()
        so.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL
        so.inter_op_num_threads = 1
        so.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL

        self._feature_extractor = WhisperFeatureExtractor(chunk_length=8)
        self._session = ort.InferenceSession(smart_turn_model_path, sess_options=so)

        logger.debug("Loaded Local Smart Turn v3")

    def _predict_endpoint(self, audio_array: np.ndarray) -> Dict[str, Any]:
        """Predict end-of-turn using the local ONNX model."""

        def truncate_audio_to_last_n_seconds(audio_array, n_seconds=8, sample_rate=16000):
            """Truncate audio to the last n seconds, or pad with zeros to reach n seconds."""
            max_samples = n_seconds * sample_rate
            if len(audio_array) > max_samples:
                return audio_array[-max_samples:]
            elif len(audio_array) < max_samples:
                # Pad with zeros at the beginning
                padding = max_samples - len(audio_array)
                return np.pad(audio_array, (padding, 0), mode="constant", constant_values=0)
            return audio_array

        # Truncate to 8 seconds (keeping the end) or pad to 8 seconds
        audio_array = truncate_audio_to_last_n_seconds(audio_array, n_seconds=8)

        # Process audio using Whisper's feature extractor
        inputs = self._feature_extractor(
            audio_array,
            sampling_rate=16000,
            return_tensors="np",
            padding="max_length",
            max_length=8 * 16000,
            truncation=True,
            do_normalize=True,
        )

        # Extract features and ensure correct shape for ONNX
        input_features = inputs.input_features.squeeze(0).astype(np.float32)
        input_features = np.expand_dims(input_features, axis=0)  # Add batch dimension

        # Run ONNX inference
        outputs = self._session.run(None, {"input_features": input_features})

        # Extract probability (the ONNX model returns sigmoid probabilities)
        probability = outputs[0][0].item()

        # Make prediction (1 for Complete, 0 for Incomplete)
        prediction = 1 if probability > 0.5 else 0

        return {
            "prediction": prediction,
            "probability": probability,
        }
```
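The fixed-window logic in `truncate_audio_to_last_n_seconds` can be illustrated in isolation. A pure-Python sketch of the same keep-the-tail/left-pad behavior (`fit_to_window` is a hypothetical name, NumPy-free for clarity):

```python
def fit_to_window(samples, n_seconds=8, sample_rate=16000):
    """Keep the last n_seconds of audio, left-padding with zeros if too short."""
    max_samples = n_seconds * sample_rate
    if len(samples) > max_samples:
        return list(samples[-max_samples:])  # keep the most recent audio
    # Zeros go at the front so the real audio stays aligned with the window's end
    return [0] * (max_samples - len(samples)) + list(samples)


print(fit_to_window([1, 2, 3], n_seconds=1, sample_rate=4))        # [0, 1, 2, 3]
print(fit_to_window([1, 2, 3, 4, 5], n_seconds=1, sample_rate=4))  # [2, 3, 4, 5]
```

Left-padding (rather than right-padding) matters here: the model looks at the end of the window for end-of-turn cues, so the most recent audio must sit at the window's tail.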