diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..f471242 --- /dev/null +++ b/.env.example @@ -0,0 +1,60 @@ +# ───────────────────────────────────────────────────────────────────────────── +# TAP Voice Agent — Environment Variables Reference +# Copy this file to .env and fill in real values. +# NEVER commit your .env file — it is git-ignored. +# ───────────────────────────────────────────────────────────────────────────── + +# ── App identity ────────────────────────────────────────────────────────────── +APP_NAME=tap-voice-agent +VERSION=0.1.0 +ENVIRONMENT=development # development | staging | production +DEBUG=false + +# ── Server ──────────────────────────────────────────────────────────────────── +HOST=0.0.0.0 +PORT=8000 +WORKERS=1 + +# ── Logging ─────────────────────────────────────────────────────────────────── +LOG_LEVEL=INFO # DEBUG | INFO | WARNING | ERROR +LOG_FORMAT=console # console (dev) | json (prod) + +# ── CORS (comma-separated list of allowed origins) ──────────────────────────── +CORS_ORIGINS=* + +# ── VAPI (voice platform) — leave blank to use mock ────────────────────────── +VAPI_API_KEY= +VAPI_BASE_URL=https://api.vapi.ai +VAPI_PHONE_NUMBER_ID= + +# ── Twilio (fallback calling) — leave blank to use mock ────────────────────── +TWILIO_ACCOUNT_SID= +TWILIO_AUTH_TOKEN= +TWILIO_FROM_NUMBER= + +# ── WhatsApp / Meta ─────────────────────────────────────────────────────────── +META_WHATSAPP_TOKEN= +META_PHONE_NUMBER_ID= +META_VERIFY_TOKEN=tap_verify_token + +# ── TAP LMS (Frappe REST) ───────────────────────────────────────────────────── +TAP_LMS_BASE_URL=https://lms.theapprenticeproject.org +TAP_LMS_API_KEY= +TAP_LMS_API_SECRET= +TAP_LMS_USE_MOCK=true # Set to false when real credentials are available + +# ── Language detection ──────────────────────────────────────────────────────── +DEFAULT_LANGUAGE=en +SUPPORTED_LANGUAGES=hi,mr,pa,en + +# ── Nudge engine ────────────────────────────────────────────────────────────── 
+NUDGE_INACTIVITY_DAYS=3 # Days of LMS inactivity before nudge fires +NUDGE_MAX_PER_WEEK=2 # Max calls per learner per week + +# ── Metrics ─────────────────────────────────────────────────────────────────── +ENABLE_METRICS=true +METRICS_EXPORT_INTERVAL=60 + +# ── Database (SQLite default, swap to Postgres in production) ───────────────── +DATABASE_URL=sqlite+aiosqlite:///./tap_agent.db +# DATABASE_URL=postgresql+asyncpg://user:password@localhost:5432/tap_agent diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..8c43719 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,46 @@ +name: CI + +on: + push: + branches: [ "main" ] + pull_request: + branches: [ "main" ] + +jobs: + test: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.11"] + + steps: + - uses: actions/checkout@v4 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + cache: 'pip' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + pip install -r requirements-dev.txt + + - name: Lint with ruff + run: | + ruff check . 
+ + - name: Type check with mypy + run: | + mypy app + + - name: Test with pytest + run: | + pytest --cov=app --cov-report=xml + + - name: Upload coverage reports to Codecov + uses: codecov/codecov-action@v4 + env: + CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..8a18c4f --- /dev/null +++ b/.gitignore @@ -0,0 +1,170 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.pyverse +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. 
+#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. 
+.idea/ + +# VS Code +.vscode/ + +# MacOS +.DS_Store + +# Application specific +tap_agent.db +*.db diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..bbe0ad3 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,50 @@ +# ── Stage 1: dependency builder ─────────────────────────────────────────────── +FROM python:3.11-slim AS builder + +WORKDIR /build + +# Install pip tools +RUN pip install --upgrade pip + +# Copy dependency manifests first for layer caching +COPY requirements.txt requirements-dev.txt ./ + +# Install production deps into /build/venv +RUN python -m venv /build/venv \ + && /build/venv/bin/pip install --no-cache-dir -r requirements.txt + + +# ── Stage 2: production image ────────────────────────────────────────────────── +FROM python:3.11-slim AS runtime + +LABEL org.opencontainers.image.title="TAP Voice Agent" \ + org.opencontainers.image.description="Multilingual Voice Agent — C4GT 2026" \ + org.opencontainers.image.source="https://github.com/SuryaPratapIIIT/C4GT_2026" \ + org.opencontainers.image.licenses="MIT" + +ENV PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 \ + PATH="/venv/bin:$PATH" + +# Non-root user for security +RUN addgroup --system appgroup && adduser --system --ingroup appgroup appuser + +WORKDIR /app + +# Copy virtual environment from builder +COPY --from=builder /build/venv /venv + +# Copy application source +COPY app/ ./app/ + +# Ownership +RUN chown -R appuser:appgroup /app + +USER appuser + +EXPOSE 8000 + +HEALTHCHECK --interval=30s --timeout=5s --start-period=10s --retries=3 \ + CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health/live')" + +CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000", "--workers", "2"] diff --git a/README.md b/README.md index 04e1239..771a3c0 100644 --- a/README.md +++ b/README.md @@ -1 +1,74 @@ -# C4GT_2026 \ No newline at end of file +# TAP Multilingual Voice Agent (C4GT 2026) + +This repository contains the prototype for **The 
Apprentice Project's (TAP)** multilingual voice agent, built as part of Code For GovTech (C4GT) 2026. The agent automates student engagement and learning nudges via outbound phone calls and handles inbound voice and WhatsApp interactions. + +## Architecture + +The project is built with a modular architecture using **FastAPI** (Python 3.11): +- **Core APIs**: `app/api/` handles inbound webhooks from voice providers (VAPI, Twilio) and WhatsApp. +- **Services**: `app/services/` contains business logic like the `NudgeEngine` and `LanguageDetector`. +- **Adapters**: `app/adapters/` provides abstractions over external dependencies (like the Frappe LMS). A `MockLMSAdapter` is provided for local testing without external credentials. +- **Models**: `app/models/` contains Pydantic schemas standardising the domain shapes. + +## Local Setup + +### Prerequisites +- Python 3.11+ +- [Poetry](https://python-poetry.org/) (recommended) or `pip` +- Docker & Docker Compose (for full stack run) + +### 1. Environment Configuration + +Copy the example environment file: +```bash +cp .env.example .env +``` +Update `.env` with any relevant local configuration. For standard local dev, the defaults (which use mock providers) are sufficient. + +### 2. Running without Docker (Development) + +Install dependencies using Poetry: +```bash +poetry install +``` + +Or using pip: +```bash +pip install -r requirements.txt -r requirements-dev.txt +``` + +Start the FastAPI server: +```bash +poetry run uvicorn app.main:app --reload --port 8000 +``` + +Access the API documentation at: +- Swagger UI: [http://localhost:8000/docs](http://localhost:8000/docs) +- ReDoc: [http://localhost:8000/redoc](http://localhost:8000/redoc) + +### 3. 
def get_lms_adapter() -> BaseLMSAdapter:
    """
    Build the LMS adapter selected by the application settings.

    ``settings.TAP_LMS_USE_MOCK`` picks the implementation: a truthy value
    yields a ``MockLMSAdapter``; anything else yields a ``FrappeLMSAdapter``.
    Both the settings object and the Frappe adapter are imported lazily,
    inside the function, rather than at module import time.
    """
    from app.core.config import settings  # avoid circular imports

    if not settings.TAP_LMS_USE_MOCK:
        # Only pull in the real adapter when it is actually requested.
        from app.adapters.lms.frappe_adapter import FrappeLMSAdapter

        return FrappeLMSAdapter()

    return MockLMSAdapter()
class FrappeLMSAdapter(BaseLMSAdapter):
    """
    Real TAP LMS adapter calling the Frappe REST API (skeleton).

    The three LMS operations are not implemented yet and raise
    ``NotImplementedError``; only the connection plumbing exists.

    Usage
    -----
    Set the following environment variables and TAP_LMS_USE_MOCK=false:
        TAP_LMS_BASE_URL=https://lms.theapprenticeproject.org
        TAP_LMS_API_KEY=<api key>
        TAP_LMS_API_SECRET=<api secret>

    Call ``await adapter.aclose()`` when the adapter is no longer needed so
    the underlying HTTP connection pool is released.
    """

    def __init__(self) -> None:
        self._base_url = str(settings.TAP_LMS_BASE_URL).rstrip("/")
        self._headers = {
            "Authorization": f"token {settings.TAP_LMS_API_KEY}:{settings.TAP_LMS_API_SECRET}",
            "Content-Type": "application/json",
        }
        # Created on first use by `_client` and then reused, so repeated
        # accesses share one client instead of opening a new one each time.
        self._http_client: AsyncClient | None = None

    @property
    def _client(self) -> AsyncClient:
        """Return the shared ``AsyncClient``, creating it when needed.

        A fresh client is built when none exists yet or when the previous
        one was closed (e.g. after ``async with self._client``), so both
        usage styles keep working without leaking a client per access.
        """
        client = self._http_client
        if client is None or client.is_closed:
            client = AsyncClient(
                base_url=self._base_url,
                headers=self._headers,
                timeout=10.0,
            )
            self._http_client = client
        return client

    async def aclose(self) -> None:
        """Close the shared HTTP client, if one was created. Safe to call twice."""
        client = self._http_client
        self._http_client = None
        if client is not None and not client.is_closed:
            await client.aclose()

    async def get_learner_profile(self, learner_id: str) -> LearnerProfile:
        """
        Planned endpoint: GET /api/resource/Learner/{learner_id}

        TODO: implement once real API shape is confirmed.

        Raises:
            NotImplementedError: always, until the method is implemented.
        """
        raise NotImplementedError(
            "FrappeLMSAdapter.get_learner_profile is not yet implemented. "
            "Set TAP_LMS_USE_MOCK=true to use the mock adapter."
        )

    async def get_learner_activity(self, learner_id: str) -> LearnerActivity:
        """
        Planned endpoint:
        GET /api/method/tap_lms.api.get_learner_activity?learner_id={learner_id}

        TODO: implement once real API shape is confirmed.

        Raises:
            NotImplementedError: always, until the method is implemented.
        """
        raise NotImplementedError(
            "FrappeLMSAdapter.get_learner_activity is not yet implemented."
        )

    async def update_engagement_log(
        self,
        learner_id: str,
        event_type: str,
        metadata: dict[str, Any],
    ) -> None:
        """
        Planned endpoint: POST /api/method/tap_lms.api.log_engagement

        TODO: implement once real API shape is confirmed.

        Raises:
            NotImplementedError: always, until the method is implemented.
        """
        raise NotImplementedError(
            "FrappeLMSAdapter.update_engagement_log is not yet implemented."
        )
+""" + +from __future__ import annotations + +from datetime import UTC, datetime, timedelta +from typing import Any + +import structlog + +from app.adapters.lms.base import BaseLMSAdapter, LearnerNotFoundError +from app.models.schemas import ( + CourseProgress, + Language, + LearnerActivity, + LearnerProfile, +) + +logger = structlog.get_logger(__name__) + +# ── Seed data ────────────────────────────────────────────────────────────────── + +_MOCK_LEARNERS: dict[str, LearnerProfile] = { + "learner_001": LearnerProfile( + learner_id="learner_001", + name="Priya Sharma", + phone_number="+919876543210", + preferred_language=Language.HINDI, + enrolled_courses=["digital_literacy_101", "financial_basics_201"], + last_active_at=datetime.now(UTC) - timedelta(days=5), + completion_percentage=42.0, + days_inactive=5, + ), + "learner_002": LearnerProfile( + learner_id="learner_002", + name="Rajveer Singh", + phone_number="+919988776655", + preferred_language=Language.PUNJABI, + enrolled_courses=["digital_literacy_101"], + last_active_at=datetime.now(UTC) - timedelta(days=1), + completion_percentage=80.0, + days_inactive=1, + ), + "learner_003": LearnerProfile( + learner_id="learner_003", + name="Sneha Patil", + phone_number="+919123456789", + preferred_language=Language.MARATHI, + enrolled_courses=["digital_literacy_101", "entrepreneurship_301"], + last_active_at=datetime.now(UTC) - timedelta(days=10), + completion_percentage=15.0, + days_inactive=10, + ), +} + +_MOCK_COURSES: dict[str, list[CourseProgress]] = { + "learner_001": [ + CourseProgress( + course_id="digital_literacy_101", + course_name="Digital Literacy 101", + completion_percentage=75.0, + last_accessed_at=datetime.now(UTC) - timedelta(days=5), + pending_assignments=1, + ), + CourseProgress( + course_id="financial_basics_201", + course_name="Financial Basics", + completion_percentage=10.0, + last_accessed_at=datetime.now(UTC) - timedelta(days=12), + pending_assignments=3, + ), + ], + "learner_002": [ + 
class MockLMSAdapter(BaseLMSAdapter):
    """BaseLMSAdapter implementation served from this module's in-memory seed data."""

    async def get_learner_profile(self, learner_id: str) -> LearnerProfile:
        """Return the seeded profile for *learner_id*.

        Raises:
            LearnerNotFoundError: if no seeded profile exists for the ID.
        """
        seeded = _MOCK_LEARNERS.get(learner_id)
        if seeded:
            logger.debug("mock_lms.get_learner_profile", learner_id=learner_id)
            return seeded
        raise LearnerNotFoundError(learner_id)

    async def get_learner_activity(self, learner_id: str) -> LearnerActivity:
        """Summarise the seeded course progress for *learner_id*.

        The overall completion is the mean of the per-course completion
        percentages (0.0 when the learner has no seeded courses), rounded
        to two decimals.

        Raises:
            LearnerNotFoundError: if no seeded profile exists for the ID.
        """
        if learner_id not in _MOCK_LEARNERS:
            raise LearnerNotFoundError(learner_id)

        progress = _MOCK_COURSES.get(learner_id, [])
        if progress:
            mean_pct = sum(entry.completion_percentage for entry in progress) / len(progress)
        else:
            mean_pct = 0.0

        seeded = _MOCK_LEARNERS[learner_id]
        logger.debug("mock_lms.get_learner_activity", learner_id=learner_id)
        return LearnerActivity(
            learner_id=learner_id,
            courses=progress,
            total_completion_percentage=round(mean_pct, 2),
            days_since_last_login=seeded.days_inactive,
        )

    async def update_engagement_log(
        self,
        learner_id: str,
        event_type: str,
        metadata: dict[str, Any],
    ) -> None:
        """Log the engagement event; the mock stores nothing and sends nothing."""
        event_fields = {
            "learner_id": learner_id,
            "event_type": event_type,
            "metadata": metadata,
        }
        logger.info("mock_lms.engagement_log", **event_fields)
        # No-op in mock — real adapter will POST to Frappe REST API
@router.get(
    "",
    response_model=HealthResponse,
    summary="Application health check",
    description=(
        "Returns 200 when the application is running. "
        "Additional dependency checks (DB, LMS) are included in `checks`."
    ),
)
async def health_check() -> HealthResponse:
    """Report application status plus which LMS adapter and voice provider are configured."""
    lms_mode = "mock" if settings.TAP_LMS_USE_MOCK else "live"
    checks: dict[str, str] = {
        "lms_adapter": lms_mode,
        "voice_provider": _detect_voice_provider(),
    }
    logger.debug("health_check.ok", checks=checks)

    response = HealthResponse(
        status="ok",
        version=settings.VERSION,
        environment=settings.ENVIRONMENT,
        checks=checks,
    )
    return response
@router.post(
    "/voice",
    status_code=status.HTTP_202_ACCEPTED,
    summary="Inbound voice event webhook",
    description=(
        "Receives normalised call events (transcript, call.ended, etc.) "
        "from VAPI or Twilio. The payload is validated and handed off to the "
        "conversation service asynchronously."
    ),
)
async def voice_webhook(
    payload: WebhookPayload,
    background_tasks: BackgroundTasks,
) -> dict[str, str]:
    """Log the incoming voice event, schedule its handler, and acknowledge it.

    The handler is registered as a background task, so the response is
    returned without waiting for it.
    """
    call_id = payload.call_id
    logger.info(
        "webhook.voice.received",
        provider=payload.provider,
        event_type=payload.event_type,
        call_id=call_id,
    )

    # TODO (Part 3): wire to ConversationService
    background_tasks.add_task(_handle_voice_event, payload)

    acknowledgement = {"status": "accepted", "call_id": call_id}
    return acknowledgement
@router.get(
    "/whatsapp",
    summary="Meta webhook verification",
    include_in_schema=False,
)
async def whatsapp_verify(request: Request) -> int:
    """
    Respond to Meta's hub.challenge verification request.

    The request is accepted only when ``hub.mode`` is ``subscribe`` and
    ``hub.verify_token`` equals the configured ``META_VERIFY_TOKEN``; the
    numeric ``hub.challenge`` is then echoed back.

    https://developers.facebook.com/docs/graph-api/webhooks/getting-started

    Raises:
        HTTPException: 403 when the mode or verify token does not match
            (or no verify token is configured); 400 when the challenge is
            missing or is not an integer.
    """
    import hmac  # function-scope import, same pattern as the settings import below

    from app.core.config import settings  # local import to avoid circular

    params = request.query_params
    mode = params.get("hub.mode")
    token = params.get("hub.verify_token") or ""
    challenge = params.get("hub.challenge")
    expected = settings.META_VERIFY_TOKEN

    # An empty configured token must never verify, and the comparison is
    # constant-time so the token cannot be probed character by character.
    # Both sides are encoded because compare_digest rejects non-ASCII str.
    token_matches = bool(expected) and hmac.compare_digest(
        token.encode("utf-8"),
        expected.encode("utf-8"),
    )
    if mode != "subscribe" or not token_matches:
        raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Verification failed")

    # A missing or non-numeric challenge is a client error; previously it
    # either produced a bogus 0 or an unhandled ValueError (HTTP 500).
    try:
        echoed = int(challenge) if challenge is not None else None
    except ValueError:
        echoed = None
    if echoed is None:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Missing or invalid hub.challenge",
        )

    logger.info("webhook.whatsapp.verified")
    return echoed
class Settings(BaseSettings):
    """
    Application-wide settings loaded from environment variables.

    Values come from the process environment and the ``.env`` file; names
    are case-sensitive. List-valued settings (``CORS_ORIGINS``,
    ``SUPPORTED_LANGUAGES``) accept either a comma-separated string such as
    ``hi,mr,pa,en`` or a JSON array such as ``["hi", "mr"]``.
    """

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        case_sensitive=True,
        # pydantic-settings JSON-decodes list fields *before* validators run,
        # so the comma-separated values shown in .env.example ("*",
        # "hi,mr,pa,en") would fail to load. Decoding is switched off and
        # handled by `_parse_str_list` instead.
        # NOTE(review): `enable_decoding` needs a recent pydantic-settings
        # release — confirm against the pinned version in requirements.txt.
        enable_decoding=False,
    )

    # ── App identity ───────────────────────────────────────────────────────────
    APP_NAME: str = "tap-voice-agent"
    VERSION: str = "0.1.0"
    ENVIRONMENT: str = "development"  # development | staging | production
    DEBUG: bool = False

    # ── Server ─────────────────────────────────────────────────────────────────
    HOST: str = "0.0.0.0"
    PORT: int = 8000
    WORKERS: int = 1

    # ── CORS ───────────────────────────────────────────────────────────────────
    CORS_ORIGINS: list[str] = ["*"]

    @field_validator("CORS_ORIGINS", "SUPPORTED_LANGUAGES", mode="before")
    @classmethod
    def _parse_str_list(cls, value: str | list[str]) -> list[str]:
        """Turn a raw env string into a list; pass real lists through.

        A value starting with ``[`` is parsed as JSON (the format accepted
        before decoding was disabled); anything else is split on commas,
        with surrounding whitespace and empty entries dropped.
        """
        if not isinstance(value, str):
            return value

        text = value.strip()
        if text.startswith("["):
            import json  # only needed for JSON-style values

            # Invalid JSON raises ValueError, which pydantic reports as a
            # validation error for the field.
            return json.loads(text)
        return [item.strip() for item in text.split(",") if item.strip()]

    # ── Logging ────────────────────────────────────────────────────────────────
    LOG_LEVEL: str = "INFO"
    LOG_FORMAT: str = "json"  # json | console

    # ── VAPI (voice platform) — mock by default ────────────────────────────────
    VAPI_API_KEY: str = ""
    VAPI_BASE_URL: AnyHttpUrl = AnyHttpUrl("https://api.vapi.ai")
    VAPI_PHONE_NUMBER_ID: str = ""

    # ── Twilio (fallback calling) — mock by default ────────────────────────────
    TWILIO_ACCOUNT_SID: str = ""
    TWILIO_AUTH_TOKEN: str = ""
    TWILIO_FROM_NUMBER: str = ""

    # ── WhatsApp / Meta ────────────────────────────────────────────────────────
    META_WHATSAPP_TOKEN: str = ""
    META_PHONE_NUMBER_ID: str = ""
    META_VERIFY_TOKEN: str = "tap_verify_token"

    # ── TAP LMS (Frappe) ───────────────────────────────────────────────────────
    TAP_LMS_BASE_URL: AnyHttpUrl = AnyHttpUrl("https://lms.theapprenticeproject.org")
    TAP_LMS_API_KEY: str = ""
    TAP_LMS_API_SECRET: str = ""
    TAP_LMS_USE_MOCK: bool = True  # flip to False when real creds are available

    # ── Language detection ─────────────────────────────────────────────────────
    DEFAULT_LANGUAGE: str = "en"
    SUPPORTED_LANGUAGES: list[str] = ["hi", "mr", "pa", "en"]

    # ── Engagement / nudging ───────────────────────────────────────────────────
    NUDGE_INACTIVITY_DAYS: int = 3  # days of LMS inactivity before nudge
    NUDGE_MAX_PER_WEEK: int = 2  # max calls per learner per week

    # ── Metrics / experiments ──────────────────────────────────────────────────
    ENABLE_METRICS: bool = True
    METRICS_EXPORT_INTERVAL: int = 60  # seconds

    # ── Database (optional — for experiment logs) ──────────────────────────────
    DATABASE_URL: str = "sqlite+aiosqlite:///./tap_agent.db"
+ """ + log_level = getattr(logging, settings.LOG_LEVEL.upper(), logging.INFO) + + shared_processors: list[structlog.types.Processor] = [ + structlog.contextvars.merge_contextvars, + structlog.stdlib.add_logger_name, + structlog.stdlib.add_log_level, + structlog.processors.TimeStamper(fmt="iso"), + structlog.processors.StackInfoRenderer(), + ] + + if settings.LOG_FORMAT == "json": + renderer: structlog.types.Processor = structlog.processors.JSONRenderer() + else: + renderer = structlog.dev.ConsoleRenderer(colors=True) + + structlog.configure( + processors=[ + *shared_processors, + structlog.stdlib.ProcessorFormatter.wrap_for_formatter, + ], + wrapper_class=structlog.stdlib.BoundLogger, + context_class=dict, + logger_factory=structlog.stdlib.LoggerFactory(), + cache_logger_on_first_use=True, + ) + + formatter = structlog.stdlib.ProcessorFormatter( + processors=[ + structlog.stdlib.ProcessorFormatter.remove_processors_meta, + renderer, + ], + foreign_pre_chain=shared_processors, + ) + + handler = logging.StreamHandler(sys.stdout) + handler.setFormatter(formatter) + + root_logger = logging.getLogger() + root_logger.handlers.clear() + root_logger.addHandler(handler) + root_logger.setLevel(log_level) + + # Suppress noisy third-party loggers in production + for noisy in ("uvicorn.access", "httpx", "httpcore"): + logging.getLogger(noisy).setLevel(logging.WARNING) diff --git a/app/main.py b/app/main.py new file mode 100644 index 0000000..e5d1ca1 --- /dev/null +++ b/app/main.py @@ -0,0 +1,72 @@ +""" +TAP Voice Agent — FastAPI application factory. + +This module wires together all routers, middleware, and lifespan +events. It is intentionally thin: business logic lives in services/. 
+""" + +from collections.abc import AsyncGenerator +from contextlib import asynccontextmanager + +import structlog +from fastapi import FastAPI +from fastapi.middleware.cors import CORSMiddleware +from prometheus_fastapi_instrumentator import Instrumentator + +from app.api.v1 import health, webhooks +from app.core.config import settings +from app.core.logging import configure_logging + +logger = structlog.get_logger(__name__) + + +@asynccontextmanager +async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]: + """Application lifespan: startup → yield → shutdown.""" + configure_logging() + logger.info( + "tap_voice_agent.startup", + environment=settings.ENVIRONMENT, + version=settings.VERSION, + ) + yield + logger.info("tap_voice_agent.shutdown") + + +def create_app() -> FastAPI: + """Application factory — returns a configured FastAPI instance.""" + app = FastAPI( + title="TAP Voice Agent API", + description=( + "Multilingual Voice Agent for Student Engagement and Learning Nudges " + "— The Apprentice Project (C4GT 2026)" + ), + version=settings.VERSION, + docs_url="/docs" if settings.ENVIRONMENT != "production" else None, + redoc_url="/redoc" if settings.ENVIRONMENT != "production" else None, + lifespan=lifespan, + ) + + # ── Middleware ───────────────────────────────────────────────────────────── + app.add_middleware( + CORSMiddleware, + allow_origins=settings.CORS_ORIGINS, + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], + ) + + # ── Prometheus metrics ───────────────────────────────────────────────────── + Instrumentator( + should_group_status_codes=True, + excluded_handlers=["/health", "/metrics"], + ).instrument(app).expose(app, endpoint="/metrics", include_in_schema=False) + + # ── Routers ──────────────────────────────────────────────────────────────── + app.include_router(health.router, prefix="/health", tags=["Health"]) + app.include_router(webhooks.router, prefix="/api/v1/webhooks", tags=["Webhooks"]) + + return app + + +app: 
FastAPI = create_app() diff --git a/app/models/__init__.py b/app/models/__init__.py new file mode 100644 index 0000000..53f3a7c --- /dev/null +++ b/app/models/__init__.py @@ -0,0 +1 @@ +"""Models package.""" diff --git a/app/models/schemas.py b/app/models/schemas.py new file mode 100644 index 0000000..6b8d8bf --- /dev/null +++ b/app/models/schemas.py @@ -0,0 +1,195 @@ +""" +Domain schemas (Pydantic v2). + +These are the canonical data shapes shared across the application. +Each schema owns its own validation rules; adapters translate +between external API shapes and these internal schemas. +""" + +from __future__ import annotations + +from datetime import UTC, datetime +from enum import Enum +from typing import Any +from uuid import UUID, uuid4 + +from pydantic import BaseModel, ConfigDict, Field + +# ── Enumerations ─────────────────────────────────────────────────────────────── + + +class Language(str, Enum): + """Supported conversation languages.""" + + HINDI = "hi" + MARATHI = "mr" + PUNJABI = "pa" + ENGLISH = "en" + + +class CallStatus(str, Enum): + """Lifecycle states of a voice call.""" + + PENDING = "pending" + QUEUED = "queued" + IN_PROGRESS = "in_progress" + COMPLETED = "completed" + FAILED = "failed" + NO_ANSWER = "no_answer" + BUSY = "busy" + + +class NudgeType(str, Enum): + """Classification of outbound engagement nudges.""" + + INACTIVITY_REMINDER = "inactivity_reminder" + PROGRESS_UPDATE = "progress_update" + COURSE_RECOMMENDATION = "course_recommendation" + ASSIGNMENT_DUE = "assignment_due" + MILESTONE_CELEBRATION = "milestone_celebration" + + +class ConversationTurn(str, Enum): + """Speaker role within a conversation turn.""" + + AGENT = "agent" + LEARNER = "learner" + SYSTEM = "system" + + +# ── Learner ──────────────────────────────────────────────────────────────────── + + +class LearnerProfile(BaseModel): + """Snapshot of a TAP LMS learner used for nudge personalisation.""" + + model_config = ConfigDict(from_attributes=True) + + learner_id: str 
= Field(..., description="Unique TAP LMS learner identifier") + name: str + phone_number: str = Field(..., pattern=r"^\+?[1-9]\d{7,14}$") + preferred_language: Language = Language.ENGLISH + enrolled_courses: list[str] = Field(default_factory=list) + last_active_at: datetime | None = None + completion_percentage: float = Field(default=0.0, ge=0.0, le=100.0) + days_inactive: int = Field(default=0, ge=0) + + +# ── LMS Activity ─────────────────────────────────────────────────────────────── + + +class CourseProgress(BaseModel): + """Progress snapshot for a single enrolled course.""" + + course_id: str + course_name: str + completion_percentage: float = Field(ge=0.0, le=100.0) + last_accessed_at: datetime | None = None + pending_assignments: int = Field(default=0, ge=0) + + +class LearnerActivity(BaseModel): + """Aggregated LMS activity for a learner, used by the nudge engine.""" + + learner_id: str + courses: list[CourseProgress] = Field(default_factory=list) + total_completion_percentage: float = Field(default=0.0, ge=0.0, le=100.0) + days_since_last_login: int = Field(default=0, ge=0) + fetched_at: datetime = Field(default_factory=lambda: datetime.now(UTC)) + + +# ── Calls ────────────────────────────────────────────────────────────────────── + + +class OutboundCallRequest(BaseModel): + """Payload to schedule an outbound voice call.""" + + learner_id: str + nudge_type: NudgeType + preferred_language: Language = Language.ENGLISH + scheduled_at: datetime | None = None # None = immediate + metadata: dict[str, Any] = Field(default_factory=dict) + + +class CallRecord(BaseModel): + """Persistent record of a voice call attempt.""" + + call_id: UUID = Field(default_factory=uuid4) + learner_id: str + nudge_type: NudgeType + language: Language + status: CallStatus = CallStatus.PENDING + provider: str = "mock" # vapi | twilio | mock + provider_call_id: str | None = None + duration_seconds: int | None = None + started_at: datetime | None = None + ended_at: datetime | None = None 
+ created_at: datetime = Field(default_factory=lambda: datetime.now(UTC)) + updated_at: datetime = Field(default_factory=lambda: datetime.now(UTC)) + error_message: str | None = None + + +# ── Conversation ─────────────────────────────────────────────────────────────── + + +class ConversationMessage(BaseModel): + """A single turn in a voice conversation.""" + + message_id: UUID = Field(default_factory=uuid4) + call_id: UUID + turn: ConversationTurn + text: str + language: Language + confidence: float | None = Field(default=None, ge=0.0, le=1.0) + timestamp: datetime = Field(default_factory=lambda: datetime.now(UTC)) + + +class WebhookPayload(BaseModel): + """ + Normalised inbound webhook payload from any voice provider. + + Adapters (VAPI, Twilio) must translate their native payloads + into this shape before handing off to the conversation service. + """ + + provider: str + event_type: str # call.started | call.ended | transcript | dtmf | etc. + call_id: str + learner_id: str | None = None + transcript: str | None = None + language_hint: str | None = None + metadata: dict[str, Any] = Field(default_factory=dict) + received_at: datetime = Field(default_factory=lambda: datetime.now(UTC)) + + +# ── Nudge ────────────────────────────────────────────────────────────────────── + + +class NudgeDecision(BaseModel): + """Result of the nudge engine evaluation for a single learner.""" + + learner_id: str + should_nudge: bool + nudge_type: NudgeType | None = None + reason: str = "" + priority: int = Field(default=0, ge=0, le=10) # higher = more urgent + + +# ── API responses ────────────────────────────────────────────────────────────── + + +class HealthResponse(BaseModel): + """Response shape for the /health endpoint.""" + + status: str + version: str + environment: str + checks: dict[str, str] = Field(default_factory=dict) + + +class ErrorResponse(BaseModel): + """Standard error envelope.""" + + error: str + detail: str | None = None + request_id: str | None = None diff --git 
a/app/services/__init__.py b/app/services/__init__.py new file mode 100644 index 0000000..c7775ec --- /dev/null +++ b/app/services/__init__.py @@ -0,0 +1 @@ +"""Services package.""" diff --git a/app/services/language_detector.py b/app/services/language_detector.py new file mode 100644 index 0000000..aac1d6e --- /dev/null +++ b/app/services/language_detector.py @@ -0,0 +1,98 @@ +""" +Language Detection Service. + +Detects the language of a learner utterance using simple heuristics +(character-set and keyword matching). Replace with a proper NLP model +(e.g. langdetect, fastText) in Part 3 without changing the interface. +""" + +from __future__ import annotations + +import re + +import structlog + +from app.models.schemas import Language + +logger = structlog.get_logger(__name__) + +# ── Script / keyword heuristics ──────────────────────────────────────────────── + +# Devanagari Unicode block: used by Hindi and Marathi +_DEVANAGARI_RE = re.compile(r"[\u0900-\u097F]") + +# Gurmukhi Unicode block: used by Punjabi +_GURMUKHI_RE = re.compile(r"[\u0A00-\u0A7F]") + +# A handful of common Marathi-specific words (Devanagari script, matched as substrings) +_MARATHI_KEYWORDS = {"मला", "नाही", "आहे", "करायचे", "शाळा", "मराठी"} + +# Common Punjabi words in Gurmukhi script (not yet consulted by LanguageDetector.detect) +_PUNJABI_KEYWORDS = {"ਮੈਨੂੰ", "ਨਹੀਂ", "ਹੈ", "ਕਰਨਾ", "ਪੰਜਾਬੀ"} + + +class LanguageDetector: + """ + Lightweight heuristic language detector. + + Returns one of: Language.HINDI, Language.MARATHI, Language.PUNJABI, + Language.ENGLISH (fallback). + """ + + def detect(self, text: str, fallback: Language = Language.ENGLISH) -> Language: + """ + Detect the language of *text*. + + Priority order: + 1. Gurmukhi script → Punjabi + 2. Marathi keywords → Marathi + 3. Devanagari script → Hindi + 4. Fallback → English + + Args: + text: Raw utterance string (ASR transcript or typed input). + fallback: Language to return when detection is inconclusive. + + Returns: + A Language enum value. 
+ """ + if not text or not text.strip(): + logger.debug("lang_detector.empty_text", fallback=fallback) + return fallback + + # ── Script-based detection ───────────────────────────────────────────── + if _GURMUKHI_RE.search(text): + lang = Language.PUNJABI + elif any(kw in text for kw in _MARATHI_KEYWORDS): + lang = Language.MARATHI + elif _DEVANAGARI_RE.search(text): + # Default Devanagari → Hindi; Marathi check already passed above + lang = Language.HINDI + else: + lang = fallback + + logger.debug("lang_detector.result", detected=lang, text_snippet=text[:40]) + return lang + + def detect_from_hint(self, hint: str | None) -> Language | None: + """ + Try to parse a BCP-47 language tag from a provider hint. + + Args: + hint: e.g. "hi-IN", "mr", "pa-IN", "en-US" + + Returns: + Matched Language or None if unrecognised. + """ + if not hint: + return None + prefix = hint.split("-")[0].lower() + mapping = { + "hi": Language.HINDI, + "mr": Language.MARATHI, + "pa": Language.PUNJABI, + "en": Language.ENGLISH, + } + result = mapping.get(prefix) + logger.debug("lang_detector.hint", hint=hint, resolved=result) + return result diff --git a/app/services/nudge_engine.py b/app/services/nudge_engine.py new file mode 100644 index 0000000..d58f215 --- /dev/null +++ b/app/services/nudge_engine.py @@ -0,0 +1,132 @@ +""" +Learning Nudge Engine. + +Evaluates whether a learner should receive an outbound engagement call +and decides which nudge type is most appropriate. This is a pure +in-process service; it has no external I/O of its own — the LMS adapter +is injected. +""" + +from __future__ import annotations +from typing import Callable, List + +import structlog + +from app.adapters.lms import BaseLMSAdapter, LearnerNotFoundError +from app.core.config import settings +from app.models.schemas import LearnerActivity, NudgeDecision, NudgeType + +logger = structlog.get_logger(__name__) + + +class NudgeEngine: + """ + Rule-based nudge decision engine. 
+ + Rules are evaluated in priority order; the first matching rule wins. + Replace or extend with an ML scoring model in a later iteration. + """ + + def __init__(self, lms_adapter: BaseLMSAdapter) -> None: + self._lms = lms_adapter + + async def evaluate(self, learner_id: str) -> NudgeDecision: + """ + Evaluate nudge eligibility for a single learner. + + Returns a NudgeDecision with should_nudge=True if any rule fires. + """ + try: + activity = await self._lms.get_learner_activity(learner_id) + except LearnerNotFoundError: + logger.warning("nudge_engine.learner_not_found", learner_id=learner_id) + return NudgeDecision( + learner_id=learner_id, + should_nudge=False, + reason="Learner not found in LMS", + ) + + decision = self._apply_rules(activity) + logger.info( + "nudge_engine.decision", + learner_id=learner_id, + should_nudge=decision.should_nudge, + nudge_type=decision.nudge_type, + reason=decision.reason, + ) + return decision + + # ── Rule evaluation ──────────────────────────────────────────────────────── + + def _apply_rules(self, activity: LearnerActivity) -> NudgeDecision: + """Apply rules in descending priority order.""" + for rule in self._rules(): + decision = rule(activity) + if decision.should_nudge: + return decision + + return NudgeDecision( + learner_id=activity.learner_id, + should_nudge=False, + reason="No nudge rule matched", + priority=0, + ) + + def _rules(self) -> List[Callable[[LearnerActivity], NudgeDecision]]: + """Return ordered list of rule functions.""" + return [ + self._rule_high_inactivity, + self._rule_pending_assignments, + self._rule_near_completion, + self._rule_moderate_inactivity, + ] + + def _rule_high_inactivity(self, activity: LearnerActivity) -> NudgeDecision: + """Nudge if inactive for ≥ 2× the configured threshold.""" + threshold = settings.NUDGE_INACTIVITY_DAYS * 2 + if activity.days_since_last_login >= threshold: + return NudgeDecision( + learner_id=activity.learner_id, + should_nudge=True, + 
nudge_type=NudgeType.INACTIVITY_REMINDER, + reason=f"Inactive for {activity.days_since_last_login} days (high-priority threshold: {threshold})", + priority=9, + ) + return NudgeDecision(learner_id=activity.learner_id, should_nudge=False) + + def _rule_pending_assignments(self, activity: LearnerActivity) -> NudgeDecision: + """Nudge if any course has ≥ 3 pending assignments.""" + overdue_courses = [c for c in activity.courses if c.pending_assignments >= 3] + if overdue_courses: + return NudgeDecision( + learner_id=activity.learner_id, + should_nudge=True, + nudge_type=NudgeType.ASSIGNMENT_DUE, + reason=f"{len(overdue_courses)} course(s) with ≥3 pending assignments", + priority=8, + ) + return NudgeDecision(learner_id=activity.learner_id, should_nudge=False) + + def _rule_near_completion(self, activity: LearnerActivity) -> NudgeDecision: + """Celebrate / motivate learner when ≥ 75% overall complete.""" + if activity.total_completion_percentage >= 75.0: + return NudgeDecision( + learner_id=activity.learner_id, + should_nudge=True, + nudge_type=NudgeType.MILESTONE_CELEBRATION, + reason=f"Overall completion at {activity.total_completion_percentage:.0f}%", + priority=6, + ) + return NudgeDecision(learner_id=activity.learner_id, should_nudge=False) + + def _rule_moderate_inactivity(self, activity: LearnerActivity) -> NudgeDecision: + """Nudge if inactive for ≥ the configured base threshold.""" + if activity.days_since_last_login >= settings.NUDGE_INACTIVITY_DAYS: + return NudgeDecision( + learner_id=activity.learner_id, + should_nudge=True, + nudge_type=NudgeType.INACTIVITY_REMINDER, + reason=f"Inactive for {activity.days_since_last_login} days", + priority=5, + ) + return NudgeDecision(learner_id=activity.learner_id, should_nudge=False) diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..d6eddbc --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,74 @@ +version: "3.9" + +services: + # ── TAP Voice Agent API 
────────────────────────────────────────────────────── + api: + build: + context: . + dockerfile: Dockerfile + target: runtime + image: tap-voice-agent:dev + container_name: tap_voice_agent_api + restart: unless-stopped + ports: + - "${PORT:-8000}:8000" + env_file: + - .env + environment: + - ENVIRONMENT=development + - LOG_FORMAT=console + volumes: + # Hot-reload in development — remove in production + - ./app:/app/app:ro + depends_on: + - redis + healthcheck: + test: ["CMD", "python", "-c", + "import urllib.request; urllib.request.urlopen('http://localhost:8000/health/live')"] + interval: 15s + timeout: 5s + retries: 3 + start_period: 10s + + # ── Redis (task queue / session cache) ──────────────────────────────────────── + redis: + image: redis:7-alpine + container_name: tap_redis + restart: unless-stopped + ports: + - "6379:6379" + command: redis-server --save 60 1 --loglevel warning + + # ── Prometheus ──────────────────────────────────────────────────────────────── + prometheus: + image: prom/prometheus:v2.51.0 + container_name: tap_prometheus + restart: unless-stopped + ports: + - "9090:9090" + volumes: + - ./infra/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro + depends_on: + - api + profiles: + - observability + + # ── Grafana ─────────────────────────────────────────────────────────────────── + grafana: + image: grafana/grafana:10.4.0 + container_name: tap_grafana + restart: unless-stopped + ports: + - "3000:3000" + environment: + - GF_SECURITY_ADMIN_PASSWORD=admin + - GF_USERS_ALLOW_SIGN_UP=false + volumes: + - grafana_data:/var/lib/grafana + depends_on: + - prometheus + profiles: + - observability + +volumes: + grafana_data: diff --git a/infra/prometheus/prometheus.yml b/infra/prometheus/prometheus.yml new file mode 100644 index 0000000..64f74b7 --- /dev/null +++ b/infra/prometheus/prometheus.yml @@ -0,0 +1,9 @@ +global: + scrape_interval: 15s + evaluation_interval: 15s + +scrape_configs: + - job_name: 'tap_voice_agent' + metrics_path: 
'/metrics' + static_configs: + - targets: ['api:8000'] diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..4d82901 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,56 @@ +[tool.poetry] +name = "tap-voice-agent" +version = "0.1.0" +description = "Multilingual Voice Agent for Student Engagement — The Apprentice Project (C4GT 2026)" +authors = ["C4GT Contributors"] +license = "MIT" +readme = "README.md" +packages = [{ include = "app" }] + +[tool.poetry.dependencies] +python = "^3.11" +fastapi = "^0.115.0" +uvicorn = { version = "^0.34.0", extras = ["standard"] } +pydantic = "^2.11.0" +pydantic-settings = "^2.9.0" +python-dotenv = "^1.1.0" +httpx = "^0.28.0" +structlog = "^25.3.0" +prometheus-fastapi-instrumentator = "^7.1.0" + +[tool.poetry.group.dev.dependencies] +pytest = "^8.3.0" +pytest-asyncio = "^0.25.0" +pytest-cov = "^6.0.0" +httpx = "^0.28.0" +ruff = "^0.11.0" +mypy = "^1.15.0" +pre-commit = "^4.0.0" + +[tool.pytest.ini_options] +asyncio_mode = "auto" +testpaths = ["tests"] +addopts = "--cov=app --cov-report=term-missing --cov-report=xml --cov-fail-under=80" +filterwarnings = [ + # Suppress utcnow deprecation from pydantic internals (third-party) + "ignore:datetime.datetime.utcnow\\(\\) is deprecated:DeprecationWarning:pydantic", + # Turn our own deprecation warnings into errors to catch regressions + "error::DeprecationWarning:app", +] + +[tool.ruff] +line-length = 100 +target-version = "py311" + +[tool.ruff.lint] +select = ["E", "W", "F", "I", "B", "UP"] +ignore = ["E501"] + +[tool.mypy] +python_version = "3.11" +strict = true +ignore_missing_imports = true + +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" diff --git a/requirements-dev.txt b/requirements-dev.txt new file mode 100644 index 0000000..d74afe2 --- /dev/null +++ b/requirements-dev.txt @@ -0,0 +1,7 @@ +pytest==8.3.5 +pytest-asyncio==0.25.3 +pytest-cov==6.1.0 +httpx==0.28.1 +ruff==0.11.10 +mypy==1.15.0 +pre-commit==4.2.0 diff --git 
a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..6a1c8e9 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,8 @@ +fastapi==0.115.12 +uvicorn[standard]==0.34.2 +pydantic==2.11.4 +pydantic-settings==2.9.1 +python-dotenv==1.1.0 +httpx==0.28.1 +structlog==25.3.0 +prometheus-fastapi-instrumentator==7.1.0 diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..46816dd --- /dev/null +++ b/tests/__init__.py @@ -0,0 +1 @@ +"""Tests package.""" diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..e53269c --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,23 @@ +"""Shared pytest fixtures.""" + +from __future__ import annotations + +import pytest +import pytest_asyncio +from httpx import ASGITransport, AsyncClient + +from app.main import app + + +@pytest.fixture(scope="session") +def anyio_backend(): + return "asyncio" + + +@pytest_asyncio.fixture +async def client() -> AsyncClient: + """Async test client wired directly to the ASGI app (no network calls).""" + async with AsyncClient( + transport=ASGITransport(app=app), base_url="http://testserver" + ) as ac: + yield ac diff --git a/tests/test_health.py b/tests/test_health.py new file mode 100644 index 0000000..87da658 --- /dev/null +++ b/tests/test_health.py @@ -0,0 +1,46 @@ +"""Tests for the /health endpoints.""" + +from __future__ import annotations + +import pytest +from httpx import AsyncClient + + +@pytest.mark.asyncio +async def test_health_returns_ok(client: AsyncClient) -> None: + response = await client.get("/health") + assert response.status_code == 200 + body = response.json() + assert body["status"] == "ok" + assert "version" in body + assert "environment" in body + assert "checks" in body + + +@pytest.mark.asyncio +async def test_health_checks_contains_expected_keys(client: AsyncClient) -> None: + response = await client.get("/health") + checks = response.json()["checks"] + assert "lms_adapter" in checks + assert "voice_provider" 
in checks + + +@pytest.mark.asyncio +async def test_health_lms_mock_flag(client: AsyncClient) -> None: + """When TAP_LMS_USE_MOCK=true (default), adapter should report 'mock'.""" + response = await client.get("/health") + assert response.json()["checks"]["lms_adapter"] == "mock" + + +@pytest.mark.asyncio +async def test_liveness_probe(client: AsyncClient) -> None: + response = await client.get("/health/live") + assert response.status_code == 200 + assert response.json()["status"] == "alive" + + +@pytest.mark.asyncio +async def test_readiness_probe(client: AsyncClient) -> None: + response = await client.get("/health/ready") + assert response.status_code == 200 + assert response.json()["status"] == "ready" diff --git a/tests/test_language_detector.py b/tests/test_language_detector.py new file mode 100644 index 0000000..57863b3 --- /dev/null +++ b/tests/test_language_detector.py @@ -0,0 +1,68 @@ +"""Tests for the language detection service.""" + +from __future__ import annotations + +import pytest + +from app.models.schemas import Language +from app.services.language_detector import LanguageDetector + + +@pytest.fixture +def detector() -> LanguageDetector: + return LanguageDetector() + + +# ── Script-based detection ───────────────────────────────────────────────────── + + +def test_detects_gurmukhi_as_punjabi(detector: LanguageDetector) -> None: + assert detector.detect("ਮੈਨੂੰ ਪੜ੍ਹਨਾ ਪਸੰਦ ਹੈ") == Language.PUNJABI + + +def test_detects_devanagari_as_hindi(detector: LanguageDetector) -> None: + assert detector.detect("नमस्ते, मुझे मदद चाहिए") == Language.HINDI + + +def test_detects_marathi_keyword(detector: LanguageDetector) -> None: + assert detector.detect("मला मराठी बोलायचे आहे") == Language.MARATHI + + +def test_detects_english_fallback(detector: LanguageDetector) -> None: + assert detector.detect("Hello, I need help with my course") == Language.ENGLISH + + +def test_empty_string_returns_fallback(detector: LanguageDetector) -> None: + assert detector.detect("", 
fallback=Language.HINDI) == Language.HINDI + + +def test_whitespace_only_returns_fallback(detector: LanguageDetector) -> None: + assert detector.detect(" ") == Language.ENGLISH + + +# ── BCP-47 hint parsing ──────────────────────────────────────────────────────── + + +@pytest.mark.parametrize( + "hint,expected", + [ + ("hi-IN", Language.HINDI), + ("hi", Language.HINDI), + ("mr-IN", Language.MARATHI), + ("pa-IN", Language.PUNJABI), + ("en-US", Language.ENGLISH), + ("en", Language.ENGLISH), + ], +) +def test_detect_from_hint( + detector: LanguageDetector, hint: str, expected: Language +) -> None: + assert detector.detect_from_hint(hint) == expected + + +def test_detect_from_hint_unknown(detector: LanguageDetector) -> None: + assert detector.detect_from_hint("fr-FR") is None + + +def test_detect_from_hint_none(detector: LanguageDetector) -> None: + assert detector.detect_from_hint(None) is None diff --git a/tests/test_mock_lms_adapter.py b/tests/test_mock_lms_adapter.py new file mode 100644 index 0000000..611d4fb --- /dev/null +++ b/tests/test_mock_lms_adapter.py @@ -0,0 +1,58 @@ +"""Tests for the Mock LMS adapter.""" + +from __future__ import annotations + +import pytest + +from app.adapters.lms.base import LearnerNotFoundError +from app.adapters.lms.mock_adapter import MockLMSAdapter +from app.models.schemas import Language + + +@pytest.fixture +def adapter() -> MockLMSAdapter: + return MockLMSAdapter() + + +@pytest.mark.asyncio +async def test_get_known_learner(adapter: MockLMSAdapter) -> None: + profile = await adapter.get_learner_profile("learner_001") + assert profile.learner_id == "learner_001" + assert profile.preferred_language == Language.HINDI + + +@pytest.mark.asyncio +async def test_get_unknown_learner_raises(adapter: MockLMSAdapter) -> None: + with pytest.raises(LearnerNotFoundError): + await adapter.get_learner_profile("nonexistent_xyz") + + +@pytest.mark.asyncio +async def test_get_learner_activity_returns_courses(adapter: MockLMSAdapter) -> None: + 
activity = await adapter.get_learner_activity("learner_001") + assert activity.learner_id == "learner_001" + assert len(activity.courses) > 0 + assert 0 <= activity.total_completion_percentage <= 100 + + +@pytest.mark.asyncio +async def test_get_learner_activity_unknown_raises(adapter: MockLMSAdapter) -> None: + with pytest.raises(LearnerNotFoundError): + await adapter.get_learner_activity("ghost_learner") + + +@pytest.mark.asyncio +async def test_update_engagement_log_noop(adapter: MockLMSAdapter) -> None: + """Should not raise — mock silently logs.""" + await adapter.update_engagement_log( + learner_id="learner_001", + event_type="call_attempted", + metadata={"call_id": "c1"}, + ) + + +@pytest.mark.asyncio +async def test_all_seed_learners_have_activity(adapter: MockLMSAdapter) -> None: + for lid in ("learner_001", "learner_002", "learner_003"): + activity = await adapter.get_learner_activity(lid) + assert activity.learner_id == lid diff --git a/tests/test_nudge_engine.py b/tests/test_nudge_engine.py new file mode 100644 index 0000000..89cc02e --- /dev/null +++ b/tests/test_nudge_engine.py @@ -0,0 +1,61 @@ +"""Tests for the NudgeEngine service.""" + +from __future__ import annotations + +import pytest +import pytest_asyncio + +from app.adapters.lms.mock_adapter import MockLMSAdapter +from app.models.schemas import NudgeType +from app.services.nudge_engine import NudgeEngine + + +@pytest_asyncio.fixture +async def engine() -> NudgeEngine: + return NudgeEngine(lms_adapter=MockLMSAdapter()) + + +# ── Mock learner scenarios from mock_adapter seed data ───────────────────────── +# learner_001: 5 days inactive, 42% complete, 1 pending assignment +# learner_002: 1 day inactive, 80% complete, 0 pending assignments +# learner_003: 10 days inactive, 15% complete, 6 pending assignments total + + +@pytest.mark.asyncio +async def test_high_inactivity_triggers_nudge(engine: NudgeEngine) -> None: + """learner_003 has been inactive 10 days — should fire high-priority nudge.""" + 
decision = await engine.evaluate("learner_003") + assert decision.should_nudge is True + assert decision.nudge_type == NudgeType.INACTIVITY_REMINDER + assert decision.priority >= 7 + + +@pytest.mark.asyncio +async def test_pending_assignments_triggers_nudge(engine: NudgeEngine) -> None: + """learner_003 has 6 total pending assignments — ASSIGNMENT_DUE should fire.""" + decision = await engine.evaluate("learner_003") + assert decision.should_nudge is True + # Either INACTIVITY_REMINDER (higher priority) or ASSIGNMENT_DUE is acceptable + assert decision.nudge_type in {NudgeType.INACTIVITY_REMINDER, NudgeType.ASSIGNMENT_DUE} + + +@pytest.mark.asyncio +async def test_near_completion_milestone(engine: NudgeEngine) -> None: + """learner_002 is at 80% completion — should receive MILESTONE_CELEBRATION.""" + decision = await engine.evaluate("learner_002") + assert decision.should_nudge is True + assert decision.nudge_type == NudgeType.MILESTONE_CELEBRATION + + +@pytest.mark.asyncio +async def test_unknown_learner_no_nudge(engine: NudgeEngine) -> None: + """Non-existent learner should return should_nudge=False without raising.""" + decision = await engine.evaluate("learner_does_not_exist") + assert decision.should_nudge is False + assert "not found" in decision.reason.lower() + + +@pytest.mark.asyncio +async def test_decision_has_learner_id(engine: NudgeEngine) -> None: + decision = await engine.evaluate("learner_001") + assert decision.learner_id == "learner_001" diff --git a/tests/test_schemas.py b/tests/test_schemas.py new file mode 100644 index 0000000..443f216 --- /dev/null +++ b/tests/test_schemas.py @@ -0,0 +1,89 @@ +"""Tests for Pydantic schemas — validation rules and serialisation.""" + +from __future__ import annotations + +import pytest +from pydantic import ValidationError + +from app.models.schemas import ( + CallRecord, + CallStatus, + Language, + LearnerProfile, + NudgeType, + OutboundCallRequest, + WebhookPayload, +) + + +class TestLearnerProfile: + def 
test_valid_profile(self) -> None: + p = LearnerProfile( + learner_id="l1", + name="Test User", + phone_number="+919876543210", + ) + assert p.preferred_language == Language.ENGLISH + assert p.completion_percentage == 0.0 + + def test_invalid_phone_number(self) -> None: + with pytest.raises(ValidationError): + LearnerProfile(learner_id="l1", name="X", phone_number="not_a_phone") + + def test_completion_out_of_range(self) -> None: + with pytest.raises(ValidationError): + LearnerProfile( + learner_id="l1", + name="X", + phone_number="+919876543210", + completion_percentage=150.0, + ) + + +class TestOutboundCallRequest: + def test_defaults(self) -> None: + req = OutboundCallRequest( + learner_id="l1", + nudge_type=NudgeType.INACTIVITY_REMINDER, + ) + assert req.preferred_language == Language.ENGLISH + assert req.scheduled_at is None + assert req.metadata == {} + + def test_invalid_nudge_type(self) -> None: + with pytest.raises(ValidationError): + OutboundCallRequest(learner_id="l1", nudge_type="unknown_type") + + +class TestCallRecord: + def test_default_status_is_pending(self) -> None: + record = CallRecord( + learner_id="l1", + nudge_type=NudgeType.PROGRESS_UPDATE, + language=Language.HINDI, + ) + assert record.status == CallStatus.PENDING + assert record.provider == "mock" + + def test_call_id_is_uuid(self) -> None: + record = CallRecord( + learner_id="l1", + nudge_type=NudgeType.PROGRESS_UPDATE, + language=Language.HINDI, + ) + assert str(record.call_id) # UUID is truthy + + +class TestWebhookPayload: + def test_minimal_payload(self) -> None: + p = WebhookPayload( + provider="vapi", + event_type="call.started", + call_id="c123", + ) + assert p.learner_id is None + assert p.metadata == {} + + def test_received_at_auto_populated(self) -> None: + p = WebhookPayload(provider="twilio", event_type="transcript", call_id="c1") + assert p.received_at is not None diff --git a/tests/test_webhooks.py b/tests/test_webhooks.py new file mode 100644 index 0000000..16f4fcc --- 
/dev/null +++ b/tests/test_webhooks.py @@ -0,0 +1,64 @@ +"""Tests for inbound webhook endpoints.""" + +from __future__ import annotations + +import pytest +from httpx import AsyncClient + + +@pytest.mark.asyncio +async def test_voice_webhook_accepted(client: AsyncClient) -> None: + payload = { + "provider": "mock", + "event_type": "transcript", + "call_id": "call_abc123", + "learner_id": "learner_001", + "transcript": "नमस्ते, मुझे मदद चाहिए", + "language_hint": "hi-IN", + "metadata": {}, + } + response = await client.post("/api/v1/webhooks/voice", json=payload) + assert response.status_code == 202 + body = response.json() + assert body["status"] == "accepted" + assert body["call_id"] == "call_abc123" + + +@pytest.mark.asyncio +async def test_voice_webhook_missing_required_fields(client: AsyncClient) -> None: + """Missing event_type / call_id should return 422.""" + response = await client.post("/api/v1/webhooks/voice", json={"provider": "mock"}) + assert response.status_code == 422 + + +@pytest.mark.asyncio +async def test_whatsapp_webhook_accepted(client: AsyncClient) -> None: + payload = {"object": "whatsapp_business_account", "entry": []} + response = await client.post("/api/v1/webhooks/whatsapp", json=payload) + assert response.status_code == 200 + assert response.json()["status"] == "accepted" + + +@pytest.mark.asyncio +async def test_whatsapp_verify_valid_token(client: AsyncClient) -> None: + """Meta hub.challenge verification with correct token should echo challenge.""" + params = { + "hub.mode": "subscribe", + "hub.verify_token": "tap_verify_token", + "hub.challenge": "999888", + } + response = await client.get("/api/v1/webhooks/whatsapp", params=params) + assert response.status_code == 200 + assert response.text == "999888" + + +@pytest.mark.asyncio +async def test_whatsapp_verify_invalid_token(client: AsyncClient) -> None: + """Wrong verify_token should return 403.""" + params = { + "hub.mode": "subscribe", + "hub.verify_token": "wrong_token", + 
"hub.challenge": "123", + } + response = await client.get("/api/v1/webhooks/whatsapp", params=params) + assert response.status_code == 403