diff --git a/autobot-backend/api/conversation_export.py b/autobot-backend/api/conversation_export.py new file mode 100644 index 000000000..73a8b5596 --- /dev/null +++ b/autobot-backend/api/conversation_export.py @@ -0,0 +1,261 @@ +# AutoBot - AI-Powered Automation Platform +# Copyright (c) 2025 mrveiss +# Author: mrveiss +""" +Conversation Export and Import API (#1808) + +FastAPI router for conversation export and import endpoints. + +Registered in feature_routers.py as: + ("api.conversation_export", "/conversations", ["conversation-export"], "conversation_export") + +Endpoints: + GET /api/conversations/{session_id}/export?format=json|markdown + GET /api/conversations/export-all + POST /api/conversations/import +""" + +import logging +from typing import Optional + +from fastapi import APIRouter, Depends, Query, Request +from fastapi.responses import Response +from pydantic import BaseModel, Field + +from auth_middleware import get_current_user +from autobot_shared.error_boundaries import ErrorCategory, with_error_handling +from services.conversation_export import ( + export_all_conversations_json, + export_conversation_json, + export_conversation_markdown, + import_conversation, +) +from utils.chat_utils import get_chat_history_manager, validate_chat_session_id +from utils.chat_exceptions import get_exceptions_lazy + +logger = logging.getLogger(__name__) + +router = APIRouter(tags=["conversation-export"]) + +# Valid export format set for O(1) lookup +_VALID_EXPORT_FORMATS = frozenset({"json", "markdown"}) + +# Content-type mapping keyed by format string +_CONTENT_TYPES = { + "json": "application/json", + "markdown": "text/markdown", +} + +# File extension mapping keyed by format string +_FILE_EXTENSIONS = { + "json": "json", + "markdown": "md", +} + +# Valid on_conflict values +_VALID_ON_CONFLICT = frozenset({"skip", "replace", "rename"}) + + +# --------------------------------------------------------------------------- +# Request / Response models +# 
# ---------------------------------------------------------------------------
# Request / Response models
# ---------------------------------------------------------------------------


class ConversationImportRequest(BaseModel):
    """Request body for importing a conversation (#1808)."""

    # Parsed export document; its structure is checked by the import service,
    # not by this model (the field is a plain ``dict``).
    document: dict = Field(
        ...,
        description=(
            "AutoBot conversation export document produced by the export endpoint "
            "(format: autobot-conversation-v1)."
        ),
    )
    # Free-form string here; membership in _VALID_ON_CONFLICT is enforced by
    # the endpoint via _validate_on_conflict_or_raise.
    on_conflict: str = Field(
        default="skip",
        description=(
            "Conflict resolution strategy when session_id already exists. "
            "One of: skip, replace, rename."
        ),
    )


# ---------------------------------------------------------------------------
# Validation helpers
# ---------------------------------------------------------------------------


def _validation_error(message: str) -> Exception:
    """Build a ValidationError (resolved lazily) carrying ``message``."""
    # ValidationError is the fourth of the five values returned by
    # get_exceptions_lazy().
    _, _, _, validation_error_cls, _ = get_exceptions_lazy()
    return validation_error_cls(message)


def _validate_format_or_raise(export_format: str) -> None:
    """Accept only formats listed in _VALID_EXPORT_FORMATS; raise otherwise."""
    if export_format in _VALID_EXPORT_FORMATS:
        return
    raise _validation_error(
        f"Invalid format {export_format!r}. Supported: json, markdown"
    )


def _validate_session_id_or_raise(session_id: str) -> None:
    """Raise ValidationError when validate_chat_session_id rejects the id."""
    if validate_chat_session_id(session_id):
        return
    raise _validation_error(f"Invalid session_id: {session_id!r}")


def _validate_on_conflict_or_raise(on_conflict: str) -> None:
    """Accept only strategies listed in _VALID_ON_CONFLICT; raise otherwise."""
    if on_conflict in _VALID_ON_CONFLICT:
        return
    raise _validation_error(
        f"Invalid on_conflict {on_conflict!r}. Supported: skip, replace, rename"
    )


# ---------------------------------------------------------------------------
# Export helpers
# ---------------------------------------------------------------------------


async def _run_export(
    chat_history_manager,
    session_id: str,
    export_format: str,
) -> str:
    """
    Produce the export text for ``session_id`` in ``export_format``.

    "json" selects the JSON exporter; every other value selects the Markdown
    exporter (callers validate the format beforehand).

    Raises:
        ResourceNotFoundError: when the exporter returns None. The service
            exporters return None both for a missing session and for an
            internal failure, so the two cases are indistinguishable here.
    """
    _, _, not_found_cls, _, _ = get_exceptions_lazy()

    exporter = (
        export_conversation_json
        if export_format == "json"
        else export_conversation_markdown
    )
    exported = await exporter(chat_history_manager, session_id)

    if exported is not None:
        return exported
    raise not_found_cls(f"Session {session_id!r} not found")


def _build_export_response(
    content: str, session_id: str, export_format: str
) -> Response:
    """
    Turn exported text into a file-download Response.

    The body is UTF-8 encoded; the media type and filename extension are
    looked up from _CONTENT_TYPES / _FILE_EXTENSIONS by ``export_format``.
    """
    extension = _FILE_EXTENSIONS[export_format]
    disposition = f"attachment; filename=conversation_{session_id}.{extension}"
    return Response(
        content=content.encode("utf-8"),
        media_type=_CONTENT_TYPES[export_format],
        headers={"Content-Disposition": disposition},
    )


# ---------------------------------------------------------------------------
# Endpoints
# ---------------------------------------------------------------------------
#
# NOTE(review): decorators apply bottom-up, so ``@router.get/post`` receives
# the undecorated coroutine and ``with_error_handling`` only wraps the
# module-level name afterwards. If the error boundary is meant to run for HTTP
# requests, the decorator order may need to be swapped — confirm against
# with_error_handling's contract before changing it.
#
# NOTE(review): the module docstring says this router is registered with the
# prefix "/conversations", yet every path below also starts with
# "/conversations". Verify the final URLs are not ".../conversations/
# conversations/..." instead of the documented "/api/conversations/...".


@with_error_handling(
    category=ErrorCategory.SERVER_ERROR,
    operation="export_conversation",
    error_code_prefix="CONVEXPORT",
)
@router.get("/conversations/{session_id}/export")
async def export_conversation(
    session_id: str,
    request: Request,
    format: str = Query(default="json", description="Export format: json or markdown"),
    current_user: dict = Depends(get_current_user),
):
    """
    Export a single conversation in JSON or Markdown format (#1808).

    - **json**: Enriched AutoBot JSON envelope with metadata and messages.
    - **markdown**: Human-readable Markdown with message headers.
    """
    # Reject malformed input before touching storage.
    _validate_session_id_or_raise(session_id)
    _validate_format_or_raise(format)

    manager = get_chat_history_manager(request)
    exported = await _run_export(manager, session_id, format)

    username = current_user.get("username", "unknown")
    logger.info(
        "Exported conversation %s as %s for user %s",
        session_id,
        format,
        username,
    )
    return _build_export_response(exported, session_id, format)


@with_error_handling(
    category=ErrorCategory.SERVER_ERROR,
    operation="export_all_conversations",
    error_code_prefix="CONVEXPORT",
)
@router.get("/conversations/export-all")
async def export_all_conversations(
    request: Request,
    current_user: dict = Depends(get_current_user),
):
    """
    Export all conversations as a bulk JSON archive (#1808).

    Returns a JSON file containing every conversation stored on this instance.
    """
    manager = get_chat_history_manager(request)
    archive = await export_all_conversations_json(manager)

    if archive is None:
        # The service returns None when building the archive failed.
        _, internal_error_cls, _, _, _ = get_exceptions_lazy()
        raise internal_error_cls("Failed to build conversation archive")

    username = current_user.get("username", "unknown")
    logger.info("Bulk conversation export requested by user %s", username)

    download_headers = {
        "Content-Disposition": "attachment; filename=conversations_export.json"
    }
    return Response(
        content=archive.encode("utf-8"),
        media_type="application/json",
        headers=download_headers,
    )


@with_error_handling(
    category=ErrorCategory.SERVER_ERROR,
    operation="import_conversation",
    error_code_prefix="CONVEXPORT",
)
@router.post("/conversations/import")
async def import_conversation_endpoint(
    body: ConversationImportRequest,
    request: Request,
    current_user: dict = Depends(get_current_user),
):
    """
    Import a conversation from an AutoBot JSON export document (#1808).

    The ``on_conflict`` field controls what happens when the session_id already
    exists:
    - **skip** (default): return without modifying existing data.
    - **replace**: overwrite the existing session with the imported data.
    - **rename**: save under a new session_id with a timestamped suffix.
    """
    _validate_on_conflict_or_raise(body.on_conflict)

    manager = get_chat_history_manager(request)
    outcome = await import_conversation(
        manager,
        document=body.document,
        on_conflict=body.on_conflict,
    )

    # The service reports failures through the result dict (success=False),
    # so the outcome is logged and returned as-is.
    logger.info(
        "Conversation import by user %s: success=%s session=%s conflict=%s",
        current_user.get("username", "unknown"),
        outcome.get("success"),
        outcome.get("session_id"),
        outcome.get("conflict"),
    )
    return outcome
logger = logging.getLogger(__name__)

# Format identifier embedded in every exported archive
AUTOBOT_EXPORT_FORMAT = "autobot-conversation-v1"

# Import accepts any document whose "format" starts with this prefix, i.e. it
# is not restricted to the exact version string written by the exporter.
_IMPORT_FORMAT_PREFIX = "autobot-conversation-v"


# ---------------------------------------------------------------------------
# Internal helpers — message formatting
# ---------------------------------------------------------------------------


def _utc_timestamp() -> str:
    """Return the current UTC time formatted as ``YYYY-MM-DDTHH:MM:SSZ``."""
    return time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())


def _render_message_markdown(msg: Dict[str, Any], index: int) -> str:
    """
    Render a single message as a Markdown block.

    Args:
        msg: Message dict. The author is read from "sender" (falling back to
            "role", then "unknown"); the body from "text" (falling back to
            "content"); "timestamp" is optional.
        index: Zero-based position of the message; shown one-based.

    Returns:
        A "### Message N — sender (timestamp)" header, a blank line, and the
        message text.
    """
    sender = msg.get("sender") or msg.get("role", "unknown")
    timestamp = msg.get("timestamp", "")
    text = msg.get("text") or msg.get("content", "")
    header = f"### Message {index + 1} — {sender}"
    if timestamp:
        header += f" ({timestamp})"
    return f"{header}\n\n{text}"


def _render_session_metadata_markdown(
    session_id: str, chat_data: Dict[str, Any]
) -> List[str]:
    """
    Build the Markdown header lines for a session.

    Timestamps are read from the snake_case keys first and fall back to the
    camelCase variants ("createdTime", "lastModified").
    """
    created = chat_data.get("created_time", chat_data.get("createdTime", ""))
    modified = chat_data.get("last_modified", chat_data.get("lastModified", ""))
    return [
        f"# Conversation Export: {session_id}",
        "",
        f"**Session ID:** {session_id}",
        f"**Name:** {chat_data.get('name', '')}",
        f"**Created:** {created}",
        f"**Last Modified:** {modified}",
        f"**Message Count:** {len(chat_data.get('messages', []))}",
        "",
        "---",
        "",
    ]


# ---------------------------------------------------------------------------
# Export helpers
# ---------------------------------------------------------------------------


def _build_json_envelope(
    session_id: str, chat_data: Dict[str, Any]
) -> Dict[str, Any]:
    """
    Wrap raw session data in the versioned AutoBot JSON export envelope.

    Missing fields default to an empty string / dict / list; timestamps accept
    both snake_case and camelCase source keys.
    """
    # Read once so "messages" and "message_count" always agree.
    messages = chat_data.get("messages", [])
    return {
        "format": AUTOBOT_EXPORT_FORMAT,
        "exported_at": _utc_timestamp(),
        "session_id": session_id,
        "name": chat_data.get("name", ""),
        "created_time": chat_data.get("created_time", chat_data.get("createdTime", "")),
        "last_modified": chat_data.get(
            "last_modified", chat_data.get("lastModified", "")
        ),
        "metadata": chat_data.get("metadata", {}),
        "messages": messages,
        "message_count": len(messages),
    }


def _build_bulk_envelope(sessions: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Wrap multiple session envelopes in a bulk archive envelope."""
    return {
        "format": f"{AUTOBOT_EXPORT_FORMAT}-bulk",
        "exported_at": _utc_timestamp(),
        "conversation_count": len(sessions),
        "conversations": sessions,
    }


# ---------------------------------------------------------------------------
# Public export functions
# ---------------------------------------------------------------------------


async def export_conversation_json(
    chat_history_manager, session_id: str
) -> Optional[str]:
    """
    Export a single conversation as enriched AutoBot JSON.

    Returns:
        The serialised JSON string, or None when the session could not be
        loaded or serialisation failed (the failure is logged).
    """
    try:
        chat_data = await _load_full_session_data(chat_history_manager, session_id)
        if chat_data is None:
            return None
        envelope = _build_json_envelope(session_id, chat_data)
        return json.dumps(envelope, indent=2, ensure_ascii=False)
    except Exception as exc:
        # Best-effort contract: callers expect None, not an exception.
        logger.exception("Failed to export session %s as JSON: %s", session_id, exc)
        return None


async def export_conversation_markdown(
    chat_history_manager, session_id: str
) -> Optional[str]:
    """
    Export a single conversation as human-readable Markdown.

    Returns:
        The Markdown string, or None when the session could not be loaded or
        rendering failed (the failure is logged).
    """
    try:
        chat_data = await _load_full_session_data(chat_history_manager, session_id)
        if chat_data is None:
            return None
        lines = _render_session_metadata_markdown(session_id, chat_data)
        for position, msg in enumerate(chat_data.get("messages", [])):
            lines.append(_render_message_markdown(msg, position))
            lines.append("")  # blank line between messages
        return "\n".join(lines)
    except Exception as exc:
        logger.exception(
            "Failed to export session %s as Markdown: %s", session_id, exc
        )
        return None


async def export_all_conversations_json(chat_history_manager) -> Optional[str]:
    """
    Export all conversations as a bulk JSON archive.

    Sessions without an id ("chatId" or "id") and sessions whose data cannot be
    loaded are skipped.

    Returns:
        The serialised JSON string, or None on error (the failure is logged).
    """
    try:
        sessions = await chat_history_manager.list_sessions()
        envelopes = []
        for session_info in sessions:
            session_id = session_info.get("chatId") or session_info.get("id", "")
            if not session_id:
                continue
            chat_data = await _load_full_session_data(
                chat_history_manager, session_id
            )
            if chat_data is not None:
                envelopes.append(_build_json_envelope(session_id, chat_data))
        archive = _build_bulk_envelope(envelopes)
        return json.dumps(archive, indent=2, ensure_ascii=False)
    except Exception as exc:
        logger.exception("Failed to bulk export conversations: %s", exc)
        return None


# ---------------------------------------------------------------------------
# Import helpers
# ---------------------------------------------------------------------------


def _validate_import_document(document: Dict[str, Any]) -> Tuple[bool, str]:
    """
    Validate an import document.

    Checks, in order: the document is a dict, "format" is a string starting
    with the supported prefix, "session_id" is present and a non-empty string,
    and "messages" is present and a list.

    Returns:
        (is_valid, error_message). error_message is empty on success.
    """
    if not isinstance(document, dict):
        return False, "Invalid document: expected a JSON object"

    fmt = document.get("format", "")
    # The document is user-supplied, so "format" may be any JSON type; check
    # the type before calling str methods on it.
    if not isinstance(fmt, str) or not fmt.startswith(_IMPORT_FORMAT_PREFIX):
        return False, f"Unrecognised format: {fmt!r}"

    if "session_id" not in document:
        return False, "Missing required field: session_id"
    session_id = document["session_id"]
    if not isinstance(session_id, str) or not session_id:
        return False, "Invalid session_id: expected a non-empty string"

    if "messages" not in document:
        return False, "Missing required field: messages"
    if not isinstance(document["messages"], list):
        return False, "Invalid messages: expected a list"

    return True, ""


async def _session_exists(chat_history_manager, session_id: str) -> bool:
    """
    Return True when ``load_session`` yields at least one message.

    A session that loads as empty, or whose load raises, is reported as not
    existing.
    """
    try:
        messages = await chat_history_manager.load_session(session_id)
    except Exception as exc:
        # Deliberately best-effort; keep a trace instead of failing silently.
        logger.debug(
            "Could not load session %s while checking existence: %s",
            session_id,
            exc,
        )
        return False
    return bool(messages)


def _apply_suffix_to_session_id(session_id: str, suffix: str) -> str:
    """Return a new session_id with ``-<suffix>`` appended."""
    return f"{session_id}-{suffix}"


# ---------------------------------------------------------------------------
# Public import function
# ---------------------------------------------------------------------------


async def import_conversation(
    chat_history_manager,
    document: Dict[str, Any],
    on_conflict: str = "skip",
) -> Dict[str, Any]:
    """
    Import a conversation from an AutoBot JSON export document.

    Args:
        chat_history_manager: Chat history manager instance; must provide
            async ``load_session`` and ``save_session``.
        document: Parsed export document (see _validate_import_document).
        on_conflict: Strategy when the session_id already exists.
            - "skip"    — return without saving.
            - "rename"  — save under ``<session_id>-imported-<unix time>``.
            - any other value (including "replace") — overwrite the session.

    Returns:
        Dict with keys: success, session_id, conflict, message. Validation
        failures are reported through this dict, not raised.
    """
    # NOTE(review): session_id comes from the uploaded document and is only
    # type-checked here. Confirm save_session rejects ids that are not safe
    # to use as a storage key.
    valid, err = _validate_import_document(document)
    if not valid:
        return {"success": False, "session_id": None, "conflict": False, "message": err}

    session_id: str = document["session_id"]
    messages: List[Dict[str, Any]] = document["messages"]
    name: str = document.get("name", "")

    exists = await _session_exists(chat_history_manager, session_id)

    if exists:
        if on_conflict == "skip":
            logger.info("Import skipped: session %s already exists", session_id)
            return {
                "success": False,
                "session_id": session_id,
                "conflict": True,
                "message": f"Session {session_id!r} already exists (on_conflict=skip)",
            }
        if on_conflict == "rename":
            # Second-resolution timestamp keeps repeated imports apart.
            suffix = str(int(time.time()))
            session_id = _apply_suffix_to_session_id(session_id, f"imported-{suffix}")
            logger.info("Import renamed to %s due to conflict", session_id)

    await chat_history_manager.save_session(
        session_id=session_id, messages=messages, name=name
    )
    logger.info("Imported conversation %s (%d messages)", session_id, len(messages))
    return {
        "success": True,
        "session_id": session_id,
        "conflict": exists,
        "message": f"Imported {len(messages)} messages into session {session_id!r}",
    }


# ---------------------------------------------------------------------------
# Internal utility
# ---------------------------------------------------------------------------


async def _load_full_session_data(
    chat_history_manager, session_id: str
) -> Optional[Dict[str, Any]]:
    """
    Load the full session file data (not just the messages list).

    Tries the file names ``<session_id>_chat.json`` and
    ``chat_<session_id>.json`` inside the manager's chats directory and returns
    the result of the manager's ``_decrypt_data`` for the first file found
    (presumably the parsed session dict — verify against the manager).

    Returns:
        The decoded session data, or None when no file exists or loading fails.
    """
    try:
        # Imported lazily inside the try so that a missing optional
        # dependency is reported as "load failed" rather than raised.
        import os

        import aiofiles

        from autobot_shared.security.path_validator import validate_relative_path

        chats_directory = chat_history_manager._get_chats_directory()

        candidate_names = (
            f"{session_id}_chat.json",
            f"chat_{session_id}.json",
        )
        for candidate in candidate_names:
            try:
                chat_file = str(validate_relative_path(candidate, chats_directory))
            except ValueError:
                # Path validator rejected this name; try the next layout.
                continue
            if not os.path.exists(chat_file):
                continue
            async with aiofiles.open(chat_file, "r", encoding="utf-8") as fh:
                raw = await fh.read()
            return chat_history_manager._decrypt_data(raw)
        logger.warning("Session file not found for %s", session_id)
        return None
    except Exception as exc:
        logger.error("Error loading full session data for %s: %s", session_id, exc)
        return None