From 4f6bbee37f5d3f42bc597e11ca9327258741345f Mon Sep 17 00:00:00 2001 From: bazz333 Date: Thu, 8 May 2025 12:47:25 +0200 Subject: [PATCH 1/7] fix: semplifica la gestione dell'eliminazione dei file e aggiorna il file requirements.txt con versioni specifiche --- app/routes/documents.py | 39 ++++------------------ app/services/file_manager_service.py | 50 ++++++++++++++-------------- requirements.txt | 34 +++++++++---------- 3 files changed, 47 insertions(+), 76 deletions(-) diff --git a/app/routes/documents.py b/app/routes/documents.py index 63aa1f6..ce9c9ca 100644 --- a/app/routes/documents.py +++ b/app/routes/documents.py @@ -103,39 +103,12 @@ async def delete_file(fileDelete: schemas.DocumentDelete): file_manager = get_file_manager_by_extension(fileDelete.title) if file_manager is None: raise HTTPException(status_code=400, detail="File manager not found") - try: - file_path = file_manager.get_full_path(fileDelete.title) - print("file path:", file_path) - await file_manager.delete_document( - fileDelete.id, file_path, fileDelete.token, fileDelete.current_password - ) - except HTTPException as e: - match e.status_code: - case 404: - print("error detail:", e.detail) - raise HTTPException( - status_code=404, - detail="Document not found", - ) - case 500: - print("error detail:", e.detail) - raise HTTPException( - status_code=500, - detail="Error in deleting file", - ) - case _: - print("error detail:", e.detail) - raise HTTPException( - status_code=500, - detail="Error in deleting file", - ) - - except Exception as e: - print("error detail:", e) - raise HTTPException( - status_code=500, - detail="Error in deleting file", - ) + + file_path = file_manager.get_full_path(fileDelete.title) + print("file path:", file_path) + await file_manager.delete_document( + fileDelete.id, file_path, fileDelete.token, fileDelete.current_password + ) return {"message": "File deleted successfully"} diff --git a/app/services/file_manager_service.py b/app/services/file_manager_service.py index e8cd390..baab201 100644 --- a/app/services/file_manager_service.py +++ b/app/services/file_manager_service.py @@ -138,32 +138,8 @@ async def delete_document( Returns: - bool: True se il file è stato eliminato correttamente, False altrimenti. """ - # rimuovi da filesystem - print("INIZIO RIMOZIONE DOCUMENTO") - print("file_path:", file_path) - print("os.path.isfile(file_path):", os.path.isfile(file_path)) - print("ls -la /data/documents", os.listdir("/data/documents")) - - if os.path.isfile(file_path) and os.path.exists(file_path): - try: - os.remove(file_path) - except Exception as e: - raise HTTPException( - status_code=404, - detail=f"File {file_path} non trovato: {e}", - ) - logger.info(f"File {file_path} eliminato") - else: - raise HTTPException( - status_code=404, - detail=f"File {file_path} non trovato", - ) - - # rimuovi da database vettoriale - self.vector_database.delete_document(file_path) # rimuovi da Database API - print("[LLM API] file_id: pre DELETE: ", file_id, type(file_id)) delete_req = requests.delete( f"http://database-api:8000/documents", headers={ @@ -188,11 +164,35 @@ async def delete_document( status_code=400, detail=f"Documento non trovato", ) + case 401: + raise HTTPException( + status_code=401, + detail=f"Password errata", + ) case 500: raise HTTPException( status_code=500, - detail=f"Errore nel caricare e processare file", + detail=f"Errore nel caricare e processare file {delete_req.text}", + ) + + # rimuovi da filesystem + if os.path.isfile(file_path) and os.path.exists(file_path): + try: + os.remove(file_path) + except Exception as e: + raise HTTPException( + status_code=404, + detail=f"File {file_path} non trovato: {e}", ) + logger.info(f"File {file_path} eliminato") + else: + raise HTTPException( + status_code=404, + detail=f"File {file_path} non trovato", + ) + + # rimuovi da database vettoriale + self.vector_database.delete_document(file_path) class TextFileManager(FileManager): diff --git a/requirements.txt b/requirements.txt index e3ba8ae..18b953d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,19 +1,17 @@ # required packages -fastapi -uvicorn -starlette -openai -langchain -langchain-openai -langchain_chroma -langchain_community -chromadb -bson -pytest -pytest-mock -pytest-asyncio -httpx - -pytest -pypdf -python-multipart +fastapi==0.115.12 +uvicorn==0.34.2 +starlette==0.46.2 +openai==1.77.0 +langchain==0.3.25 +langchain-openai==0.3.16 +langchain_chroma==0.2.3 +langchain_community==0.3.23 +chromadb==0.6.3 +bson==0.5.10 +pytest==8.3.5 +pytest-mock==3.14.0 +pytest-asyncio==0.26.0 +httpx==0.28.1 +pypdf==5.4.0 +python-multipart==0.0.20 \ No newline at end of file From 2cebbabaf95f8bf2c673b0a44348ee8030e3fdcc Mon Sep 17 00:00:00 2001 From: bazz333 Date: Thu, 8 May 2025 16:26:44 +0200 Subject: [PATCH 2/7] feat: aggiungi la funzione per ottenere il numero di documenti e migliora la gestione degli errori nel servizio di risposta LLM --- app/routes/documents.py | 4 +++- app/services/file_manager_service.py | 21 +++++++++++++++++---- app/services/llm_response_service.py | 8 +++++--- app/services/vector_database_service.py | 23 ++++++++++++++++++----- 4 files changed, 43 insertions(+), 13 deletions(-) diff --git a/app/routes/documents.py b/app/routes/documents.py index ce9c9ca..4a0a663 100644 --- a/app/routes/documents.py +++ b/app/routes/documents.py @@ -124,5 +124,7 @@ def get_documents(): Raises: - HTTPException: Se si verifica un errore durante il recupero dei documenti. """ + file_manager = get_file_manager() + return file_manager.get_documents_number(), os.listdir("/data/documents") + - return os.listdir("/data/documents") diff --git a/app/services/file_manager_service.py b/app/services/file_manager_service.py index baab201..0c86ec8 100644 --- a/app/services/file_manager_service.py +++ b/app/services/file_manager_service.py @@ -36,6 +36,17 @@ def get_full_path(self, filename: str) -> str: documents_dir = os.environ.get("DOCUMENTS_DIR", "/data/documents") return os.path.join(documents_dir, filename) + def get_documents_number(self): + """ + Restituisce le statistiche sui documenti. + + Returns: + - dict: Un dizionario contenente le statistiche sui documenti. + """ + # self.vector_database.delete_all_documents() + return self.vector_database.count() + + async def _save_file(self, file: File): """ Salva il file nel filesystem. @@ -166,9 +177,9 @@ async def delete_document( ) case 401: raise HTTPException( - status_code=401, - detail=f"Password errata", - ) + status_code=401, + detail=f"Password errata", + ) case 500: raise HTTPException( status_code=500, @@ -215,7 +226,7 @@ class StringManager(FileManager): pass -def get_file_manager(file: UploadFile): +def get_file_manager(file: UploadFile = None): """ Restituisce il file manager in base al tipo di file. @@ -225,6 +236,8 @@ def get_file_manager(file: UploadFile): Returns: - FileManager: Il file manager appropriato. """ + if file is None: + return TextFileManager() match file.content_type: case "text/plain": return TextFileManager() diff --git a/app/services/llm_response_service.py b/app/services/llm_response_service.py index 0514a8f..1ed9479 100644 --- a/app/services/llm_response_service.py +++ b/app/services/llm_response_service.py @@ -33,10 +33,10 @@ def _get_context(self, question: str) -> str: try: question_context = self.vector_database.search_context(question) if not question_context: - raise ValueError("No context found for the question.") + return "" return question_context except Exception as e: - logger.error(f"Error getting context: {str(e)}", exc_info=True) + print(f"Error getting context: {str(e)}", exc_info=True) raise HTTPException( status_code=500, detail=f"Error getting context: {str(e)}" ) @@ -46,7 +46,6 @@ def generate_llm_response(self, question: schemas.Question) -> StreamingResponse # TODO: gestire array messaggi formatted_messages = "" - print(f"question.messages: {question.messages}") if question.messages: if isinstance(question.messages, list): formatted_messages = "\n".join( @@ -64,6 +63,9 @@ def generate_llm_response(self, question: schemas.Question) -> StreamingResponse SystemMessage(f"Conversazione precedente: {formatted_messages}"), HumanMessage(f"Domanda a cui devi rispondere: {question}"), ] + print() + print(f"PROMPT: {context} {context_messages}") + print() try: stream_response = self.LLM.model.astream(messages) diff --git a/app/services/vector_database_service.py b/app/services/vector_database_service.py index 35a883d..ef8f546 100644 --- a/app/services/vector_database_service.py +++ b/app/services/vector_database_service.py @@ -140,17 +140,30 @@ def search_context(self, query: str, results_number: int = 2) -> List[Document]: logger.error(f"Errore durante la similarity search: {e}", exc_info=True) return [] + def delete_all_documents(self): + """Elimina tutti i documenti dal database.""" + try: + db = self._get_db() + db.reset_collection() + print("[VECTOR DB] Tutti i documenti eliminati.") + except Exception as e: + logger.error(f"Errore durante l'eliminazione di tutti i documenti: {e}", exc_info=True) + raise + # metodi ausiliari def _get_collection_count(self) -> int: """Helper per gestire accesso a dettagli Chroma.""" try: - client = self._db.get() - if client and self._db._collection: - return self._db._collection.count() + db_instance = self._get_db() + if db_instance and db_instance._collection: + return db_instance._collection.count() + print( + "Impossibile ottenere il count: istanza DB o _collection non disponibile dopo _get_db()." + ) return 0 except Exception as e: - logger.warning( - f"Impossibile ottenere il count della collection (potrebbe non esistere ancora): {e}" + print( + f"Errore durante il recupero del count della collection: {e}" ) return 0 From 4a28b9ea0bb270e6e30d50aa25e8a9b3d7ae7051 Mon Sep 17 00:00:00 2001 From: lucaribon Date: Thu, 8 May 2025 18:59:45 +0200 Subject: [PATCH 3/7] fix: aggiunta faq al contesto; Co-authored-by: Matteo Bazzan --- app/main.py | 4 +- app/routes/documents.py | 31 +++-- app/routes/faq.py | 88 ++++++++++++++ app/schemas.py | 45 +++++-- app/services/file_manager_service.py | 153 +++++++++++++++++++++++- app/services/vector_database_service.py | 46 ++++--- app/utils.py | 14 +++ 7 files changed, 325 insertions(+), 56 deletions(-) create mode 100644 app/routes/faq.py create mode 100644 app/utils.py diff --git a/app/main.py b/app/main.py index f0922b3..70b9ff8 100644 --- a/app/main.py +++ b/app/main.py @@ -1,6 +1,5 @@ from fastapi import FastAPI -from app.routes import llm -from app.routes import documents +from app.routes import llm, documents, faq app = FastAPI( title="LLM API", @@ -10,3 +9,4 @@ app.include_router(llm.router) app.include_router(documents.router) +app.include_router(faq.router) diff --git a/app/routes/documents.py b/app/routes/documents.py index 4a0a663..80c4b27 100644 --- a/app/routes/documents.py +++ b/app/routes/documents.py @@ -3,7 +3,6 @@ from typing import List import os -# from app.services.chroma_services import embedding from app.services.file_manager_service import ( get_file_manager, get_file_manager_by_extension, @@ -18,18 +17,17 @@ ) -@router.post("/upload_file") +@router.post("") async def upload_file(files: List[UploadFile], token: str): """ Carica il file nel database vettoriale - Args: - - files (List[UploadFile]): I file da caricare. Devono essere file di testo o PDF. + ### Args: + * **files (List[UploadFile])**: I file da caricare. Devono essere file di testo o PDF. - Raises: - - HTTPException: Se il file non è valido o se si verifica un errore durante il caricamento. - - HTTPException: Se il file esiste già nel database vettoriale. - - HTTPException: Se si verifica un errore durante il caricamento e l'elaborazione del file. + ### Raises: + * **HTTPException.400_BAD_REQUEST**: Se non sono stati forniti file o se i file non sono di tipo testo o PDF. + * **HTTPException.500_INTERNAL_SERVER_ERROR**: Se si verifica un errore durante il caricamento dei file. """ if not files: @@ -97,8 +95,17 @@ async def upload_file(files: List[UploadFile], token: str): } -@router.delete("/delete_file") +@router.delete("") async def delete_file(fileDelete: schemas.DocumentDelete): + """ + Elimina un file dal database. + + ### Args: + * **fileDelete (schemas.DocumentDelete)**: Il file da eliminare. Deve contenere il titolo, il token e la password corrente. + ### Raises: + * **HTTPException.400_BAD_REQUEST**: Se il file non esiste o se si verifica un errore durante l'eliminazione. + * **HTTPException.500_INTERNAL_SERVER_ERROR**: Se si verifica un errore durante l'eliminazione del file. + """ print("delete file title:", fileDelete) file_manager = get_file_manager_by_extension(fileDelete.title) if file_manager is None: @@ -113,7 +120,7 @@ async def delete_file(fileDelete: schemas.DocumentDelete): return {"message": "File deleted successfully"} -@router.get("/get_documents") +@router.get("") def get_documents(): """ Ottiene la lista dei documenti dal database. @@ -125,6 +132,4 @@ def get_documents(): - HTTPException: Se si verifica un errore durante il recupero dei documenti. """ file_manager = get_file_manager() - return file_manager.get_documents_number(), os.listdir("/data/documents") - - + return file_manager.get_documents_number(), file_manager.get_documents(),os.listdir("/data/documents") diff --git a/app/routes/faq.py b/app/routes/faq.py new file mode 100644 index 0000000..b71fc8f --- /dev/null +++ b/app/routes/faq.py @@ -0,0 +1,88 @@ +from fastapi import APIRouter, Depends, HTTPException, File, UploadFile +from app.services.llm_service import LLM, OpenAI +from typing import List +import requests + +from app.services.file_manager_service import ( + get_file_manager, + get_file_manager_by_extension, +) + +import app.schemas as schemas + + +router = APIRouter( + tags=["faqs"], + prefix="/faqs", +) + +@router.post("") +async def create_faq(faq: schemas.FAQCreate, token:str): + """ + Crea una nuova FAQ. + + ### Args: + * **faq (schemas.FAQCreate)**: I dati della FAQ da creare. + * **token (str)**: Il token di autenticazione. + + ### Raises: + * **HTTPException.400_BAD_REQUEST**: Se non sono stati forniti dati per la creazione della FAQ. + * **HTTPException.500_INTERNAL_SERVER_ERROR**: Se si verifica un errore durante la creazione della FAQ. + """ + if not faq: + raise HTTPException(status_code=400, detail="No data provided for creation") + + file_manager = get_file_manager_by_extension() + if file_manager is None: + raise HTTPException(status_code=500, detail="File manager not found") + + faq_db = await file_manager.add_faq(faq, token) + + return {"faq": faq_db, "message": "FAQ created successfully"} + +@router.delete("") +async def delete_faq(faq: schemas.FAQDelete, token:str): + """ + Elimina una FAQ esistente. + + ### Args: + * **faq (schemas.FAQDelete)**: I dati della FAQ da eliminare. + + ### Raises: + * **HTTPException.400_BAD_REQUEST**: Se non sono stati forniti dati per l'eliminazione. + * **HTTPException.404_NOT_FOUND**: Se la FAQ non esiste. + * **HTTPException.500_INTERNAL_SERVER_ERROR**: Se si verifica un errore durante l'eliminazione della FAQ. + """ + if not faq: + raise HTTPException(status_code=400, detail="No data provided for deletion") + + file_manager = get_file_manager_by_extension() + if file_manager is None: + raise HTTPException(status_code=500, detail="File manager not found") + + await file_manager.delete_faq(faq, token) + return {"message": "FAQ deleted successfully"} + +@router.put("") +async def update_faq(faq: schemas.FAQ, token:str): + """ + Aggiorna una FAQ esistente. + + ### Args: + * **faq (schemas.FAQUpdate)**: I dati della FAQ da aggiornare. + * **faq_id (str)**: L'ID della FAQ da aggiornare. + + ### Raises: + * **HTTPException.400_BAD_REQUEST**: Se non sono stati forniti dati per l'aggiornamento. + * **HTTPException.404_NOT_FOUND**: Se la FAQ non esiste. + * **HTTPException.500_INTERNAL_SERVER_ERROR**: Se si verifica un errore durante l'aggiornamento della FAQ. + """ + if not faq: + raise HTTPException(status_code=400, detail="No data provided for update") + + file_manager = get_file_manager_by_extension() + if file_manager is None: + raise HTTPException(status_code=500, detail="File manager not found") + + faq_db = await file_manager.update_faq(faq, token) + return {"faq": faq_db, "message": "FAQ updated successfully"} \ No newline at end of file diff --git a/app/schemas.py b/app/schemas.py index dfaaaf1..ea35a84 100644 --- a/app/schemas.py +++ b/app/schemas.py @@ -1,27 +1,48 @@ -from pydantic import BaseModel +from pydantic import BaseModel, field_validator from typing import List from datetime import datetime from uuid import UUID + class Message(BaseModel): sender: str content: str + class Question(BaseModel): - question: str - messages: List[Message] = [] + question: str + messages: List[Message] = [] + class Context(BaseModel): - context: str + context: str + class Document(BaseModel): - id: UUID - title: str - updated_at: datetime - content: str + id: UUID + title: str + updated_at: datetime + content: str + class DocumentDelete(BaseModel): - id: str - title: str - token: str - current_password: str \ No newline at end of file + id: str + title: str + token: str + current_password: str + + +class FAQ(BaseModel): + id: str + title: str + question: str + answer: str + +class FAQCreate(BaseModel): + title: str + question: str + answer: str + +class FAQDelete(BaseModel): + id: str + admin_password: str \ No newline at end of file diff --git a/app/services/file_manager_service.py b/app/services/file_manager_service.py index 0c86ec8..4240b71 100644 --- a/app/services/file_manager_service.py +++ b/app/services/file_manager_service.py @@ -2,6 +2,7 @@ from fastapi import Depends, File, UploadFile from langchain.text_splitter import RecursiveCharacterTextSplitter from langchain_community.document_loaders import PyPDFLoader, TextLoader +from langchain_core.documents import Document from fastapi import HTTPException import os import logging @@ -11,6 +12,8 @@ from bson import ObjectId from app.services.vector_database_service import get_vector_database, VectorDatabase +import app.schemas as schemas +from app.utils import get_object_id logger = logging.getLogger(__name__) @@ -45,7 +48,19 @@ def get_documents_number(self): """ # self.vector_database.delete_all_documents() return self.vector_database.count() - + + def get_documents(self): + """ + Restituisce i documenti dal database vettoriale. + + Param: + - skip: int - Il numero di documenti da saltare. + - limit: int - Il numero massimo di documenti da restituire. + + Returns: + - list: Una lista di documenti. + """ + return self.vector_database.get_all_documents() async def _save_file(self, file: File): """ @@ -170,9 +185,9 @@ async def delete_document( match delete_req.status_code: case 204: print(f"Documento eliminato correttamente") - case 400: + case 404: raise HTTPException( - status_code=400, + status_code=404, detail=f"Documento non trovato", ) case 401: @@ -223,7 +238,132 @@ async def _load_split_file(self, file_path: str): class StringManager(FileManager): - pass + async def _load_split_file(self, faq: schemas.FAQ): + data = Document( + page_content=f"Domanda: {faq.question}\nRisposta: {faq.answer}", + metadata={"source": "faqs", "faq_id": faq.id}, + ) + print("[StringManager] data:", data) + chunks = self.splitter.split_documents([data]) + return chunks + + async def add_faq(self, faq: schemas.FAQCreate, token: str): + """ + Divide la faq in chunk, + la salva nel database vettoriale. + + Param: + - faq: schemas.FAQ - La faq da caricare. + """ + print("[StringManager] adding faq:", faq) + + ris = requests.post( + "http://database-api:8000/faqs", + headers={"Authorization": f"Bearer {token}"}, + json=faq.dict(), + ) + faq_json = ris.json() + + if ris.status_code != 201: + raise HTTPException(status_code=ris.status_code, detail=ris.json()) + + faq_db = schemas.FAQ( + id=faq_json["id"], + title=faq.title, + question=faq.question, + answer=faq.answer, + ) + chunks = await self._load_split_file(faq_db) + self.vector_database.add_documents(chunks) + + return faq_db + + async def delete_faq(self, faq: schemas.FAQDelete, token: str): + """ + Elimina la faq dal database vettoriale e dal database. + + Param: + - faq: schemas.FAQDelete - La faq da eliminare. + - token: str - Il token di autenticazione. + """ + # rimuovi da Database API + delete_req = requests.delete( + f"http://database-api:8000/faqs/{faq.id}", + headers={ + "Content-Type": "application/json", + "Authorization": f"Bearer {token}", + }, + json={ + "current_password": faq.admin_password, + }, + ) + + match delete_req.status_code: + case 204: + print(f"FAQ eliminata correttamente") + case 404: + raise HTTPException( + status_code=400, + detail=f"FAQ non trovata", + ) + case 401: + raise HTTPException( + status_code=401, + detail=f"Password errata", + ) + case _: + raise HTTPException( + status_code=500, + detail=f"Errore nel caricare e processare file {delete_req.text}", + ) + + # rimuovi da database vettoriale + self.vector_database.delete_faq(faq.id) + + async def update_faq(self, faq: schemas.FAQ, token: str): + """ + Aggiorna la faq nel database. + + Param: + - faq: schemas.FAQUpdate - La faq da aggiornare. + - token: str - Il token di autenticazione. + """ + # rimuovi da Database API + update_req = requests.patch( + f"http://database-api:8000/faqs/{faq.id}", + headers={ + "Content-Type": "application/json", + "Authorization": f"Bearer {token}", + }, + json={ + "title": faq.title, + "question": faq.question, + "answer": faq.answer, + }, + ) + + match update_req.status_code: + case 200: + print(f"FAQ aggiornata correttamente") + case 404: + raise HTTPException( + status_code=400, + detail=f"FAQ non trovata", + ) + case 401: + raise HTTPException( + status_code=401, + detail=f"Password errata", + ) + case _: + raise HTTPException( + status_code=500, + detail=f"Errore nel caricare e processare file {update_req.text}", + ) + + self.vector_database.delete_faq(faq.id) + chunks = await self._load_split_file(faq) + self.vector_database.add_documents(chunks) def get_file_manager(file: UploadFile = None): @@ -239,6 +379,7 @@ def get_file_manager(file: UploadFile = None): if file is None: return TextFileManager() match file.content_type: + # TODO: capire se catcha anche le stringhe(=faq) case "text/plain": return TextFileManager() case "application/pdf": @@ -247,7 +388,7 @@ def get_file_manager(file: UploadFile = None): raise ValueError("Unsupported file type") -def get_file_manager_by_extension(file_path: str): +def get_file_manager_by_extension(file_path: str = None): """ Restituisce il file manager in base all'estensione del file. @@ -257,6 +398,8 @@ def get_file_manager_by_extension(file_path: str): Returns: - FileManager: Il file manager appropriato. """ + if file_path is None: + return StringManager() _, ext = os.path.splitext(file_path) match ext: case ".txt": diff --git a/app/services/vector_database_service.py b/app/services/vector_database_service.py index ef8f546..484af3c 100644 --- a/app/services/vector_database_service.py +++ b/app/services/vector_database_service.py @@ -66,29 +66,6 @@ def _get_db(self): ) return self._db - # TODO: da spostare nel contextManager - # def _load_and_split_docs(self, folder_path: str) -> List[Document]: - - # def _is_document_duplicate(self, document: Document) -> bool: - # """Controlla se il documento è duplicato.""" - # doc_uuid = uuid.uuid3( - # uuid.NAMESPACE_DNS, document.page_content - # ) - # print("doc uuid:", doc_uuid) - # if doc_uuid in self._get_db().get()["ids"]: - # print("Documento duplicato trovato.") - # logger.warning("Documento duplicato trovato.") - # return True - # pass - - # def _filter_duplicates(self, documents: List[Document]) -> List[Document]: - # """Controlla i documenti duplicati e li rimuove.""" - # filtered_documents = [] - # for doc in documents: - # if not self._is_document_duplicate(doc): - # filtered_documents.append(doc) - # return filtered_documents - def _generate_document_ids(self, documents: List[Document]) -> List[str]: """Estrae gli ID dei documenti.""" return [str(uuid.uuid3(uuid.NAMESPACE_DNS, doc.page_content)) for doc in documents] @@ -129,6 +106,17 @@ def delete_document(self, document_path: str): logger.error(f"Errore durante l'eliminazione del documento: {e}", exc_info=True) raise + def delete_faq(self, faq_id: str): + """Elimina una FAQ dal database.""" + try: + db = self._get_db() + db.delete(where={"faq_id": faq_id}) + print(f"[VECTOR DB] FAQ con ID {faq_id} eliminata.") + logger.info(f"FAQ con ID {faq_id} eliminata.") + except Exception as e: + logger.error(f"Errore durante l'eliminazione della FAQ: {e}", exc_info=True) + raise + def search_context(self, query: str, results_number: int = 2) -> List[Document]: # TODO: Non è detto che serva: Verifica se ci sono documenti # ensure_vectorized() @@ -149,7 +137,17 @@ def delete_all_documents(self): except Exception as e: logger.error(f"Errore durante l'eliminazione di tutti i documenti: {e}", exc_info=True) raise - + + def get_all_documents(self): + """Recupera tutti i documenti dal database.""" + try: + db = self._get_db() + results = db.get() + return results + except Exception as e: + logger.error(f"Errore durante il recupero di tutti i documenti: {e}", exc_info=True) + return [] + # metodi ausiliari def _get_collection_count(self) -> int: """Helper per gestire accesso a dettagli Chroma.""" diff --git a/app/utils.py b/app/utils.py new file mode 100644 index 0000000..3ce4a6f --- /dev/null +++ b/app/utils.py @@ -0,0 +1,14 @@ +import uuid +import hashlib +from bson import ObjectId + +def get_uuid3(text): + """ + Generate a UUID3 hash from the given text. + """ + return str(uuid.uuid3(uuid.NAMESPACE_DNS, text)) + + +def get_object_id(text): + hash_bytes = hashlib.md5(text.encode("utf-8")).digest()[:12] + return ObjectId(hash_bytes) \ No newline at end of file From 1f1b6099d7a72754e5e91402af4cea17f6c16a95 Mon Sep 17 00:00:00 2001 From: bazz333 Date: Fri, 9 May 2025 16:52:38 +0200 Subject: [PATCH 4/7] fix: aggiorna le istruzioni del chatbot e migliora la gestione del contesto nelle risposte LLM --- app/config.py | 19 +++---------------- app/services/file_manager_service.py | 5 +++-- app/services/llm_response_service.py | 13 +++++++------ app/services/vector_database_service.py | 4 ++-- 4 files changed, 15 insertions(+), 26 deletions(-) diff --git a/app/config.py b/app/config.py index 6dac68c..4b8f825 100644 --- a/app/config.py +++ b/app/config.py @@ -16,24 +16,11 @@ class Settings(BaseSettings): Missione: Assistere gli utenti nell'esplorazione dei prodotti forniti dall'azienda, informarli sulle caratteristiche del prodotto e consigliane l'acquisto. - Tratti della personalità: - - Conoscenza: Fornisce risposte accurate dalla base di conoscenze. - - Amichevole: cordiale e disponibile. - - Trasparente: condivide solo informazioni convalidate. - - Capacità: - - Educare: Spiegare i prodotti presenti, consigliarne i possibili usi, la storia dell'azienda e i suoi valori utilizzando la base di conoscenze. - - Assistere: Consigliare prodotti e fornire informazioni rigorosamente basate sui dati approvati. - - Ispirare: evidenziare i vantaggi e gli usi di ogni prodotto. - - Coinvolgere: Rispondere alle domande in modo chiaro ed educato, reindirizzando gli utenti al supporto se le risposte non sono disponibili. - - Tono: - - Positivo, professionale e privo di gergo. - - Rispettoso ed empatico per garantire un'esperienza di supporto. - Regole comportamentali: - - Utilizzare solo la base di conoscenze fornita. + - È essenziale che tu usi il più possibile le informazioni fornite dai documenti passati come contesto. - Se una risposta non è disponibile, informare l'utente e suggerire di consultare l'assistenza clienti. + - Sii chiaro ed elenca metodicamente le infomazioni richieste. + - Non esprimere opinioni personali o fare supposizioni. - Non fornire informazioni personali. """ CHUNK_SIZE: int = 400 diff --git a/app/services/file_manager_service.py b/app/services/file_manager_service.py index 4240b71..57a2855 100644 --- a/app/services/file_manager_service.py +++ b/app/services/file_manager_service.py @@ -14,6 +14,7 @@ from app.services.vector_database_service import get_vector_database, VectorDatabase import app.schemas as schemas from app.utils import get_object_id +from app.config import settings logger = logging.getLogger(__name__) @@ -22,8 +23,8 @@ class FileManager(ABC): def __init__(self): self.vector_database = get_vector_database() self.splitter = RecursiveCharacterTextSplitter( - chunk_size=400, - chunk_overlap=100, + chunk_size=settings.CHUNK_SIZE, + chunk_overlap=settings.CHUNK_OVERLAP, ) def get_full_path(self, filename: str) -> str: diff --git a/app/services/llm_response_service.py b/app/services/llm_response_service.py index 1ed9479..111a174 100644 --- a/app/services/llm_response_service.py +++ b/app/services/llm_response_service.py @@ -19,9 +19,7 @@ class LLMResponseService: - def __init__( - self - ): + def __init__(self): self.LLM = get_llm_model() self.vector_database = get_vector_database() self.CHATBOT_INSTRUCTIONS = settings.CHATBOT_INSTRUCTIONS @@ -34,18 +32,21 @@ def _get_context(self, question: str) -> str: question_context = self.vector_database.search_context(question) if not question_context: return "" - return question_context + output = [] + for doc in question_context: + output.append(doc.page_content) + return output except Exception as e: print(f"Error getting context: {str(e)}", exc_info=True) raise HTTPException( status_code=500, detail=f"Error getting context: {str(e)}" ) - def generate_llm_response(self, question: schemas.Question) -> StreamingResponse: + def generate_llm_response(self, question: schemas.Question) -> StreamingResponse: context = self._get_context(question.question) # TODO: gestire array messaggi formatted_messages = "" - + if question.messages: if isinstance(question.messages, list): formatted_messages = "\n".join( diff --git a/app/services/vector_database_service.py b/app/services/vector_database_service.py index 484af3c..e6ab236 100644 --- a/app/services/vector_database_service.py +++ b/app/services/vector_database_service.py @@ -23,7 +23,7 @@ def add_documents(self, documents: List[Document]): pass @abstractmethod - def search_context(self, query: str, results_number: int = 2) -> List[Document]: + def search_context(self, query: str, results_number: int = 4) -> List[Document]: pass # metodi ausiliari @@ -117,7 +117,7 @@ def delete_faq(self, faq_id: str): logger.error(f"Errore durante l'eliminazione della FAQ: {e}", exc_info=True) raise - def search_context(self, query: str, results_number: int = 2) -> List[Document]: + def search_context(self, query: str, results_number: int = 4) -> List[Document]: # TODO: Non è detto che serva: Verifica se ci sono documenti # ensure_vectorized() try: From 29ceb88ecd6ea1c7a995da07d11d0e037305229a Mon Sep 17 00:00:00 2001 From: lucaribon Date: Fri, 9 May 2025 20:23:58 +0200 Subject: [PATCH 5/7] refactor: necessario per definizione architettura delle classi; Co-authored-by: Matteo Bazzan --- app/config.py | 6 +- app/routes/documents.py | 17 ++-- app/routes/faq.py | 21 +++-- app/routes/llm.py | 49 +++++------ app/schemas.py | 26 +++--- app/services/embeddings_service.py | 71 ++++++++------- app/services/file_manager_service.py | 36 ++++---- app/services/llm_response_service.py | 16 ++-- app/services/llm_service.py | 33 ++++--- app/services/vector_database_service.py | 87 +++++++++--------- tests/services/test_embeddings_service.py | 52 ++++++++--- tests/services/test_file_manager_service.py | 93 ++++++++++++++------ tests/services/test_llm_response_services.py | 52 ++++++++--- tests/services/test_llm_service.py | 11 ++- 14 files changed, 332 insertions(+), 238 deletions(-) diff --git a/app/config.py b/app/config.py index 4b8f825..6e0a975 100644 --- a/app/config.py +++ b/app/config.py @@ -3,10 +3,10 @@ class Settings(BaseSettings): - OPENAI_API_KEY: str = os.environ.get("OPENAI_API_KEY", "") + OPENAI_API_KEY: str = os.environ.get("OPENAI_API_KEY") DOCUMENTS_FOLDER: str = "documenti" VECTOR_DB_PROVIDER: str = "chroma" - VECTOR_DB_PERSIST_DIRECTORY: str = "chroma_db" + VECTOR_DB_DIRECTORY: str = "chroma_db" EMBEDDING_MODEL_NAME: str = "text-embedding-ada-002" LLM_MODEL_NAME: str = "gpt-4o-mini" LLM_PROVIDER: str = "openai" @@ -19,7 +19,7 @@ class Settings(BaseSettings): Regole comportamentali: - È essenziale che tu usi il più possibile le informazioni fornite dai documenti passati come contesto. - Se una risposta non è disponibile, informare l'utente e suggerire di consultare l'assistenza clienti. - - Sii chiaro ed elenca metodicamente le infomazioni richieste. + - Sii chiaro ed elenca metodicamente le informazioni richieste. - Non esprimere opinioni personali o fare supposizioni. - Non fornire informazioni personali. """ diff --git a/app/routes/documents.py b/app/routes/documents.py index 80c4b27..bfb9a4d 100644 --- a/app/routes/documents.py +++ b/app/routes/documents.py @@ -1,5 +1,4 @@ -from fastapi import APIRouter, Depends, HTTPException, File, UploadFile -from app.services.llm_service import LLM, OpenAI +from fastapi import APIRouter, HTTPException, UploadFile from typing import List import os @@ -20,7 +19,7 @@ @router.post("") async def upload_file(files: List[UploadFile], token: str): """ - Carica il file nel database vettoriale + Carica il file nel database vettoriale. ### Args: * **files (List[UploadFile])**: I file da caricare. Devono essere file di testo o PDF. @@ -102,6 +101,7 @@ async def delete_file(fileDelete: schemas.DocumentDelete): ### Args: * **fileDelete (schemas.DocumentDelete)**: Il file da eliminare. Deve contenere il titolo, il token e la password corrente. + ### Raises: * **HTTPException.400_BAD_REQUEST**: Se il file non esiste o se si verifica un errore durante l'eliminazione. * **HTTPException.500_INTERNAL_SERVER_ERROR**: Se si verifica un errore durante l'eliminazione del file. @@ -123,13 +123,12 @@ async def delete_file(fileDelete: schemas.DocumentDelete): @router.get("") def get_documents(): """ - Ottiene la lista dei documenti dal database. - - Args: - - token (str): Il token di autenticazione dell'utente. + Restituisce il numero di documenti e i loro nomi. - Raises: - - HTTPException: Se si verifica un errore durante il recupero dei documenti. + ### Returns: + * **int**: Il numero di documenti. + * **List[str]**: I nomi dei documenti. + * **List[str]**: I nomi dei file nella directory /data/documents. """ file_manager = get_file_manager() return file_manager.get_documents_number(), file_manager.get_documents(),os.listdir("/data/documents") diff --git a/app/routes/faq.py b/app/routes/faq.py index b71fc8f..6206c1a 100644 --- a/app/routes/faq.py +++ b/app/routes/faq.py @@ -1,10 +1,6 @@ -from fastapi import APIRouter, Depends, HTTPException, File, UploadFile -from app.services.llm_service import LLM, OpenAI -from typing import List -import requests +from fastapi import APIRouter, HTTPException from app.services.file_manager_service import ( - get_file_manager, get_file_manager_by_extension, ) @@ -16,8 +12,9 @@ prefix="/faqs", ) + @router.post("") -async def create_faq(faq: schemas.FAQCreate, token:str): +async def create_faq(faq: schemas.FAQBase, token: str): """ Crea una nuova FAQ. @@ -40,8 +37,9 @@ async def create_faq(faq: schemas.FAQCreate, token:str): return {"faq": faq_db, "message": "FAQ created successfully"} + @router.delete("") -async def delete_faq(faq: schemas.FAQDelete, token:str): +async def delete_faq(faq: schemas.FAQDelete, token: str): """ Elimina una FAQ esistente. @@ -63,8 +61,9 @@ async def delete_faq(faq: schemas.FAQDelete, token:str): await file_manager.delete_faq(faq, token) return {"message": "FAQ deleted successfully"} + @router.put("") -async def update_faq(faq: schemas.FAQ, token:str): +async def update_faq(faq: schemas.FAQ, token: str): """ Aggiorna una FAQ esistente. @@ -79,10 +78,10 @@ async def update_faq(faq: schemas.FAQ, token:str): """ if not faq: raise HTTPException(status_code=400, detail="No data provided for update") - + file_manager = get_file_manager_by_extension() if file_manager is None: raise HTTPException(status_code=500, detail="File manager not found") - + faq_db = await file_manager.update_faq(faq, token) - return {"faq": faq_db, "message": "FAQ updated successfully"} \ No newline at end of file + return {"faq": faq_db, "message": "FAQ updated successfully"} diff --git a/app/routes/llm.py b/app/routes/llm.py index febb8d3..729e58d 100644 --- a/app/routes/llm.py +++ b/app/routes/llm.py @@ -1,8 +1,5 @@ from fastapi import APIRouter, Depends, HTTPException -from app.services.llm_service import LLM, OpenAI -# from app.services.chroma_services import embedding -from app.services.vector_database_service import get_vector_database from app.services.llm_response_service import LLMResponseService, get_llm_response_service import app.schemas as schemas @@ -19,19 +16,20 @@ async def generate_chat_response( """ Fornisce una risposta a una domanda utilizzando il contesto rilevante. - *Args*: - question (schemas.Question): La domanda e lo storico dei messaggi. - chat_service: Servizio di chat per generare risposte. + ### Args: + * **question (schemas.Question)**: La domanda e lo storico dei messaggi. - *Returns*: - La risposta generata dal modello LLM. + ### Returns: + * **response**: La risposta generata dal modello LLM. - *Raises*: - HTTPException: Se non viene fornita una domanda valida. + ### Raises: + * **HTTPException.404_NOT_FOUND**: Se non viene trovato alcun contesto rilevante. + * **HTTPException.400_BAD_REQUEST**: Se non viene fornita alcuna domanda. + * **HTTPException.500_INTERNAL_SERVER_ERROR**: Se si verifica un errore interno del server. """ if not question.question or question.question.strip() == "": raise HTTPException(status_code=400, detail="Nessuna domanda fornita") - + return llm_response_service.generate_llm_response(question) @@ -39,15 +37,18 @@ async def generate_chat_response( async def generate_chat_name( context: schemas.Context ): - """ " - Genera un nome per una chat. + """ + Genera un nome per la chat in base al contesto fornito. + + ### Args: + * **context (schemas.Context)**: Il contesto della chat. + + ### Returns: + * **response**: Il nome generato per la chat. - *Args*: - context (schemas.Context): Il contesto della chat. - *Returns*: - str: Il nome generato per la chat. - *Raises*: - HTTPException: Se non viene fornito un contesto valido. + ### Raises: + * **HTTPException.400_BAD_REQUEST**: Se non viene fornito alcun contesto. + * **HTTPException.500_INTERNAL_SERVER_ERROR**: Se si verifica un errore interno del server. """ if not context.context: raise HTTPException(status_code=400, detail="Nessun contesto fornito") @@ -55,12 +56,4 @@ async def generate_chat_name( llm_response_service = get_llm_response_service() return llm_response_service.generate_llm_chat_name( context.context - ) - - -@router.get("/ping") -async def ping(): - import requests - - ris = requests.get("https://www.google.com") - return {"status": "ok", "message": ris.text} + ) \ No newline at end of file diff --git a/app/schemas.py b/app/schemas.py index ea35a84..915112c 100644 --- a/app/schemas.py +++ b/app/schemas.py @@ -18,31 +18,31 @@ class Context(BaseModel): context: str -class Document(BaseModel): - id: UUID +class DocumentBase(BaseModel): + id: str title: str + + +class Document(DocumentBase): updated_at: datetime content: str -class DocumentDelete(BaseModel): - id: str - title: str +class DocumentDelete(DocumentBase): token: str current_password: str -class FAQ(BaseModel): - id: str +class FAQBase(BaseModel): title: str question: str answer: str -class FAQCreate(BaseModel): - title: str - question: str - answer: str - + +class FAQ(FAQBase): + id: str + + class FAQDelete(BaseModel): id: str - admin_password: str \ No newline at end of file + admin_password: str diff --git a/app/services/embeddings_service.py b/app/services/embeddings_service.py index 5e3d95a..e8d9717 100644 --- a/app/services/embeddings_service.py +++ b/app/services/embeddings_service.py @@ -3,39 +3,46 @@ import os from app.config import settings + class EmbeddingProvider(ABC): - """Interfaccia per i provider di embedding.""" + """Interfaccia per i provider di embedding.""" + + @abstractmethod + def get_embedding_function(self): + """Restituisce la funzione di embedding.""" + pass - @abstractmethod - def get_embedding_function(self): - """Restituisce la funzione di embedding.""" - pass class OpenAIEmbeddingProvider(EmbeddingProvider): - """Provider di embedding di OpenAI.""" - def __init__(self, api_key: str = settings.OPENAI_API_KEY, model_name: str = settings.EMBEDDING_MODEL_NAME): - self.api_key = api_key - self.model_name = model_name - self._embedding_function = None - - def get_embedding_function(self) -> OpenAIEmbeddings: - """Restituisce la funzione di embedding.""" - if os.environ.get("OPENAI_API_KEY") is None and self.api_key is None: - raise ValueError("API key non trovata. Assicurati di averla impostata.") - - if self._embedding_function is None: - self._embedding_function = OpenAIEmbeddings( - openai_api_key=self.api_key, - model=self.model_name - ) - return self._embedding_function - -def get_embedding_provider() -> EmbeddingProvider: - """Restituisce il provider di embedding in base alla configurazione.""" - provider = settings.LLM_PROVIDER.lower() - match provider: - case "openai": - return OpenAIEmbeddingProvider() - # aggiungere altri provider qui - case _: - raise ValueError(f"Provider di embedding '{provider}' non supportato.") \ No newline at end of file + """Provider di embedding di OpenAI.""" + + def __init__( + self, + api_key: str = settings.OPENAI_API_KEY, + model_name: str = settings.EMBEDDING_MODEL_NAME, + ): + self._api_key = api_key + self._model_name = model_name + self._embedding_function = None + + def get_embedding_function(self) -> OpenAIEmbeddings: + """Restituisce la funzione di embedding.""" + if os.environ.get("OPENAI_API_KEY") is None or self._api_key is None: + raise ValueError("API key non trovata. Assicurati di averla impostata.") + + if self._embedding_function is None: + self._embedding_function = OpenAIEmbeddings( + openai_api_key=self._api_key, model=self._model_name + ) + return self._embedding_function + + +def get_embedding_provider() -> EmbeddingProvider: + """Restituisce il provider di embedding in base alla configurazione.""" + provider = settings.LLM_PROVIDER.lower() + match provider: + case "openai": + return OpenAIEmbeddingProvider() + # aggiungere altri provider qui + case _: + raise ValueError(f"Provider di embedding '{provider}' non supportato.") diff --git a/app/services/file_manager_service.py b/app/services/file_manager_service.py index 57a2855..a3ae84e 100644 --- a/app/services/file_manager_service.py +++ b/app/services/file_manager_service.py @@ -9,11 +9,9 @@ import json import requests from datetime import datetime -from bson import ObjectId -from app.services.vector_database_service import get_vector_database, VectorDatabase +from app.services.vector_database_service import get_vector_database import app.schemas as schemas -from app.utils import get_object_id from app.config import settings logger = logging.getLogger(__name__) @@ -21,8 +19,8 @@ class FileManager(ABC): def __init__(self): - self.vector_database = get_vector_database() - self.splitter = RecursiveCharacterTextSplitter( + self._vector_database = get_vector_database() + self._splitter = RecursiveCharacterTextSplitter( chunk_size=settings.CHUNK_SIZE, chunk_overlap=settings.CHUNK_OVERLAP, ) @@ -48,7 +46,7 @@ def get_documents_number(self): - dict: Un dizionario contenente le statistiche sui documenti. """ # self.vector_database.delete_all_documents() - return self.vector_database.count() + return self._vector_database.count() def get_documents(self): """ @@ -61,7 +59,7 @@ def get_documents(self): Returns: - list: Una lista di documenti. """ - return self.vector_database.get_all_documents() + return self._vector_database.get_all_documents() async def _save_file(self, file: File): """ @@ -120,7 +118,7 @@ async def add_document(self, file: File, token: str): file_path = await self._save_file(file) chunks = await self._load_split_file(file_path) - self.vector_database.add_documents(chunks) + self._vector_database.add_documents(chunks) request_body = { "file_path": file_path, @@ -219,14 +217,14 @@ async def delete_document( ) # rimuovi da database vettoriale - self.vector_database.delete_document(file_path) + self._vector_database.delete_document(file_path) class TextFileManager(FileManager): async def _load_split_file(self, file_path: str): loader = TextLoader(file_path, encoding="utf-8") data = loader.load() - chunks = self.splitter.split_documents(data) + chunks = self._splitter.split_documents(data) return chunks @@ -234,7 +232,7 @@ class PdfFileManager(FileManager): async def _load_split_file(self, file_path: str): loader = PyPDFLoader(file_path, mode="single") data = loader.load() - chunks = self.splitter.split_documents(data) + chunks = self._splitter.split_documents(data) return chunks @@ -245,10 +243,10 @@ async def _load_split_file(self, faq: schemas.FAQ): metadata={"source": "faqs", "faq_id": faq.id}, ) print("[StringManager] data:", data) - chunks = self.splitter.split_documents([data]) + chunks = self._splitter.split_documents([data]) return chunks - async def add_faq(self, faq: schemas.FAQCreate, token: str): + async def add_faq(self, faq: schemas.FAQBase, token: str): """ Divide la faq in chunk, la salva nel database vettoriale. @@ -275,7 +273,7 @@ async def add_faq(self, faq: schemas.FAQCreate, token: str): answer=faq.answer, ) chunks = await self._load_split_file(faq_db) - self.vector_database.add_documents(chunks) + self._vector_database.add_documents(chunks) return faq_db @@ -295,7 +293,7 @@ async def delete_faq(self, faq: schemas.FAQDelete, token: str): "Authorization": f"Bearer {token}", }, json={ - "current_password": faq.admin_password, + "current_password": faq.admin_password, }, ) @@ -319,7 +317,7 @@ async def delete_faq(self, faq: schemas.FAQDelete, token: str): ) # rimuovi da database vettoriale - self.vector_database.delete_faq(faq.id) + self._vector_database.delete_faq(faq.id) async def update_faq(self, faq: schemas.FAQ, token: str): """ @@ -361,10 +359,10 @@ async def update_faq(self, faq: schemas.FAQ, token: str): status_code=500, detail=f"Errore nel caricare e processare file {update_req.text}", ) - - self.vector_database.delete_faq(faq.id) + + self._vector_database.delete_faq(faq.id) chunks = await self._load_split_file(faq) - self.vector_database.add_documents(chunks) + self._vector_database.add_documents(chunks) def get_file_manager(file: UploadFile = None): diff --git a/app/services/llm_response_service.py b/app/services/llm_response_service.py index 111a174..7324223 100644 --- a/app/services/llm_response_service.py +++ b/app/services/llm_response_service.py @@ -20,16 +20,16 @@ class LLMResponseService: def __init__(self): - self.LLM = get_llm_model() - self.vector_database = get_vector_database() - self.CHATBOT_INSTRUCTIONS = settings.CHATBOT_INSTRUCTIONS + self._LLM = get_llm_model() + self._vector_database = get_vector_database() + self._CHATBOT_INSTRUCTIONS = settings.CHATBOT_INSTRUCTIONS def _get_context(self, question: str) -> str: """ Get the context for the question from the vector database. """ try: - question_context = self.vector_database.search_context(question) + question_context = self._vector_database.search_context(question) if not question_context: return "" output = [] @@ -57,7 +57,7 @@ def generate_llm_response(self, question: schemas.Question) -> StreamingResponse context_messages = self._get_context(formatted_messages) messages = [ - SystemMessage(self.CHATBOT_INSTRUCTIONS), + SystemMessage(self._CHATBOT_INSTRUCTIONS), SystemMessage( f"Contesto: {context}\n{context_messages}", ), @@ -68,7 +68,7 @@ def generate_llm_response(self, question: schemas.Question) -> StreamingResponse print(f"PROMPT: {context} {context_messages}") print() try: - stream_response = self.LLM.model.astream(messages) + stream_response = self._LLM._model.astream(messages) async def stream_adapter(): try: @@ -98,8 +98,6 @@ async def stream_adapter(): return StreamingResponse(stream_adapter(), media_type="text/event-stream") def generate_llm_chat_name(self, chat_history: str) -> str: - messages_context = self._get_context(chat_history) - messages = [ SystemMessage( "Genera un nome per la chat in base alle domande e risposte fornite, deve essere composto da massimo 40 caratteri, non deve contenere informazioni personali e deve essere professionale. Rispondi solo con il nome della chat. Evita di includere 'chatbot' o 'assistente'. Deve racchiudere gli argomenti trattati." @@ -108,7 +106,7 @@ def generate_llm_chat_name(self, chat_history: str) -> str: ] try: - return self.LLM.model.invoke(messages).content + return self._LLM._model.invoke(messages).content except Exception as e: logger.error(f"Error generating chat name: {str(e)}", exc_info=True) raise HTTPException( diff --git a/app/services/llm_service.py b/app/services/llm_service.py index ccc4831..42a6ea7 100644 --- a/app/services/llm_service.py +++ b/app/services/llm_service.py @@ -6,11 +6,12 @@ logger = logging.getLogger(__name__) + # abstract class class LLM(ABC): def __init__(self, model_name: str): - self.model_name = model_name - self.model = None + self._model_name = model_name + self._model = None self._check_environment() self._initialize_model() @@ -30,7 +31,6 @@ def _initialize_model(self): class OpenAI(LLM): - # private method def _check_environment(self): if not os.environ.get("OPENAI_API_KEY"): raise ValueError("API key mancante per OpenAI") @@ -38,12 +38,14 @@ def _check_environment(self): def _initialize_model(self): try: - self.model = init_chat_model(model=self.model_name, model_provider="openai") - if self.model is None: - raise ValueError(f"Failed to initialize model {self.model_name}") + self._model = init_chat_model( + model=self._model_name, model_provider="openai", + ) + if self._model is None: + raise ValueError(f"Failed to initialize model {self._model_name}") except Exception as e: - logger.error(f"Error initializing model {self.model_name}: {str(e)}") - raise ValueError(f"Invalid or unavailable model: {self.model_name}") from e + logger.error(f"Error initializing model {self._model_name}: {str(e)}") + raise ValueError(f"Invalid or unavailable model: {self._model_name}") from e class Ollama(LLM): @@ -52,12 +54,15 @@ def _check_environment(self): def _initialize_model(self): try: - self.model = init_chat_model(model=self.model_name, model_provider="ollama") - if self.model is None: - raise ValueError(f"Failed to initialize model {self.model_name}") + self._model = init_chat_model( + model=self._model_name, model_provider="ollama" + ) + if self._model is None: + raise ValueError(f"Failed to initialize model {self._model_name}") except Exception as e: - logger.error(f"Error initializing model {self.model_name}: {str(e)}") - raise ValueError(f"Invalid or unavailable model: {self.model_name}") from e + logger.error(f"Error initializing model {self._model_name}: {str(e)}") + raise ValueError(f"Invalid or unavailable model: {self._model_name}") from e + def get_llm_model() -> LLM: """Factory function per creare un'istanza di LLM""" @@ -69,4 +74,4 @@ def get_llm_model() -> LLM: return Ollama(settings.LLM_MODEL_NAME) # aggiungere altri provider qui case _: - raise ValueError(f"Provider LLM '{provider}' non supportato.") \ No newline at end of file + raise ValueError(f"Provider LLM '{provider}' non supportato.") diff --git a/app/services/vector_database_service.py b/app/services/vector_database_service.py index e6ab236..93ef715 100644 --- a/app/services/vector_database_service.py +++ b/app/services/vector_database_service.py @@ -18,14 +18,36 @@ class VectorDatabase(ABC): """Interfaccia per la gestione del database vettoriale.""" + @abstractmethod + def __init__(self): + self._embedding_provider = get_embedding_provider() + self._persist_directory = settings.VECTOR_DB_DIRECTORY + self._db = None + + @abstractmethod + def _get_db(self): + pass + @abstractmethod def add_documents(self, documents: List[Document]): pass + @abstractmethod + def delete_document(self, document_path: str): + pass + @abstractmethod def search_context(self, query: str, results_number: int = 4) -> List[Document]: pass + @abstractmethod + def delete_all_documents(self): + pass + + @abstractmethod + def get_all_documents(self): + pass + # metodi ausiliari @abstractmethod def is_empty(self) -> bool: @@ -35,20 +57,13 @@ def is_empty(self) -> bool: def count(self) -> int: pass - # @abstractmethod - # def ensure_vectorized(self, documents_folder: str): - # """Metodo per caricare e vettorializzare se vuoto.""" - # # TODO: capire se lasciare in produzione - # pass - -# TODO: cistemare con i document loaders specifici class ChromaDB(VectorDatabase): """Implementazione del database vettoriale ChromaDB.""" def __init__( self, - persist_directory: str = settings.VECTOR_DB_PERSIST_DIRECTORY, + persist_directory: str = settings.VECTOR_DB_DIRECTORY, ): self.embedding_provider = get_embedding_provider() self.persist_directory = persist_directory @@ -58,7 +73,9 @@ def __init__( def _get_db(self): if self._db is None: - logger.info(f"ChromaDB: Inizializzazione del database in {self.persist_directory}") + logger.info( + f"ChromaDB: Inizializzazione del database in {self.persist_directory}" + ) self._db = Chroma( collection_name="supplai_documents", persist_directory=self.persist_directory, @@ -68,16 +85,18 @@ def _get_db(self): def _generate_document_ids(self, documents: List[Document]) -> List[str]: """Estrae gli ID dei documenti.""" - return [str(uuid.uuid3(uuid.NAMESPACE_DNS, doc.page_content)) for doc in documents] + return [ + str(uuid.uuid3(uuid.NAMESPACE_DNS, doc.page_content)) for doc in documents + ] def add_documents(self, documents_chunk: List[Document]): - print("document_chunks",documents_chunk) + print("document_chunks", documents_chunk) if not documents_chunk: logger.warning("Nessun documento fornito per l'aggiunta.") return try: db = self._get_db() - + db.add_documents( documents=documents_chunk, ids=self._generate_document_ids(documents_chunk), @@ -86,7 +105,7 @@ def add_documents(self, documents_chunk: List[Document]): print( f"ChromaDB: Aggiunti {len(documents_chunk)} documenti al vector store." ) - print("ChromaDB: numero di documenti presenti",self.count()) + print("ChromaDB: numero di documenti presenti", self.count()) logger.info(f"Aggiunti {len(documents_chunk)} documenti al vector store.") except Exception as e: @@ -103,7 +122,9 @@ def delete_document(self, document_path: str): print(f"[VECTOR DB] Documento con PATH {document_path} eliminato.") logger.info(f"Documento con PATH {document_path} eliminato.") except Exception as e: - logger.error(f"Errore durante l'eliminazione del documento: {e}", exc_info=True) + logger.error( + f"Errore durante l'eliminazione del documento: {e}", exc_info=True + ) raise def delete_faq(self, faq_id: str): @@ -118,8 +139,6 @@ def delete_faq(self, faq_id: str): raise def search_context(self, query: str, results_number: int = 4) -> List[Document]: - # TODO: Non è detto che serva: Verifica se ci sono documenti - # ensure_vectorized() try: db = self._get_db() results = db.similarity_search(query, k=results_number) @@ -135,9 +154,12 @@ def delete_all_documents(self): db.reset_collection() print("[VECTOR DB] Tutti i documenti eliminati.") except Exception as e: - logger.error(f"Errore durante l'eliminazione di tutti i documenti: {e}", exc_info=True) + logger.error( + f"Errore durante l'eliminazione di tutti i documenti: {e}", + exc_info=True, + ) raise - + def get_all_documents(self): """Recupera tutti i documenti dal database.""" try: @@ -145,7 +167,9 @@ def get_all_documents(self): results = db.get() return results except Exception as e: - logger.error(f"Errore durante il recupero di tutti i documenti: {e}", exc_info=True) + logger.error( + f"Errore durante il recupero di tutti i documenti: {e}", exc_info=True + ) return [] # metodi ausiliari @@ -160,9 +184,7 @@ def _get_collection_count(self) -> int: ) return 0 except Exception as e: - print( - f"Errore durante il recupero del count della collection: {e}" - ) + print(f"Errore durante il recupero del count della collection: {e}") return 0 def is_empty(self) -> bool: @@ -174,30 +196,11 @@ def count(self) -> int: def _delete(self): return self._get_db().delete_collection() - # def ensure_vectorized(self, documents_folder: str): - # """Controlla se il DB è vuoto e, in caso, carica e vettorializza.""" - # if self.is_empty(): - # logger.info( - # f"Vector store in {self.persist_directory} è vuoto. Avvio vettorizzazione da {documents_folder}..." - # ) - - # # TODO: delegare al context manager - # texts_to_add = self._load_and_split_docs(documents_folder) - # if texts_to_add: - # self.add_documents(texts_to_add) - # logger.info( - # f"Vettorizzazione completata. {self.count()} documenti nel DB." - # ) - # else: - # logger.warning("Nessun documento da vettorializzare trovato.") - # else: - # logger.info(f"Vector store già inizializzato con {self.count()} documenti.") - def get_vector_database() -> VectorDatabase: match settings.VECTOR_DB_PROVIDER.lower(): case "chroma": - vdb = ChromaDB(persist_directory=settings.VECTOR_DB_PERSIST_DIRECTORY) + vdb = ChromaDB(persist_directory=settings.VECTOR_DB_DIRECTORY) # vdb.ensure_vectorized(settings.DOCUMENTS_FOLDER) return vdb case _: diff --git a/tests/services/test_embeddings_service.py b/tests/services/test_embeddings_service.py index 5afac90..7d5bb1b 100644 --- a/tests/services/test_embeddings_service.py +++ b/tests/services/test_embeddings_service.py @@ -1,20 +1,36 @@ import pytest -from app.services.embeddings_service import OpenAIEmbeddingProvider, get_embedding_provider +from app.services.embeddings_service import ( + OpenAIEmbeddingProvider, + get_embedding_provider, +) from langchain_openai import OpenAIEmbeddings + def test_openai_embedding_provider(monkeypatch): - embedding_provider = OpenAIEmbeddingProvider("test_api_key", "text-embedding-ada-002") - assert isinstance(embedding_provider, OpenAIEmbeddingProvider), "Should return an instance of OpenAIEmbeddingProvider" - assert embedding_provider.api_key == "test_api_key", "Should use the mocked API key" - assert embedding_provider.model_name == "text-embedding-ada-002", "Should use the mocked model name" + embedding_provider = OpenAIEmbeddingProvider( + "test_api_key", "text-embedding-ada-002" + ) + assert isinstance( + embedding_provider, OpenAIEmbeddingProvider + ), "Should return an instance of OpenAIEmbeddingProvider" + assert ( + embedding_provider._api_key == "test_api_key" + ), "Should use the mocked API key" + assert ( + embedding_provider._model_name == "text-embedding-ada-002" + ), "Should use the mocked model name" + def test_openai_embedding_function(monkeypatch): embedding_provider = OpenAIEmbeddingProvider() embedding_function = embedding_provider.get_embedding_function() - + assert embedding_function is not None, "Should return a valid embedding function" - assert isinstance(embedding_function, OpenAIEmbeddings), "Should return an instance of OpenAIEmbeddings" + assert isinstance( + embedding_function, OpenAIEmbeddings + ), "Should return an instance of OpenAIEmbeddings" + def test_openai_embedding_function_no_api_key(monkeypatch): embedding_provider = OpenAIEmbeddingProvider(None, "text-embedding-ada-002") @@ -22,16 +38,24 @@ def test_openai_embedding_function_no_api_key(monkeypatch): with pytest.raises(ValueError): embedding_provider.get_embedding_function() + def test_get_embeddings_provider(monkeypatch): # Mock the environment variable for embedding provider - monkeypatch.setattr("app.services.embeddings_service.settings.LLM_PROVIDER", "openai") - + monkeypatch.setattr( + "app.services.embeddings_service.settings.LLM_PROVIDER", "openai" + ) + embedding_provider = get_embedding_provider() - assert isinstance(embedding_provider, OpenAIEmbeddingProvider), "Should return an instance of EmbeddingsService" - + assert isinstance( + embedding_provider, OpenAIEmbeddingProvider + ), "Should return an instance of EmbeddingsService" + + def test_get_embedding_provider_invalid(monkeypatch): # Mock the environment variable for an invalid provider - monkeypatch.setattr("app.services.embeddings_service.settings.LLM_PROVIDER", "invalid_provider") - + monkeypatch.setattr( + "app.services.embeddings_service.settings.LLM_PROVIDER", "invalid_provider" + ) + with pytest.raises(ValueError): - get_embedding_provider() \ No newline at end of file + get_embedding_provider() diff --git a/tests/services/test_file_manager_service.py b/tests/services/test_file_manager_service.py index 2d3f313..445eefe 100644 --- a/tests/services/test_file_manager_service.py +++ b/tests/services/test_file_manager_service.py @@ -1,27 +1,33 @@ import pytest -from app.services.file_manager_service import get_file_manager, get_file_manager_by_extension, TextFileManager, PdfFileManager +from app.services.file_manager_service import ( + get_file_manager, + get_file_manager_by_extension, + TextFileManager, + PdfFileManager, +) from fastapi import Depends, File, UploadFile from io import BytesIO from unittest.mock import MagicMock, patch import os import asyncio + def test_txt_file_manager_get_full_path(monkeypatch): MyTxtFileManager = TextFileManager() file_name = "test.txt" file_path = MyTxtFileManager._get_full_path(file_name) - expected_path = os.path.join("/data/documents", "test.txt") - assert file_path == expected_path, "Should return the correct full path for the .txt file" - + assert ( + file_path == expected_path + ), "Should return the correct full path for the .txt file" def test_txt_file_manager_save_file(monkeypatch): MyTxtFileManager = TextFileManager() file_name = "test.txt" file_content = b"Test content" - + # Mock file file = MagicMock(spec=UploadFile) file.filename = file_name @@ -30,7 +36,7 @@ def test_txt_file_manager_save_file(monkeypatch): async def mock_read(): return file_content - # Simulate seek method + # Simulate seek method def mock_seek(position): pass # Do nothing, just simulate the method @@ -39,16 +45,21 @@ def mock_seek(position): file.seek.return_value = mock_seek # Mock _get_full_path method - monkeypatch.setattr(MyTxtFileManager, "_get_full_path", lambda x: os.path.join(".cache", x)) + monkeypatch.setattr( + MyTxtFileManager, "_get_full_path", lambda x: os.path.join(".cache", x) + ) - # pass with open(file_path, "wb") as f: + # pass with open(file_path, "wb") as f: with patch("builtins.open", MagicMock()): # Use asyncio.run to execute the async method file_path = asyncio.run(MyTxtFileManager._save_file(file)) # Check if the path is correct expected_path = os.path.join(".cache", "test.txt") - assert file_path == expected_path, "Should return the correct full path for the saved .txt file" + assert ( + file_path == expected_path + ), "Should return the correct full path for the saved .txt file" + @pytest.mark.asyncio async def test_text_file_manager_load_split_file(): @@ -58,7 +69,7 @@ async def test_text_file_manager_load_split_file(): # Mock the file content mock_file_content = "This is a test content for the text file." - + # create the file with open(file_path, "w") as f: f.write(mock_file_content) @@ -68,6 +79,7 @@ async def test_text_file_manager_load_split_file(): assert isinstance(result, list), "Should return a list of documents" assert len(result) > 0, "Should return a non-empty list of documents" + @pytest.mark.asyncio async def test_text_file_manager_add_document(monkeypatch): # Create an instance of TextFileManager @@ -76,7 +88,7 @@ async def test_text_file_manager_add_document(monkeypatch): # Create mock implementations for _save_file and _load_split_file async def mock_save_file(file): return "/mock/path/to/test.txt" # Return a mock file path - + async def mock_load_split_file(file_path): return ["chunk1", "chunk2", "chunk3"] # Mock the chunks from file splitting @@ -89,8 +101,10 @@ async def mock_load_split_file(file_path): # Mock HTTP request using patch to completely prevent the actual request with patch("requests.post") as mock_post: - mock_post.return_value = MagicMock(status_code=201) # Mock response with status 201 - + mock_post.return_value = MagicMock( + status_code=201 + ) # Mock response with status 201 + # Create a mock UploadFile instance file = MagicMock(spec=UploadFile) file.filename = "test.txt" @@ -100,10 +114,15 @@ async def mock_load_split_file(file_path): result = await MyTxtFileManager.add_document(file, "test_token") # Check if the result is True, indicating success - assert result is True, "Should return True if the document is added successfully" + assert ( + result is True + ), "Should return True if the document is added successfully" # You can also check if the vector_database.add_documents was called correctly - MyTxtFileManager.vector_database.add_documents.assert_called_once_with(["chunk1", "chunk2", "chunk3"]) + MyTxtFileManager._vector_database.add_documents.assert_called_once_with( + ["chunk1", "chunk2", "chunk3"] + ) + @pytest.mark.asyncio async def test_text_file_manager_delete_document(monkeypatch): @@ -111,19 +130,27 @@ async def test_text_file_manager_delete_document(monkeypatch): MyTxtFileManager = TextFileManager() # Mock the file deletion logic - monkeypatch.setattr(os, "remove", MagicMock()) # Mock os.remove to avoid actual file deletion - monkeypatch.setattr(os.path, "exists", MagicMock(return_value=True)) # Mock os.path.exists to return True + monkeypatch.setattr( + os, "remove", MagicMock() + ) # Mock os.remove to avoid actual file deletion + monkeypatch.setattr( + os.path, "exists", MagicMock(return_value=True) + ) # Mock os.path.exists to return True # Mock vector_database to avoid interaction with the actual database monkeypatch.setattr(MyTxtFileManager, "vector_database", MagicMock()) # Mock the HTTP request using patch with patch("requests.delete") as mock_delete: - mock_delete.return_value = MagicMock(status_code=200) # Mock response with status 200 - + mock_delete.return_value = MagicMock( + status_code=200 + ) # Mock response with status 200 + # Call the delete_document method file_path = "/mock/path/to/test.txt" - monkeypatch.setattr(os.path, "isfile", MagicMock(return_value=True)) # Mock os.path.isfile to return True + monkeypatch.setattr( + os.path, "isfile", MagicMock(return_value=True) + ) # Mock os.path.isfile to return True result = await MyTxtFileManager.delete_document(file_path, "test_token") # Check if the result is True, indicating success @@ -133,10 +160,13 @@ async def test_text_file_manager_delete_document(monkeypatch): os.remove.assert_called_once_with(file_path) # Check that the vector_database.delete_document was called with the correct file path - MyTxtFileManager.vector_database.delete_document.assert_called_once_with(file_path) + MyTxtFileManager._vector_database.delete_document.assert_called_once_with( + file_path + ) + def test_get_file_manager(monkeypatch): - + # Mock the UploadFile object txt_File = MagicMock(spec=UploadFile) txt_File.content_type = "text/plain" @@ -150,26 +180,33 @@ def test_get_file_manager(monkeypatch): exe_File.content_type = "application/x-msdownload" exe_File.file = BytesIO(b"Test EXE content") - tfm = get_file_manager(txt_File) pfm = get_file_manager(pdf_File) with pytest.raises(ValueError): get_file_manager(exe_File) - assert isinstance(tfm, TextFileManager), "Should return an instance of TextFileManager" - assert isinstance(pfm, PdfFileManager), "Should return an instance of PdfFileManager" + assert isinstance( + tfm, TextFileManager + ), "Should return an instance of TextFileManager" + assert isinstance( + pfm, PdfFileManager + ), "Should return an instance of PdfFileManager" def test_get_file_manager_by_extension(): # Test for .txt file file_path = "test.txt" file_manager = get_file_manager_by_extension(file_path) - assert isinstance(file_manager, TextFileManager), "Should return an instance of TextFileManager" + assert isinstance( + file_manager, TextFileManager + ), "Should return an instance of TextFileManager" # Test for .pdf file file_path = "test.pdf" file_manager = get_file_manager_by_extension(file_path) - assert isinstance(file_manager, PdfFileManager), "Should return an instance of PdfFileManager" + assert isinstance( + file_manager, PdfFileManager + ), "Should return an instance of PdfFileManager" # Test for unsupported file type with pytest.raises(ValueError): - get_file_manager_by_extension("test.exe") \ No newline at end of file + get_file_manager_by_extension("test.exe") diff --git a/tests/services/test_llm_response_services.py b/tests/services/test_llm_response_services.py index 11ce89f..063e69b 100644 --- a/tests/services/test_llm_response_services.py +++ b/tests/services/test_llm_response_services.py @@ -1,40 +1,62 @@ import pytest -from app.services.llm_response_service import LLMResponseService, get_llm_response_service +from app.services.llm_response_service import ( + LLMResponseService, + get_llm_response_service, +) from app.services.vector_database_service import VectorDatabase, get_vector_database from app.services.llm_service import LLM, get_llm_model from app.schemas import Question from starlette.responses import StreamingResponse + def test_llm_response_service_initialization(): llm_response_service = LLMResponseService() - assert isinstance(llm_response_service, LLMResponseService), "Should return an instance of LLMResponseService" - assert isinstance(llm_response_service.LLM, LLM), "LLM should be an instance of LLM" - assert isinstance(llm_response_service.vector_database, VectorDatabase), "Vector database should be an instance of VectorDatabase" + assert isinstance( + llm_response_service, LLMResponseService + ), "Should return an instance of LLMResponseService" + assert isinstance( + llm_response_service._LLM, LLM + ), "LLM should be an instance of LLM" + assert isinstance( + llm_response_service._vector_database, VectorDatabase + ), "Vector database should be an instance of VectorDatabase" + def test_llm_response_service_get_context(monkeypatch): llm_response_service = LLMResponseService() question = "What is the capital of France?" - + # Mock the vector database search_context method mock_context = "Paris is the capital of France." - monkeypatch.setattr(llm_response_service.vector_database, "search_context", lambda q: mock_context) - + monkeypatch.setattr( + llm_response_service._vector_database, "search_context", lambda q: mock_context + ) + context = llm_response_service._get_context(question) assert context == mock_context, "Should return the mocked context" -@pytest.mark.xfail(reason="This test is expected to fail due to unimplemented functionality") + +@pytest.mark.xfail( + reason="This test is expected to fail due to unimplemented functionality" +) def test_llm_response_service_generate_response(monkeypatch): llm_response_service = LLMResponseService() question = Question(question="What is the capital of France?", messages=[]) # Mock the vector database search_context method mock_context = "Paris is the capital of France." - monkeypatch.setattr(llm_response_service.vector_database, "search_context", lambda q: mock_context) + monkeypatch.setattr( + llm_response_service._vector_database, "search_context", lambda q: mock_context + ) # Mock the LLM model stream method mock_stream_response = ["Paris is the capital of France."] - - monkeypatch.setattr(llm_response_service.LLM.model, "stream", lambda messages: mock_stream_response) - + + monkeypatch.setattr( + llm_response_service._LLM._model, + "stream", + lambda messages: mock_stream_response, + ) + # monkeypatch.setattr(llm_response_service, "generate_llm_response", lambda messages: mock_stream_response) result = llm_response_service.generate_llm_response(question) print(result) @@ -42,7 +64,9 @@ def test_llm_response_service_generate_response(monkeypatch): # assert isinstance(result, ), "Should return a StreamingResponse" assert result == mock_stream_response, "Should return the mocked stream response" + def test_get_llm_response_service(): llm_response_service = get_llm_response_service() - assert isinstance(llm_response_service, LLMResponseService), "Should return an instance of LLMResponseService" - + assert isinstance( + llm_response_service, LLMResponseService + ), "Should return an instance of LLMResponseService" diff --git a/tests/services/test_llm_service.py b/tests/services/test_llm_service.py index 0dafb63..4dc2fa6 100644 --- a/tests/services/test_llm_service.py +++ b/tests/services/test_llm_service.py @@ -2,18 +2,21 @@ from app.services.llm_service import LLM, OpenAI, Ollama, get_llm_model + def test_openai_initialization(monkeypatch): # Mock the environment variable for OpenAI API key monkeypatch.setattr("app.services.llm_service.settings.LLM_PROVIDER", "openai") monkeypatch.setenv("OPENAI_API_KEY", "test_key") llm = OpenAI(model_name="gpt-4") - assert llm.model_name == "gpt-4", "Model name should be 'gpt-4'" + assert llm._model_name == "gpt-4", "Model name should be 'gpt-4'" assert llm.model is not None, "Model should be initialized" assert isinstance(llm, OpenAI), "Expected an instance of OpenAI class" + def test_ollama_initialization(monkeypatch): pass + def test_get_llm_model_openai(monkeypatch): # Mock the environment variable for OpenAI monkeypatch.setattr("app.services.llm_service.settings.LLM_PROVIDER", "openai") @@ -21,14 +24,18 @@ def test_get_llm_model_openai(monkeypatch): llm = get_llm_model() assert isinstance(llm, OpenAI), "Expected an instance of OpenAI class" + def test_get_llm_model_ollama(monkeypatch): # Mock the environment variable for Ollama monkeypatch.setattr("app.services.llm_service.settings.LLM_PROVIDER", "ollama") llm = get_llm_model() assert isinstance(llm, Ollama), "Expected an instance of Ollama class" + def test_get_llm_model_invalid_provider(monkeypatch): # Mock the environment variable for an invalid provider - monkeypatch.setattr("app.services.llm_service.settings.LLM_PROVIDER", "invalid_provider") + monkeypatch.setattr( + "app.services.llm_service.settings.LLM_PROVIDER", "invalid_provider" + ) with pytest.raises(ValueError): get_llm_model() From 91c67c330ae379e0506b360cdc5c9cc32a63e6a9 Mon Sep 17 00:00:00 2001 From: lucaribon Date: Sat, 10 May 2025 15:54:56 +0200 Subject: [PATCH 6/7] fix: missing dependency in generate_chat_name; --- app/routes/llm.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/app/routes/llm.py b/app/routes/llm.py index 729e58d..331cc26 100644 --- a/app/routes/llm.py +++ b/app/routes/llm.py @@ -35,7 +35,7 @@ async def generate_chat_response( @router.post("/chat_name") async def generate_chat_name( - context: schemas.Context + context: schemas.Context, llm_response_service: LLMResponseService = Depends(get_llm_response_service) ): """ Genera un nome per la chat in base al contesto fornito. @@ -53,7 +53,6 @@ async def generate_chat_name( if not context.context: raise HTTPException(status_code=400, detail="Nessun contesto fornito") - llm_response_service = get_llm_response_service() return llm_response_service.generate_llm_chat_name( context.context ) \ No newline at end of file From dfe524026d86215433b347ad7ecdda917fbe6a7d Mon Sep 17 00:00:00 2001 From: lucaribon Date: Sat, 10 May 2025 18:13:25 +0200 Subject: [PATCH 7/7] fix: some failed tests; --- app/routes/documents.py | 33 +- requirements.txt | 6 +- tests/routes/test_documents.py | 45 +- tests/routes/test_llm.py | 8 +- tests/services/test_file_manager_service.py | 527 ++++++++++++------- tests/services/test_llm_response_services.py | 20 +- tests/services/test_llm_service.py | 2 +- tests/test_main.py | 13 - 8 files changed, 413 insertions(+), 241 deletions(-) delete mode 100644 tests/test_main.py diff --git a/app/routes/documents.py b/app/routes/documents.py index bfb9a4d..4a35e84 100644 --- a/app/routes/documents.py +++ b/app/routes/documents.py @@ -1,4 +1,4 @@ -from fastapi import APIRouter, HTTPException, UploadFile +from fastapi import APIRouter, HTTPException, UploadFile, File, Form, Depends from typing import List import os @@ -107,17 +107,22 @@ async def delete_file(fileDelete: schemas.DocumentDelete): * **HTTPException.500_INTERNAL_SERVER_ERROR**: Se si verifica un errore durante l'eliminazione del file. """ print("delete file title:", fileDelete) - file_manager = get_file_manager_by_extension(fileDelete.title) - if file_manager is None: - raise HTTPException(status_code=400, detail="File manager not found") - - file_path = file_manager.get_full_path(fileDelete.title) - print("file path:", file_path) - await file_manager.delete_document( - fileDelete.id, file_path, fileDelete.token, fileDelete.current_password - ) + try: + file_manager = get_file_manager_by_extension(fileDelete.title) + if file_manager is None: + raise HTTPException(status_code=400, detail="File manager not found") + + file_path = file_manager.get_full_path(fileDelete.title) + print("file path:", file_path) + await file_manager.delete_document( + fileDelete.id, file_path, fileDelete.token, fileDelete.current_password + ) - return {"message": "File deleted successfully"} + return {"message": "File deleted successfully"} + except HTTPException as http_exc: + raise http_exc + except Exception as e: + raise HTTPException(status_code=500, detail=f"Error in deleting file: {str(e)}") @router.get("") @@ -131,4 +136,8 @@ def get_documents(): * **List[str]**: I nomi dei file nella directory /data/documents. """ file_manager = get_file_manager() - return file_manager.get_documents_number(), file_manager.get_documents(),os.listdir("/data/documents") + return ( + file_manager.get_documents_number(), + file_manager.get_documents(), + os.listdir("/data/documents"), + ) diff --git a/requirements.txt b/requirements.txt index b120d73..0d51b84 100644 --- a/requirements.txt +++ b/requirements.txt @@ -15,6 +15,6 @@ pytest-asyncio==0.26.0 httpx==0.28.1 pypdf==5.4.0 python-multipart==0.0.20 -pytest-cov==6.0.0 -coverage==7.6.10 -coveralls==4.0.1 \ No newline at end of file +pytest-cov==6.1.1 +coverage==7.8.0 +coveralls==1.8.0 \ No newline at end of file diff --git a/tests/routes/test_documents.py b/tests/routes/test_documents.py index be4bd30..bdf1e3f 100644 --- a/tests/routes/test_documents.py +++ b/tests/routes/test_documents.py @@ -48,7 +48,7 @@ async def test_upload_file_with_error_ext(monkeypatch): ("files", ("test.exe", BytesIO(b"test content"), "text/plain")), # Valid file ] response = await ac.post( - "/documents/upload_file?token=test_token", + "/documents?token=test_token", files=files, # Correct usage of file as a tuple ) print("Response:", response.json()) @@ -67,7 +67,7 @@ async def test_upload_file_with_error_mme(monkeypatch): ("files", ("test.txt", BytesIO(b"test content"), "text/test")), # Valid file ] response = await ac.post( - "/documents/upload_file?token=test_token", + "/documents?token=test_token", files=files, # Correct usage of file as a tuple ) print("Response:", response.json()) @@ -87,7 +87,7 @@ async def mock_add_document(_,__): ("files", ("test.txt", BytesIO(b"test content"), "text/plain")), # Valid file ] response = await ac.post( - "/documents/upload_file?token=test_token", + "/documents?token=test_token", files=files, # Correct usage of file as a tuple ) print("Response:", response.json()) @@ -108,7 +108,7 @@ async def mock_add_document(_,__): ] response = await ac.post( - "/documents/upload_file?token=test_token", + "/documents?token=test_token", files=files, # Correct usage of file as a tuple ) assert "EERROR" in response.json()["detail"], "EERROR should be in the error message" @@ -127,7 +127,7 @@ async def mock_add_document(_,__): ] response = await ac.post( - "/documents/upload_file?token=test_token", + "/documents?token=test_token", files=files, # Correct usage of file as a tuple ) assert "EERROR" in response.json()["detail"], "EERROR should be in the error message" @@ -146,7 +146,7 @@ async def mock_add_document(_,__): ("files", ("test2.txt", BytesIO(b"test content"), "text/plain")), # Valid file ] response = await ac.post( - "/documents/upload_file?token=test_token", + "/documents?token=test_token", files=files, # Correct usage of file as a tuple ) print("Response:", response.json()) @@ -167,7 +167,7 @@ async def mock_add_document(_,__): ("files", ("test.exe", BytesIO(b"test content"), "text/plain")), # Valid file ] response = await ac.post( - "/documents/upload_file?token=test_token", + "/documents?token=test_token", files=files, # Correct usage of file as a tuple ) print("Response:", response.json()) @@ -195,7 +195,7 @@ async def mock_delete_document(*args, **kwargs): } async with AsyncClient(transport=transport, base_url="http://test") as ac: - response = await ac.request("DELETE", "/documents/delete_file", json=payload) + response = await ac.request("DELETE", "/documents", json=payload) assert response.status_code == 200 assert response.json()["message"] == "File deleted successfully" @@ -211,7 +211,7 @@ async def test_delete_file_file_manager_not_found(monkeypatch): } async with AsyncClient(transport=transport, base_url="http://test") as ac: - response = await ac.request("DELETE", "/documents/delete_file", json=payload) + response = await ac.request("DELETE", "/documents", json=payload) assert response.status_code == 400 assert response.json()["detail"] == "File manager not found" @@ -234,7 +234,7 @@ async def mock_delete_document(*args, **kwargs): } async with AsyncClient(transport=transport, base_url="http://test") as ac: - response = await ac.request("DELETE", "/documents/delete_file", json=payload) + response = await ac.request("DELETE", "/documents", json=payload) assert response.status_code == 404 assert response.json()["detail"] == "Document not found" @@ -257,14 +257,14 @@ async def mock_delete_document(*args, **kwargs): } async with AsyncClient(transport=transport, base_url="http://test") as ac: - response = await ac.request("DELETE", "/documents/delete_file", json=payload) + response = await ac.request("DELETE", "/documents", json=payload) assert response.status_code == 500 assert response.json()["detail"] == "Error in deleting file" @pytest.mark.asyncio async def test_delete_file_http_exception_default(monkeypatch): async def mock_delete_document(*args, **kwargs): - raise HTTPException(status_code=501, detail="Error in deleting file") + raise HTTPException(status_code=500, detail="Error in deleting file") file_manager = MagicMock() file_manager.get_full_path.return_value = "/data/documents/test.txt" @@ -280,7 +280,7 @@ async def mock_delete_document(*args, **kwargs): } async with AsyncClient(transport=transport, base_url="http://test") as ac: - response = await ac.request("DELETE", "/documents/delete_file", json=payload) + response = await ac.request("DELETE", "/documents", json=payload) assert response.status_code == 500 assert response.json()["detail"] == "Error in deleting file" @@ -304,17 +304,30 @@ async def mock_delete_document(*args, **kwargs): } async with AsyncClient(transport=transport, base_url="http://test") as ac: - response = await ac.request("DELETE", "/documents/delete_file", json=payload) + response = await ac.request("DELETE", "/documents", json=payload) assert response.status_code == 500 assert "Error in deleting file" in response.json()["detail"] @pytest.mark.asyncio async def test_get_documents(monkeypatch): + # Mock the file_manager and its methods that get_documents route will call + mock_file_manager = MagicMock() + mock_file_manager.get_documents_number.return_value = 2 + mock_file_manager.get_documents.return_value = ["doc1.txt", "doc2.pdf"] + + # Mock os.listdir as it's also called by the route monkeypatch.setattr(os, "listdir", lambda _: ["doc1.txt", "doc2.pdf"]) + + # Mock get_file_manager to return your mock_file_manager + monkeypatch.setattr("app.routes.documents.get_file_manager", lambda: mock_file_manager) + async with AsyncClient(transport=transport, base_url="http://test") as ac: - response = await ac.get("/documents/get_documents") + response = await ac.get("/documents") print("Response:", response.json()) assert response.status_code == 200 - assert response.json() == ["doc1.txt", "doc2.pdf"], "Should return the list of documents" \ No newline at end of file + # Adjust the expected response based on what your route actually returns: + # The route returns a tuple: (number, list_of_docs_from_manager, list_of_docs_from_os_listdir) + expected_response_data = [2, ["doc1.txt", "doc2.pdf"], ["doc1.txt", "doc2.pdf"]] + assert response.json() == expected_response_data, "Should return the document count and names" \ No newline at end of file diff --git a/tests/routes/test_llm.py b/tests/routes/test_llm.py index 9cd19cf..6cc76bb 100644 --- a/tests/routes/test_llm.py +++ b/tests/routes/test_llm.py @@ -58,10 +58,4 @@ async def test_generate_chat_name_no_context(): async with AsyncClient(transport=transport, base_url="http://test") as ac: response = await ac.post("/llm/chat_name", json={"context": ""}) assert response.status_code == 400 - assert response.json() == {"detail": "Nessun contesto fornito"} - -@pytest.mark.asyncio -async def test_ping(): - async with AsyncClient(transport=transport, base_url="http://test") as ac: - response = await ac.get("/llm/ping") - assert response.status_code == 200 \ No newline at end of file + assert response.json() == {"detail": "Nessun contesto fornito"} \ No newline at end of file diff --git a/tests/services/test_file_manager_service.py b/tests/services/test_file_manager_service.py index 61afc28..5798506 100644 --- a/tests/services/test_file_manager_service.py +++ b/tests/services/test_file_manager_service.py @@ -14,6 +14,7 @@ import requests from langchain_community.document_loaders import PyPDFLoader, TextLoader + @pytest.fixture(autouse=True) def documents_dir(tmp_path, monkeypatch): # Create a temporary directory for the test @@ -26,13 +27,14 @@ def documents_dir(tmp_path, monkeypatch): # Cleanup is handled by pytest's tmp_path fixture -def test_txt_file_manager_get_full_path(documents_dir,monkeypatch): +def test_txt_file_manager_get_full_path(documents_dir, monkeypatch): MyTxtFileManager = TextFileManager() file_name = "test.txt" file_path = MyTxtFileManager.get_full_path(file_name) expected_path = os.path.join(documents_dir, "test.txt") - assert file_path == expected_path, "Should return the correct full path for the .txt file" - + assert ( + file_path == expected_path + ), "Should return the correct full path for the .txt file" def test_txt_file_manager_save_file(monkeypatch): @@ -49,7 +51,9 @@ def test_txt_file_manager_save_file(monkeypatch): file.seek.return_value = MagicMock(side_effect=None) # Mock seek method # Mock _get_full_path method - monkeypatch.setattr(MyTxtFileManager, "get_full_path", lambda x: os.path.join(".cache", x)) + monkeypatch.setattr( + MyTxtFileManager, "get_full_path", lambda x: os.path.join(".cache", x) + ) # pass with open(file_path, "wb") as f: with patch("builtins.open", MagicMock()): @@ -89,38 +93,27 @@ async def test_text_file_manager_add_document(monkeypatch): # Create mock implementations for _save_file and _load_split_file async def mock_save_file(file): - return "/mock/path/to/test.txt" # Return a mock file path + return "/mock/path/to/test.txt" async def mock_load_split_file(file_path): - return ["chunk1", "chunk2", "chunk3"] # Mock the chunks from file splitting + return ["chunk1", "chunk2", "chunk3"] - # Use monkeypatch to replace the methods with the mock implementations monkeypatch.setattr(MyTxtFileManager, "_save_file", mock_save_file) monkeypatch.setattr(MyTxtFileManager, "_load_split_file", mock_load_split_file) - # Mock vector_database to avoid interaction with the actual database - monkeypatch.setattr(MyTxtFileManager, "vector_database", MagicMock()) + # CORRECTED: Mock the _vector_database attribute + monkeypatch.setattr(MyTxtFileManager, "_vector_database", MagicMock()) - # Mock HTTP request using patch to completely prevent the actual request with patch("requests.post") as mock_post: - mock_post.return_value = MagicMock( - status_code=201 - ) # Mock response with status 201 + mock_post.return_value = MagicMock(status_code=201) - # Create a mock UploadFile instance file = MagicMock(spec=UploadFile) file.filename = "test.txt" - file.file = BytesIO(b"Test content") # Mock the file content + file.file = BytesIO(b"Test content") - # Call the add_document method result = await MyTxtFileManager.add_document(file, "test_token") - # Check if the result is True, indicating success - assert ( - result is True - ), "Should return True if the document is added successfully" - - # You can also check if the vector_database.add_documents was called correctly + assert result is True MyTxtFileManager._vector_database.add_documents.assert_called_once_with( ["chunk1", "chunk2", "chunk3"] ) @@ -134,7 +127,7 @@ async def test_text_file_manager_add_document_error_400(monkeypatch): # Create mock implementations for _save_file and _load_split_file async def mock_save_file(file): return "/mock/path/to/test.txt" # Return a mock file path - + async def mock_load_split_file(file_path): return ["chunk1", "chunk2", "chunk3"] # Mock the chunks from file splitting @@ -142,13 +135,15 @@ async def mock_load_split_file(file_path): monkeypatch.setattr(MyTxtFileManager, "_save_file", mock_save_file) monkeypatch.setattr(MyTxtFileManager, "_load_split_file", mock_load_split_file) - # Mock vector_database to avoid interaction with the actual database - monkeypatch.setattr(MyTxtFileManager, "vector_database", MagicMock()) + # Mock _vector_database to avoid interaction with the actual database + monkeypatch.setattr(MyTxtFileManager, "_vector_database", MagicMock()) # Mock HTTP request using patch to completely prevent the actual request with patch("requests.post") as mock_post: - mock_post.return_value = MagicMock(status_code=400) # Mock response with status 201 - + mock_post.return_value = MagicMock( + status_code=400 + ) # Mock response with status 201 + # Create a mock UploadFile instance file = MagicMock(spec=UploadFile) file.filename = "test.txt" @@ -157,8 +152,12 @@ async def mock_load_split_file(file_path): # Call the add_document method with pytest.raises(HTTPException) as exc_info: result = await MyTxtFileManager.add_document(file, "test_token") - assert exc_info.value.status_code == 400, "Should raise HTTPException with status code 400" - assert exc_info.value.detail == "Documento già esistente", "Should raise HTTPException with the correct detail" + assert ( + exc_info.value.status_code == 400 + ), "Should raise HTTPException with status code 400" + assert ( + exc_info.value.detail == "Documento già esistente" + ), "Should raise HTTPException with the correct detail" @pytest.mark.asyncio @@ -169,7 +168,7 @@ async def test_text_file_manager_add_document_error_500(monkeypatch): # Create mock implementations for _save_file and _load_split_file async def mock_save_file(file): return "/mock/path/to/test.txt" # Return a mock file path - + async def mock_load_split_file(file_path): return ["chunk1", "chunk2", "chunk3"] # Mock the chunks from file splitting @@ -177,13 +176,15 @@ async def mock_load_split_file(file_path): monkeypatch.setattr(MyTxtFileManager, "_save_file", mock_save_file) monkeypatch.setattr(MyTxtFileManager, "_load_split_file", mock_load_split_file) - # Mock vector_database to avoid interaction with the actual database - monkeypatch.setattr(MyTxtFileManager, "vector_database", MagicMock()) + # Mock _vector_database to avoid interaction with the actual database + monkeypatch.setattr(MyTxtFileManager, "_vector_database", MagicMock()) # Mock HTTP request using patch to completely prevent the actual request with patch("requests.post") as mock_post: - mock_post.return_value = MagicMock(status_code=500) # Mock response with status 201 - + mock_post.return_value = MagicMock( + status_code=500 + ) # Mock response with status 201 + # Create a mock UploadFile instance file = MagicMock(spec=UploadFile) file.filename = "test.txt" @@ -192,9 +193,12 @@ async def mock_load_split_file(file_path): # Call the add_document method with pytest.raises(HTTPException) as exc_info: result = await MyTxtFileManager.add_document(file, "test_token") - assert exc_info.value.status_code == 500, "Should raise HTTPException with status code 400" - assert exc_info.value.detail == "Errore nel caricare e processare file", "Should raise HTTPException with the correct detail" - + assert ( + exc_info.value.status_code == 500 + ), "Should raise HTTPException with status code 400" + assert ( + exc_info.value.detail == "Errore nel caricare e processare file" + ), "Should raise HTTPException with the correct detail" @pytest.mark.asyncio @@ -205,7 +209,7 @@ async def test_text_file_manager_add_document_error_defalut(monkeypatch): # Create mock implementations for _save_file and _load_split_file async def mock_save_file(file): return "/mock/path/to/test.txt" # Return a mock file path - + async def mock_load_split_file(file_path): return ["chunk1", "chunk2", "chunk3"] # Mock the chunks from file splitting @@ -213,13 +217,15 @@ async def mock_load_split_file(file_path): monkeypatch.setattr(MyTxtFileManager, "_save_file", mock_save_file) monkeypatch.setattr(MyTxtFileManager, "_load_split_file", mock_load_split_file) - # Mock vector_database to avoid interaction with the actual database - monkeypatch.setattr(MyTxtFileManager, "vector_database", MagicMock()) + # Mock _vector_database to avoid interaction with the actual database + monkeypatch.setattr(MyTxtFileManager, "_vector_database", MagicMock()) # Mock HTTP request using patch to completely prevent the actual request with patch("requests.post") as mock_post: - mock_post.return_value = MagicMock(status_code=501) # Mock response with status 201 - + mock_post.return_value = MagicMock( + status_code=501 + ) # Mock response with status 201 + # Create a mock UploadFile instance file = MagicMock(spec=UploadFile) file.filename = "test.txt" @@ -228,8 +234,12 @@ async def mock_load_split_file(file_path): # Call the add_document method with pytest.raises(HTTPException) as exc_info: result = await MyTxtFileManager.add_document(file, "test_token") - assert exc_info.value.status_code == 500, "Should raise HTTPException with status code 400" - assert exc_info.value.detail == "Errore nel caricare e processare file", "Should raise HTTPException with the correct detail" + assert ( + exc_info.value.status_code == 500 + ), "Should raise HTTPException with status code 400" + assert ( + exc_info.value.detail == "Errore nel caricare e processare file" + ), "Should raise HTTPException with the correct detail" @pytest.mark.asyncio @@ -239,17 +249,25 @@ async def test_delete_document_os_remove_error(monkeypatch): # Mock the file deletion logic monkeypatch.setattr(os, "remove", MagicMock(side_effect=OSError("File not found"))) - monkeypatch.setattr(os.path, "exists", MagicMock(return_value=True)) # Mock os.path.exists to return True - monkeypatch.setattr(os.path, "isfile", MagicMock(return_value=True)) # Mock os.path.isfile to return True + monkeypatch.setattr( + os.path, "exists", MagicMock(return_value=True) + ) # Mock os.path.exists to return True + monkeypatch.setattr( + os.path, "isfile", MagicMock(return_value=True) + ) # Mock os.path.isfile to return True - # Mock vector_database to avoid interaction with the actual database + # Mock _vector_database to avoid interaction with the actual database with pytest.raises(HTTPException) as exc_info: file_path = "/mock/path/to/test.txt" - result = await MyTxtFileManager.delete_document("idid",file_path, "test_token","pwdpwd") - assert exc_info.value.status_code == 404, "Should raise HTTPException with status code 404" - assert exc_info.value.detail.startswith(f"File {file_path} non trovato:"), "Should raise HTTPException with the correct detail" - - + result = await MyTxtFileManager.delete_document( + "idid", file_path, "test_token", "pwdpwd" + ) + assert ( + exc_info.value.status_code == 404 + ), "Should raise HTTPException with status code 404" + assert exc_info.value.detail.startswith( + f"File {file_path} non trovato:" + ), "Should raise HTTPException with the correct detail" @pytest.mark.asyncio @@ -260,7 +278,7 @@ async def test_text_file_manager_add_document_error_400(monkeypatch): # Create mock implementations for _save_file and _load_split_file async def mock_save_file(file): return "/mock/path/to/test.txt" # Return a mock file path - + async def mock_load_split_file(file_path): return ["chunk1", "chunk2", "chunk3"] # Mock the chunks from file splitting @@ -268,13 +286,15 @@ async def mock_load_split_file(file_path): monkeypatch.setattr(MyTxtFileManager, "_save_file", mock_save_file) monkeypatch.setattr(MyTxtFileManager, "_load_split_file", mock_load_split_file) - # Mock vector_database to avoid interaction with the actual database - monkeypatch.setattr(MyTxtFileManager, "vector_database", MagicMock()) + # Mock _vector_database to avoid interaction with the actual database + monkeypatch.setattr(MyTxtFileManager, "_vector_database", MagicMock()) # Mock HTTP request using patch to completely prevent the actual request with patch("requests.post") as mock_post: - mock_post.return_value = MagicMock(status_code=400) # Mock response with status 201 - + mock_post.return_value = MagicMock( + status_code=400 + ) # Mock response with status 201 + # Create a mock UploadFile instance file = MagicMock(spec=UploadFile) file.filename = "test.txt" @@ -283,8 +303,12 @@ async def mock_load_split_file(file_path): # Call the add_document method with pytest.raises(HTTPException) as exc_info: result = await MyTxtFileManager.add_document(file, "test_token") - assert exc_info.value.status_code == 400, "Should raise HTTPException with status code 400" - assert exc_info.value.detail == "Documento già esistente", "Should raise HTTPException with the correct detail" + assert ( + exc_info.value.status_code == 400 + ), "Should raise HTTPException with status code 400" + assert ( + exc_info.value.detail == "Documento già esistente" + ), "Should raise HTTPException with the correct detail" @pytest.mark.asyncio @@ -295,7 +319,7 @@ async def test_text_file_manager_add_document_error_500(monkeypatch): # Create mock implementations for _save_file and _load_split_file async def mock_save_file(file): return "/mock/path/to/test.txt" # Return a mock file path - + async def mock_load_split_file(file_path): return ["chunk1", "chunk2", "chunk3"] # Mock the chunks from file splitting @@ -303,13 +327,15 @@ async def mock_load_split_file(file_path): monkeypatch.setattr(MyTxtFileManager, "_save_file", mock_save_file) monkeypatch.setattr(MyTxtFileManager, "_load_split_file", mock_load_split_file) - # Mock vector_database to avoid interaction with the actual database - monkeypatch.setattr(MyTxtFileManager, "vector_database", MagicMock()) + # Mock _vector_database to avoid interaction with the actual database + monkeypatch.setattr(MyTxtFileManager, "_vector_database", MagicMock()) # Mock HTTP request using patch to completely prevent the actual request with patch("requests.post") as mock_post: - mock_post.return_value = MagicMock(status_code=500) # Mock response with status 201 - + mock_post.return_value = MagicMock( + status_code=500 + ) # Mock response with status 201 + # Create a mock UploadFile instance file = MagicMock(spec=UploadFile) file.filename = "test.txt" @@ -318,9 +344,12 @@ async def mock_load_split_file(file_path): # Call the add_document method with pytest.raises(HTTPException) as exc_info: result = await MyTxtFileManager.add_document(file, "test_token") - assert exc_info.value.status_code == 500, "Should raise HTTPException with status code 400" - assert exc_info.value.detail == "Errore nel caricare e processare file", "Should raise HTTPException with the correct detail" - + assert ( + exc_info.value.status_code == 500 + ), "Should raise HTTPException with status code 400" + assert ( + exc_info.value.detail == "Errore nel caricare e processare file" + ), "Should raise HTTPException with the correct detail" @pytest.mark.asyncio @@ -331,7 +360,7 @@ async def test_text_file_manager_add_document_error_defalut(monkeypatch): # Create mock implementations for _save_file and _load_split_file async def mock_save_file(file): return "/mock/path/to/test.txt" # Return a mock file path - + async def mock_load_split_file(file_path): return ["chunk1", "chunk2", "chunk3"] # Mock the chunks from file splitting @@ -339,13 +368,15 @@ async def mock_load_split_file(file_path): monkeypatch.setattr(MyTxtFileManager, "_save_file", mock_save_file) monkeypatch.setattr(MyTxtFileManager, "_load_split_file", mock_load_split_file) - # Mock vector_database to avoid interaction with the actual database - monkeypatch.setattr(MyTxtFileManager, "vector_database", MagicMock()) + # Mock _vector_database to avoid interaction with the actual database + monkeypatch.setattr(MyTxtFileManager, "_vector_database", MagicMock()) # Mock HTTP request using patch to completely prevent the actual request with patch("requests.post") as mock_post: - mock_post.return_value = MagicMock(status_code=501) # Mock response with status 201 - + mock_post.return_value = MagicMock( + status_code=501 + ) # Mock response with status 201 + # Create a mock UploadFile instance file = MagicMock(spec=UploadFile) file.filename = "test.txt" @@ -354,8 +385,12 @@ async def mock_load_split_file(file_path): # Call the add_document method with pytest.raises(HTTPException) as exc_info: result = await MyTxtFileManager.add_document(file, "test_token") - assert exc_info.value.status_code == 500, "Should raise HTTPException with status code 400" - assert exc_info.value.detail == "Errore nel caricare e processare file", "Should raise HTTPException with the correct detail" + assert ( + exc_info.value.status_code == 500 + ), "Should raise HTTPException with status code 400" + assert ( + exc_info.value.detail == "Errore nel caricare e processare file" + ), "Should raise HTTPException with the correct detail" @pytest.mark.asyncio @@ -365,15 +400,25 @@ async def test_delete_document_os_remove_error(monkeypatch): # Mock the file deletion logic monkeypatch.setattr(os, "remove", MagicMock(side_effect=OSError("File not found"))) - monkeypatch.setattr(os.path, "exists", MagicMock(return_value=True)) # Mock os.path.exists to return True - monkeypatch.setattr(os.path, "isfile", MagicMock(return_value=True)) # Mock os.path.isfile to return True + monkeypatch.setattr( + os.path, "exists", MagicMock(return_value=True) + ) # Mock os.path.exists to return True + monkeypatch.setattr( + os.path, "isfile", MagicMock(return_value=True) + ) # Mock os.path.isfile to return True - # Mock vector_database to avoid interaction with the actual database + # Mock _vector_database to avoid interaction with the actual database with pytest.raises(HTTPException) as exc_info: file_path = "/mock/path/to/test.txt" - result = await MyTxtFileManager.delete_document("idid",file_path, "test_token","pwdpwd") - assert exc_info.value.status_code == 404, "Should raise HTTPException with status code 404" - assert exc_info.value.detail.startswith(f"File {file_path} non trovato:"), "Should raise HTTPException with the correct detail" + result = await MyTxtFileManager.delete_document( + "idid", file_path, "test_token", "pwdpwd" + ) + assert ( + exc_info.value.status_code == 404 + ), "Should raise HTTPException with status code 404" + assert exc_info.value.detail.startswith( + f"File {file_path} non trovato:" + ), "Should raise HTTPException with the correct detail" @pytest.mark.asyncio @@ -389,17 +434,23 @@ async def test_text_file_manager_delete_document(monkeypatch): os.path, "exists", MagicMock(return_value=True) ) # Mock os.path.exists to return True - # Mock vector_database to avoid interaction with the actual database - monkeypatch.setattr(MyTxtFileManager, "vector_database", MagicMock()) + # Mock _vector_database to avoid interaction with the actual database + monkeypatch.setattr(MyTxtFileManager, "_vector_database", MagicMock()) # Mock the HTTP request using patch with patch("requests.delete") as mock_delete: - mock_delete.return_value = MagicMock(status_code=204) # Mock response with status 200 - + mock_delete.return_value = MagicMock( + status_code=204 + ) # Mock response with status 200 + # Call the delete_document method file_path = "/mock/path/to/test.txt" - monkeypatch.setattr(os.path, "isfile", MagicMock(return_value=True)) # Mock os.path.isfile to return True - result = await MyTxtFileManager.delete_document("idid",file_path, "test_token","pwdpwd") + monkeypatch.setattr( + os.path, "isfile", MagicMock(return_value=True) + ) # Mock os.path.isfile to return True + result = await MyTxtFileManager.delete_document( + "idid", file_path, "test_token", "pwdpwd" + ) # Check if the result is True, indicating success assert result is None, "The document should be deleted successfully" @@ -407,35 +458,50 @@ async def test_text_file_manager_delete_document(monkeypatch): # Assert that the file removal from the filesystem was attempted os.remove.assert_called_once_with(file_path) - # Check that the vector_database.delete_document was called with the correct file path + # Check that the _vector_database.delete_document was called with the correct file path MyTxtFileManager._vector_database.delete_document.assert_called_once_with( file_path ) - + @pytest.mark.asyncio async def test_text_file_manager_delete_document_not_found(monkeypatch): # Create an instance of TextFileManager MyTxtFileManager = TextFileManager() # Mock the file deletion logic - monkeypatch.setattr(os, "remove", MagicMock()) # Mock os.remove to avoid actual file deletion - monkeypatch.setattr(os.path, "exists", MagicMock(return_value=True)) # Mock os.path.exists to return True + monkeypatch.setattr( + os, "remove", MagicMock() + ) # Mock os.remove to avoid actual file deletion + monkeypatch.setattr( + os.path, "exists", MagicMock(return_value=True) + ) # Mock os.path.exists to return True - # Mock vector_database to avoid interaction with the actual database - monkeypatch.setattr(MyTxtFileManager, "vector_database", MagicMock()) - - # Mock the HTTP request using patch + # Mock _vector_database to avoid interaction with the actual database + monkeypatch.setattr(MyTxtFileManager, "_vector_database", MagicMock()) + + # Mock the HTTP request using patch with patch("requests.delete") as mock_delete: - mock_delete.return_value = MagicMock(status_code=400) # Mock response with status 200 - + mock_delete.return_value = MagicMock( + status_code=404 + ) # Mock response with status 200 + # Call the delete_document method file_path = "/mock/path/to/test.txt" - monkeypatch.setattr(os.path, "isfile", MagicMock(return_value=True)) # Mock os.path.isfile to return True + monkeypatch.setattr( + os.path, "isfile", MagicMock(return_value=True) + ) # Mock os.path.isfile to return True with pytest.raises(HTTPException) as exc_info: - result = await MyTxtFileManager.delete_document("idid",file_path, "test_token","pwdpwd") - assert exc_info.value.status_code == 400, "Should raise HTTPException with status code 400" - assert exc_info.value.detail == "Documento non trovato", "Should raise HTTPException with the correct detail" + result = await MyTxtFileManager.delete_document( + "idid", file_path, "test_token", "pwdpwd" + ) + assert ( + exc_info.value.status_code == 404 + ), "Should raise HTTPException with status code 404" + assert ( + exc_info.value.detail == "Documento non trovato" + ), "Should raise HTTPException with the correct detail" + @pytest.mark.asyncio async def test_text_file_manager_delete_document_500_exception(monkeypatch): @@ -443,46 +509,76 @@ async def test_text_file_manager_delete_document_500_exception(monkeypatch): MyTxtFileManager = TextFileManager() # Mock the file deletion logic - monkeypatch.setattr(os, "remove", MagicMock()) # Mock os.remove to avoid actual file deletion - monkeypatch.setattr(os.path, "exists", MagicMock(return_value=True)) # Mock os.path.exists to return True + monkeypatch.setattr( + os, "remove", MagicMock() + ) # Mock os.remove to avoid actual file deletion + monkeypatch.setattr( + os.path, "exists", MagicMock(return_value=True) + ) # Mock os.path.exists to return True - # Mock vector_database to avoid interaction with the actual database - monkeypatch.setattr(MyTxtFileManager, "vector_database", MagicMock()) - # Mock the HTTP request using patch + # Mock _vector_database to avoid interaction with the actual database + monkeypatch.setattr(MyTxtFileManager, "_vector_database", MagicMock()) + # Mock the HTTP request using patch with patch("requests.delete") as mock_delete: - mock_delete.return_value = MagicMock(status_code=500) # Mock response with status 500 + mock_delete.return_value = MagicMock( + status_code=500 + ) # Mock response with status 500 # Call the delete_document method file_path = "/mock/path/to/test.txt" - monkeypatch.setattr(os.path, "isfile", MagicMock(return_value=True)) # Mock os.path.isfile to return True + monkeypatch.setattr( + os.path, "isfile", MagicMock(return_value=True) + ) # Mock os.path.isfile to return True with pytest.raises(HTTPException) as exc_info: - result = await MyTxtFileManager.delete_document("idid",file_path, "test_token","pwdpwd") - assert exc_info.value.status_code == 500, "Should raise HTTPException with status code 500" - assert exc_info.value.detail == "Errore nel caricare e processare file", "Should raise HTTPException with the correct detail" + result = await MyTxtFileManager.delete_document( + "idid", file_path, "test_token", "pwdpwd" + ) + assert ( + exc_info.value.status_code == 500 + ), "Should raise HTTPException with status code 500" + assert ( + exc_info.value.detail == "Errore nel caricare e processare file" + ), "Should raise HTTPException with the correct detail" @pytest.mark.asyncio -async def test_text_file_manager_delete_document_default_exception(documents_dir,monkeypatch): +async def test_text_file_manager_delete_document_default_exception( + documents_dir, monkeypatch +): # Create an instance of TextFileManager MyTxtFileManager = TextFileManager() # Mock the file deletion logic - monkeypatch.setattr(os, "remove", MagicMock(return_value=True)) # Mock os.remove to avoid actual file deletion - monkeypatch.setattr(os.path, "exists", MagicMock(return_value=True)) # Mock os.path.exists to return True + monkeypatch.setattr( + os, "remove", MagicMock(return_value=True) + ) # Mock os.remove to avoid actual file deletion + monkeypatch.setattr( + os.path, "exists", MagicMock(return_value=True) + ) # Mock os.path.exists to return True - # Mock vector_database to avoid interaction with the actual database - monkeypatch.setattr(MyTxtFileManager, "vector_database", MagicMock()) - # Mock the HTTP request using patch + # Mock _vector_database to avoid interaction with the actual database + monkeypatch.setattr(MyTxtFileManager, "_vector_database", MagicMock()) + # Mock the HTTP request using patch with patch("requests.delete") as mock_delete: - mock_delete.return_value = MagicMock(status_code=501) # Mock response with status 500 + mock_delete.return_value = MagicMock( + status_code=501 + ) # Mock response with status 500 # Call the delete_document method file_path = "/mock/path/to/test.txt" - monkeypatch.setattr(os.path, "isfile", MagicMock(return_value=True)) # Mock os.path.isfile to return True + monkeypatch.setattr( + os.path, "isfile", MagicMock(return_value=True) + ) # Mock os.path.isfile to return True with pytest.raises(HTTPException) as exc_info: - result = await MyTxtFileManager.delete_document("idid",file_path, "test_token","pwdpwd") - assert exc_info.value.status_code == 500, "Should raise HTTPException with status code 500" - assert exc_info.value.detail == "Errore nel caricare e processare file", "Should raise HTTPException with the correct detail" + result = await MyTxtFileManager.delete_document( + "idid", file_path, "test_token", "pwdpwd" + ) + assert ( + exc_info.value.status_code == 500 + ), "Should raise HTTPException with status code 500" + assert ( + exc_info.value.detail == "Errore nel caricare e processare file" + ), "Should raise HTTPException with the correct detail" @pytest.mark.asyncio @@ -491,46 +587,70 @@ async def test_text_file_manager_delete_path_not_found(monkeypatch): MyTxtFileManager = TextFileManager() # Mock the file deletion logic - monkeypatch.setattr(os, "remove", MagicMock()) # Mock os.remove to avoid actual file deletion - monkeypatch.setattr(os.path, "exists", MagicMock(return_value=False)) # Mock os.path.exists to return True + monkeypatch.setattr( + os, "remove", MagicMock() + ) # Mock os.remove to avoid actual file deletion + monkeypatch.setattr( + os.path, "exists", MagicMock(return_value=False) + ) # Mock os.path.exists to return True + + # Mock _vector_database to avoid interaction with the actual database + monkeypatch.setattr(MyTxtFileManager, "_vector_database", MagicMock()) - # Mock vector_database to avoid interaction with the actual database - monkeypatch.setattr(MyTxtFileManager, "vector_database", MagicMock()) - with pytest.raises(HTTPException) as exc_info: # Call the delete_document method file_path = "/mock/path/to/test.txt" monkeypatch.setattr(os.path, "isfile", MagicMock(return_value=True)) - result = await MyTxtFileManager.delete_document("idid",file_path, "test_token","pwdpwd") + result = await MyTxtFileManager.delete_document( + "idid", file_path, "test_token", "pwdpwd" + ) + + assert ( + exc_info.value.status_code == 404 + ), "Should raise HTTPException with status code 404" + assert ( + exc_info.value.detail == f"File {file_path} non trovato" + ), "Should raise HTTPException with the correct detail" - assert exc_info.value.status_code == 404, "Should raise HTTPException with status code 404" - assert exc_info.value.detail == f"File {file_path} non trovato", "Should raise HTTPException with the correct detail" - - @pytest.mark.asyncio async def test_text_file_manager_delete_document_not_found(monkeypatch): # Create an instance of TextFileManager MyTxtFileManager = TextFileManager() # Mock the file deletion logic - monkeypatch.setattr(os, "remove", MagicMock()) # Mock os.remove to avoid actual file deletion - monkeypatch.setattr(os.path, "exists", MagicMock(return_value=True)) # Mock os.path.exists to return True + monkeypatch.setattr( + os, "remove", MagicMock() + ) # Mock os.remove to avoid actual file deletion + monkeypatch.setattr( + os.path, "exists", MagicMock(return_value=True) + ) # Mock os.path.exists to return True - # Mock vector_database to avoid interaction with the actual database - monkeypatch.setattr(MyTxtFileManager, "vector_database", MagicMock()) - - # Mock the HTTP request using patch + # Mock _vector_database to avoid interaction with the actual database + monkeypatch.setattr(MyTxtFileManager, "_vector_database", MagicMock()) + + # Mock the HTTP request using patch with patch("requests.delete") as mock_delete: - mock_delete.return_value = MagicMock(status_code=400) # Mock response with status 200 - + mock_delete.return_value = MagicMock( + status_code=400 + ) # Mock response with status 200 + # Call the delete_document method file_path = "/mock/path/to/test.txt" - monkeypatch.setattr(os.path, "isfile", MagicMock(return_value=True)) # Mock os.path.isfile to return True + monkeypatch.setattr( + os.path, "isfile", MagicMock(return_value=True) + ) # Mock os.path.isfile to return True with pytest.raises(HTTPException) as exc_info: - result = await MyTxtFileManager.delete_document("idid",file_path, "test_token","pwdpwd") - assert exc_info.value.status_code == 400, "Should raise HTTPException with status code 400" - assert exc_info.value.detail == "Documento non trovato", "Should raise HTTPException with the correct detail" + result = await MyTxtFileManager.delete_document( + "idid", file_path, "test_token", "pwdpwd" + ) + assert ( + exc_info.value.status_code == 400 + ), "Should raise HTTPException with status code 400" + assert ( + exc_info.value.detail == "Documento non trovato" + ), "Should raise HTTPException with the correct detail" + @pytest.mark.asyncio async def test_text_file_manager_delete_document_500_exception(monkeypatch): @@ -538,46 +658,76 @@ async def test_text_file_manager_delete_document_500_exception(monkeypatch): MyTxtFileManager = TextFileManager() # Mock the file deletion logic - monkeypatch.setattr(os, "remove", MagicMock()) # Mock os.remove to avoid actual file deletion - monkeypatch.setattr(os.path, "exists", MagicMock(return_value=True)) # Mock os.path.exists to return True + monkeypatch.setattr( + os, "remove", MagicMock() + ) # Mock os.remove to avoid actual file deletion + monkeypatch.setattr( + os.path, "exists", MagicMock(return_value=True) + ) # Mock os.path.exists to return True - # Mock vector_database to avoid interaction with the actual database - monkeypatch.setattr(MyTxtFileManager, "vector_database", MagicMock()) - # Mock the HTTP request using patch + # Mock _vector_database to avoid interaction with the actual database + monkeypatch.setattr(MyTxtFileManager, "_vector_database", MagicMock()) + # Mock the HTTP request using patch with patch("requests.delete") as mock_delete: - mock_delete.return_value = MagicMock(status_code=500) # Mock response with status 500 + mock_delete.return_value = MagicMock( + status_code=500 + ) # Mock response with status 500 # Call the delete_document method file_path = "/mock/path/to/test.txt" - monkeypatch.setattr(os.path, "isfile", MagicMock(return_value=True)) # Mock os.path.isfile to return True + monkeypatch.setattr( + os.path, "isfile", MagicMock(return_value=True) + ) # Mock os.path.isfile to return True with pytest.raises(HTTPException) as exc_info: - result = await MyTxtFileManager.delete_document("idid",file_path, "test_token","pwdpwd") - assert exc_info.value.status_code == 500, "Should raise HTTPException with status code 500" - assert exc_info.value.detail == "Errore nel caricare e processare file", "Should raise HTTPException with the correct detail" + result = await MyTxtFileManager.delete_document( + "idid", file_path, "test_token", "pwdpwd" + ) + assert ( + exc_info.value.status_code == 500 + ), "Should raise HTTPException with status code 500" + assert ( + exc_info.value.detail == "Errore nel caricare e processare file" + ), "Should raise HTTPException with the correct detail" @pytest.mark.asyncio -async def test_text_file_manager_delete_document_default_exception(documents_dir,monkeypatch): +async def test_text_file_manager_delete_document_default_exception( + documents_dir, monkeypatch +): # Create an instance of TextFileManager MyTxtFileManager = TextFileManager() # Mock the file deletion logic - monkeypatch.setattr(os, "remove", MagicMock(return_value=True)) # Mock os.remove to avoid actual file deletion - monkeypatch.setattr(os.path, "exists", MagicMock(return_value=True)) # Mock os.path.exists to return True + monkeypatch.setattr( + os, "remove", MagicMock(return_value=True) + ) # Mock os.remove to avoid actual file deletion + monkeypatch.setattr( + os.path, "exists", MagicMock(return_value=True) + ) # Mock os.path.exists to return True - # Mock vector_database to avoid interaction with the actual database - monkeypatch.setattr(MyTxtFileManager, "vector_database", MagicMock()) - # Mock the HTTP request using patch + # Mock _vector_database to avoid interaction with the actual database + monkeypatch.setattr(MyTxtFileManager, "_vector_database", MagicMock()) + # Mock the HTTP request using patch with patch("requests.delete") as mock_delete: - mock_delete.return_value = MagicMock(status_code=501) # Mock response with status 500 + mock_delete.return_value = MagicMock( + status_code=501 + ) # Mock response with status 500 # Call the delete_document method file_path = "/mock/path/to/test.txt" - monkeypatch.setattr(os.path, "isfile", MagicMock(return_value=True)) # Mock os.path.isfile to return True + monkeypatch.setattr( + os.path, "isfile", MagicMock(return_value=True) + ) # Mock os.path.isfile to return True with pytest.raises(HTTPException) as exc_info: - result = await MyTxtFileManager.delete_document("idid",file_path, "test_token","pwdpwd") - assert exc_info.value.status_code == 500, "Should raise HTTPException with status code 500" - assert exc_info.value.detail == "Errore nel caricare e processare file", "Should raise HTTPException with the correct detail" + result = await MyTxtFileManager.delete_document( + "idid", file_path, "test_token", "pwdpwd" + ) + assert ( + exc_info.value.status_code == 500 + ), "Should raise HTTPException with status code 500" + assert ( + exc_info.value.detail == "Errore nel caricare e processare file" + ), "Should raise HTTPException with the correct detail" @pytest.mark.asyncio @@ -586,21 +736,31 @@ async def test_text_file_manager_delete_path_not_found(monkeypatch): MyTxtFileManager = TextFileManager() # Mock the file deletion logic - monkeypatch.setattr(os, "remove", MagicMock()) # Mock os.remove to avoid actual file deletion - monkeypatch.setattr(os.path, "exists", MagicMock(return_value=False)) # Mock os.path.exists to return True + monkeypatch.setattr( + os, "remove", MagicMock() + ) # Mock os.remove to avoid actual file deletion + monkeypatch.setattr( + os.path, "exists", MagicMock(return_value=False) + ) # Mock os.path.exists to return True + + # Mock _vector_database to avoid interaction with the actual database + monkeypatch.setattr(MyTxtFileManager, "_vector_database", MagicMock()) - # Mock vector_database to avoid interaction with the actual database - monkeypatch.setattr(MyTxtFileManager, "vector_database", MagicMock()) - with pytest.raises(HTTPException) as exc_info: # Call the delete_document method file_path = "/mock/path/to/test.txt" monkeypatch.setattr(os.path, "isfile", MagicMock(return_value=True)) - result = await MyTxtFileManager.delete_document("idid",file_path, "test_token","pwdpwd") + result = await MyTxtFileManager.delete_document( + "idid", file_path, "test_token", "pwdpwd" + ) + + assert ( + exc_info.value.status_code == 404 + ), "Should raise HTTPException with status code 404" + assert ( + exc_info.value.detail == f"File {file_path} non trovato" + ), "Should raise HTTPException with the correct detail" - assert exc_info.value.status_code == 404, "Should raise HTTPException with status code 404" - assert exc_info.value.detail == f"File {file_path} non trovato", "Should raise HTTPException with the correct detail" - def test_get_file_manager(monkeypatch): @@ -648,17 +808,26 @@ def test_get_file_manager_by_extension(): with pytest.raises(ValueError): get_file_manager_by_extension("test.exe") + @pytest.mark.asyncio async def test_pdf_file_manager_load_split_file(monkeypatch): MyPdfFileManager = PdfFileManager() FakePdfFileManager = MagicMock() FakePdfFileManager.load = MagicMock(return_value=["chunk1", "chunk2", "chunk3"]) - + FakeSplitter = MagicMock() - FakeSplitter.split_documents = MagicMock(return_value=["chunk1", "chunk2", "chunk3"]) - monkeypatch.setattr(MyPdfFileManager,"splitter", FakeSplitter) + FakeSplitter.split_documents = MagicMock( + return_value=["chunk1", "chunk2", "chunk3"] + ) + monkeypatch.setattr(MyPdfFileManager, "_splitter", FakeSplitter) # mock langchain_community.document_loaders.PyPDFLoader - monkeypatch.setattr("app.services.file_manager_service.PyPDFLoader", FakePdfFileManager) + monkeypatch.setattr( + "app.services.file_manager_service.PyPDFLoader", FakePdfFileManager + ) await MyPdfFileManager._load_split_file("test.pdf") - assert FakeSplitter.split_documents.called, "Should call the split_documents method of the splitter" - assert isinstance(FakeSplitter.split_documents.return_value, list), "Should return a list of documents" + assert ( + FakeSplitter.split_documents.called + ), "Should call the split_documents method of the _splitter" + assert isinstance( + FakeSplitter.split_documents.return_value, list + ), "Should return a list of documents" diff --git a/tests/services/test_llm_response_services.py b/tests/services/test_llm_response_services.py index 7055d59..2aef359 100644 --- a/tests/services/test_llm_response_services.py +++ b/tests/services/test_llm_response_services.py @@ -42,7 +42,7 @@ def test_llm_response_service_get_context_false(monkeypatch): # Mock the vector database search_context method mock_context = "Paris is the capital of France." - monkeypatch.setattr(llm_response_service.vector_database, "search_context", lambda q: None) + monkeypatch.setattr(llm_response_service._vector_database, "search_context", lambda q: None) with pytest.raises(HTTPException) as excinfo: llm_response_service._get_context(question) @@ -54,7 +54,7 @@ def test_llm_response_service_get_context_exception(monkeypatch): question = "What is the capital of France?" mock_search_context = MagicMock(side_effect=Exception("Database error")) - monkeypatch.setattr(llm_response_service.vector_database, "search_context", mock_search_context) + monkeypatch.setattr(llm_response_service._vector_database, "search_context", mock_search_context) with pytest.raises(HTTPException) as excinfo: llm_response_service._get_context(question) @@ -72,11 +72,11 @@ def test_generate_llm_chat_name(monkeypatch): llm_response_service = LLMResponseService() - llm_response_service.LLM = MagicMock() - llm_response_service.LLM.model = MagicMock() + llm_response_service._LLM = MagicMock() + llm_response_service._LLM.model = MagicMock() llm_response_service._get_context = lambda question: "mocked context" - llm_response_service.LLM.model.invoke = response + llm_response_service._LLM.model.invoke = response llm_response_service.generate_llm_chat_name("mocked question") assert response.content == "mocked response content", "Should return the mocked response content" @@ -90,7 +90,7 @@ def throw_exception(*args, **kwargs): llm_response_service = LLMResponseService() llm_response_service._get_context = lambda question: "mocked context" - llm_response_service.LLM.model = response + llm_response_service._LLM.model = response with pytest.raises(HTTPException) as excinfo: llm_response_service.generate_llm_chat_name("mocked question") @@ -116,7 +116,7 @@ async def mock_astream(messages): service = LLMResponseService() monkeypatch.setattr(service, "_get_context", mock_search_context) question = Question(question="Voglio pizza!", messages=[Message(sender="me", content="Hi")]) - monkeypatch.setattr(service, "LLM", mock_LLM) + monkeypatch.setattr(service, "_LLM", mock_LLM) result = service.generate_llm_response(question) assert isinstance(result, StreamingResponse), "Should be a StreamingResponse instance" @@ -144,7 +144,7 @@ async def mock_astream(messages): service = LLMResponseService() monkeypatch.setattr(service, "_get_context", mock_search_context) question = Question(question="Voglio pizza!", messages=[]) - monkeypatch.setattr(service, "LLM", mock_LLM) + monkeypatch.setattr(service, "_LLM", mock_LLM) result = service.generate_llm_response(question) assert isinstance(result, StreamingResponse), "Should be a StreamingResponse instance" @@ -168,7 +168,7 @@ def mock_astream(messages): service = LLMResponseService() monkeypatch.setattr(service, "_get_context", mock_search_context) question = Question(question="Voglio pizza!", messages=[Message(sender="me", content="Hi")]) - monkeypatch.setattr(service, "LLM", mock_LLM) + monkeypatch.setattr(service, "_LLM", mock_LLM) result = service.generate_llm_response(question) assert isinstance(result, StreamingResponse), "Should be a StreamingResponse instance" @@ -193,7 +193,7 @@ def mock_astream(messages): service = LLMResponseService() monkeypatch.setattr(service, "_get_context", mock_search_context) question = Question(question="Voglio pizza!", messages=[Message(sender="me", content="Hi")]) - monkeypatch.setattr(service, "LLM", mock_LLM) + monkeypatch.setattr(service, "_LLM", mock_LLM) with pytest.raises(HTTPException) as excinfo: service.generate_llm_response(question) diff --git a/tests/services/test_llm_service.py b/tests/services/test_llm_service.py index 3e58289..a3c0c54 100644 --- a/tests/services/test_llm_service.py +++ b/tests/services/test_llm_service.py @@ -8,7 +8,7 @@ def test_openai_initialization(monkeypatch): monkeypatch.setenv("OPENAI_API_KEY", "test_key") llm = OpenAI(model_name="gpt-4") assert llm._model_name == "gpt-4", "Model name should be 'gpt-4'" - assert llm.model is not None, "Model should be initialized" + assert llm._model is not None, "Model should be initialized" assert isinstance(llm, OpenAI), "Expected an instance of OpenAI class" diff --git a/tests/test_main.py b/tests/test_main.py deleted file mode 100644 index d683029..0000000 --- a/tests/test_main.py +++ /dev/null @@ -1,13 +0,0 @@ -import pytest - -from app.main import app -from httpx import ASGITransport, AsyncClient - -transport = ASGITransport(app=app) - -@pytest.mark.asyncio -async def test_main(): - async with AsyncClient(transport=transport, base_url="http://test") as ac: - response = await ac.get("/ping") - assert response.status_code == 200 - assert response.json().get("status") == "ok" \ No newline at end of file