diff --git a/.gitignore b/.gitignore index ba04025..3b032c2 100644 --- a/.gitignore +++ b/.gitignore @@ -4,4 +4,6 @@ *.swp tags node_modules -__pycache__ \ No newline at end of file +__pycache__ +*scratch* +*.vscode \ No newline at end of file diff --git a/rag_service/api/test_api.py b/rag_service/api/test_api.py deleted file mode 100644 index 18bf7f3..0000000 --- a/rag_service/api/test_api.py +++ /dev/null @@ -1,33 +0,0 @@ -# File: ~/frappe-bench/apps/rag_service/rag_service/api/test_api.py - -@frappe.whitelist(allow_guest=False) -def generate_submission_feedback(): - """Generate feedback for a submission""" - try: - data = frappe.request.get_json() - - if not data: - frappe.throw("No data provided") - - content = data.get("content") - submission_id = data.get("submission_id") or f"submission_{now()}" - - if not content: - frappe.throw("Content is required") - - result = generate_feedback(submission_id, content) - - return { - "status": "success", - "submission_id": submission_id, - "feedback": result["feedback"], - "metadata": result["metadata"], - "similar_contents": result["similar_contents"] - } - - except Exception as e: - frappe.log_error(frappe.get_traceback(), "Feedback Generation Error") - return { - "status": "error", - "message": str(e) - } diff --git a/rag_service/config/__init__.py b/rag_service/config/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/rag_service/core/assignment_context_manager.py b/rag_service/core/assignment_context_manager.py index c5cc8d7..b31824f 100644 --- a/rag_service/core/assignment_context_manager.py +++ b/rag_service/core/assignment_context_manager.py @@ -19,12 +19,14 @@ def __init__(self): print("\nInitialized AssignmentContextManager") print(f"Using API Endpoint: {self.settings.base_url.rstrip('/')}/{self.settings.assignment_context_endpoint.lstrip('/')}") - async def get_assignment_context(self, assignment_id: str) -> Dict: + async def get_assignment_context(self, assignment_id: str, student_id: str) -> Dict: """Get assignment context from cache or API""" try: print(f"\n=== Getting Assignment Context for: {assignment_id} ===") + + context = None - # 1. Check cache if enabled + # Check cache if enabled if self.settings.enable_caching: cached_context = frappe.get_list( "Assignment Context", @@ -37,41 +39,91 @@ async def get_assignment_context(self, assignment_id: str) -> Dict: if cached_context: print("Found cached context") - return await self._format_cached_context(cached_context[0].name) - - # 2. If not in cache or caching disabled, fetch from API - print("Fetching context from API...") - context = await self._fetch_from_api(assignment_id) - - # 3. Save to cache if enabled - if self.settings.enable_caching: - print("Saving to cache...") - await self._save_to_cache(assignment_id, context) - - # 4. 
Format and return - return self._format_context_for_llm(context) - + cached_context = frappe.get_doc("Assignment Context", cached_context[0].name).as_dict() + # remove fields whose value is a datetime object + for key in list(cached_context.keys()): + if isinstance(cached_context[key], datetime): + cached_context.pop(key, None) + context = {"assignment": cached_context} + + if not context: + # If not in cache or caching disabled, fetch from API + print("Fetching context from API...") + context = await self._fetch_assignment_from_api(assignment_id) + + # Save to cache if enabled + if self.settings.enable_caching: + print("Saving to cache...") + await self._save_to_cache(assignment_id, context) + + if context["assignment"]["rubrics"] is None: + print("Rubrics not found in assignment context.") + raise Exception("Rubrics missing in assignment context") + + if student_id is None: + raise Exception("Student ID is required to fetch student context") + + student_details = await self._fetch_student_from_api(student_id) + # student_details = { + # "student_id":"ST0001", + # "grade":"6", + # "level":"2", + # "language": "Hindi" + # } + context["student"] = {**student_details} + + # print("Assignment context",context) + return context + except Exception as e: error_msg = f"Error getting assignment context: {str(e)}" print(f"\nError: {error_msg}") frappe.log_error(error_msg, "Assignment Context Error") raise - async def _fetch_from_api(self, assignment_id: str) -> Dict: + async def _fetch_assignment_from_api(self, assignment_id: str) -> Dict: """Fetch assignment context from TAP LMS API""" try: # Construct API URL properly api_url = f"{self.settings.base_url.rstrip('/')}/{self.settings.assignment_context_endpoint.lstrip('/')}" - print(f"\nMaking API request to: {api_url}") payload = { "assignment_id": assignment_id } + response = requests.post( + api_url, + headers=self.headers, + json=payload, + timeout=30 + ) - print("\nRequest Details:") - print(f"Headers: {json.dumps({k: v if k != 'Authorization' else '[REDACTED]' for k, v in self.headers.items()}, indent=2)}") - print(f"Payload: {json.dumps(payload, indent=2)}") + if response.status_code != 200: + error_msg = f"API request failed with status {response.status_code}: {response.text}" + print(f"Error: {error_msg}") + raise Exception(error_msg) + + data = response.json() + # data = {'assignment': {'name': 'Pop Art', 'program_name': 'Summer Program 2026', 'description': 'Students will create a Pop Art–style artwork inspired by Andy Warhol by drawing one simple object four times on their paper and coloring each version using different pairs of complementary colors. They will experiment with bold outlines, bright contrasts, and simple background patterns to make their artwork look vibrant and balanced.', 'assignment_type': 'Written', 'activity_type': 'Regular', 'course_vertical': 'Arts', 'difficulty_tier': 'Remedial', 'submission_guidelines': '', 'submission_rules': [{'submission_title': 'Emoji', 'allowed_submission_types': ['emoji'], 'guided_text': 'Send a 👍 if you enjoyed the activity, or 👎 if you did not enjoy it', 'unguided_text': 'Please send any emoji of your choice!', 'valid_criteria': '👍 or 👎', 'invalid_criteria': 'Any emoji other than 👍 or 👎'}, {'submission_title': 'Type a word or send a voice note', 'allowed_submission_types': ['text', 'audio'], 'guided_text': 'Send a text or voice note saying "Creative" if you liked it, or "Boring" if you didn’t.', 'unguided_text': 'What do you think of the Pop Art activity? 
Share in one word or send a voice note', 'valid_criteria': 'Creative, Boring', 'invalid_criteria': 'Any word other than creative, boring'}, {'submission_title': 'Taking a picture of anything around you', 'allowed_submission_types': ['image'], 'guided_text': 'Share a picture of anything in red color', 'unguided_text': 'Share a picture of anything around you', 'valid_criteria': 'Picture of anything in red color', 'invalid_criteria': 'Anything other which is not red in color'}, {'submission_title': 'Send a voice/text summary Related to Artefact', 'allowed_submission_types': ['text', 'audio'], 'guided_text': 'Type or send a voice note and tell us one complementary color pair', 'unguided_text': '', 'valid_criteria': 'red–green, blue–orange, yellow–purple', 'invalid_criteria': 'wrong pair, single color, sentences'}, {'submission_title': 'Take a picture or video of the created artefact Related Artefact', 'allowed_submission_types': ['image', 'video'], 'guided_text': 'Don’t forget to check if:\n\nYou drew the same object 4 times\nYou used 2 complementary color pairs\nYou added bold outlines\nYour coloring is neat and filled', 'unguided_text': '', 'valid_criteria': '- Clear artwork using complementary colors\n- Neat coloring and bold outlines\n- Complete artwork visible\n\nAll criteria needs to be fulfilled', 'invalid_criteria': '- Missing complementary colors\n- Messy or uneven coloring\n- Incomplete artwork\n- Unclear/cropped image'}], 'reference_images': [], 'max_score': None, 'rubrics': {}}, 'learning_objectives': []} + # print("#############") + # print(data) + # print("#############") + print("API request successful") + return data + + except requests.RequestException as e: + error_msg = f"API request failed: {str(e)}" + print(f"\nError: {error_msg}") + raise Exception(error_msg) + + async def _fetch_student_from_api(self, student_id: str) -> Dict: + """Fetch student details from TAP LMS API""" + try: + # Construct API URL properly + api_url = f"{self.settings.base_url.rstrip('/')}/{self.settings.student_context_endpoint.lstrip('/')}" + payload = { + "student_id": student_id + } response = requests.post( api_url, headers=self.headers, @@ -79,19 +131,16 @@ async def _fetch_from_api(self, assignment_id: str) -> Dict: timeout=30 ) - print(f"\nResponse Status: {response.status_code}") - if response.status_code != 200: error_msg = f"API request failed with status {response.status_code}: {response.text}" print(f"Error: {error_msg}") raise Exception(error_msg) data = response.json() - if "message" not in data: - raise Exception("Invalid API response format") + # data = {'student_id': 'ST00000182', 'grade': '5', 'level': 'L1', 'language': 'Hindi'} - print("API request successful") - return data["message"] + print("Student API request successful") + return data except requests.RequestException as e: error_msg = f"API request failed: {str(e)}" @@ -110,13 +159,17 @@ async def _save_to_cache(self, assignment_id: str, context: Dict) -> None: learning_objectives = context.get("learning_objectives", []) # Parse assignment type properly - assignment_type = assignment.get("type", "Practical") + assignment_type = assignment.get("assignment_type", "Practical") + course_vertical = assignment.get("course_vertical", "General") + activity_type = assignment.get("activity_type") + program_name = assignment.get("program_name", "") + difficulty_tier = assignment.get("difficulty_tier", "") + submission_guidelines = assignment.get("submission_guidelines", "") + submission_rules = 
json.dumps(assignment.get("submission_rules", [])) + print(f"Parsed assignment type: {assignment_type}, course vertical: {course_vertical}, activity type: {activity_type}, program name: {program_name}, difficulty tier: {difficulty_tier}") # If type is empty or invalid, default to Practical - valid_types = ["Written", "Practical", "Performance", "Collaborative"] - if not assignment_type or assignment_type not in valid_types: - assignment_type = "Practical" - print(f"Invalid assignment type '{assignment.get('type')}', defaulting to 'Practical'") + # Prepare learning objectives JSON formatted_objectives = [] @@ -136,13 +189,6 @@ async def _save_to_cache(self, assignment_id: str, context: Dict) -> None: limit=1 ) - # Determine course vertical - course_vertical = "General" - if "subject" in assignment and assignment["subject"]: - subject_parts = assignment["subject"].split("-") - if len(subject_parts) > 1: - course_vertical = subject_parts[-1].strip() - if existing: # Update existing doc = frappe.get_doc("Assignment Context", existing[0].name) @@ -150,14 +196,20 @@ async def _save_to_cache(self, assignment_id: str, context: Dict) -> None: "assignment_name": assignment.get("name", ""), "course_vertical": course_vertical, "assignment_type": assignment_type, + "activity_type": activity_type, "reference_image": assignment.get("reference_image", ""), "description": assignment.get("description", ""), "learning_objectives": json.dumps(formatted_objectives), "max_score": assignment.get("max_score", "100"), + "program_name": program_name, + "difficulty_tier": difficulty_tier, + "submission_guidelines": submission_guidelines, + "submission_rules": submission_rules, "last_updated": now_datetime(), "cache_valid_till": cache_valid_till, "last_sync_status": "Success", - "version": (doc.version or 0) + 1 + "version": (doc.version or 0) + 1, + "rubrics": json.dumps(assignment.get("rubrics", {})) }) doc.save() print(f"Updated existing cache for assignment {assignment_id}") @@ -169,15 +221,21 @@ async def _save_to_cache(self, assignment_id: str, context: Dict) -> None: "assignment_name": assignment.get("name", ""), "course_vertical": course_vertical, "assignment_type": assignment_type, + "activity_type": activity_type, "reference_image": assignment.get("reference_image", ""), "description": assignment.get("description", ""), "learning_objectives": json.dumps(formatted_objectives), "max_score": assignment.get("max_score", "100"), "difficulty_level": "Medium", # Default value + "program_name": program_name, + "difficulty_tier": difficulty_tier, + "submission_guidelines": submission_guidelines, + "submission_rules": submission_rules, "last_updated": now_datetime(), "cache_valid_till": cache_valid_till, "last_sync_status": "Success", - "version": 1 + "version": 1, + "rubrics": json.dumps(assignment.get("rubrics", {})) }) doc.insert() print(f"Created new cache for assignment {assignment_id}") @@ -191,95 +249,13 @@ async def _save_to_cache(self, assignment_id: str, context: Dict) -> None: frappe.db.rollback() # Rollback on error raise Exception(error_msg) - async def _format_cached_context(self, context_name: str) -> Dict: - """Format cached context for LLM""" - try: - context = frappe.get_doc("Assignment Context", context_name) - - # Parse learning objectives safely - learning_objectives = [] - try: - if context.learning_objectives: - learning_objectives = json.loads(context.learning_objectives) - except (json.JSONDecodeError, TypeError) as e: - print(f"Error parsing learning objectives: {str(e)}") - # Create an 
empty default if parsing fails - learning_objectives = [] - - return { - "assignment": { - "id": context.assignment_id, - "name": context.assignment_name, - "type": context.assignment_type, - "description": context.description, - "max_score": context.max_score, - "reference_image": context.reference_image - }, - "learning_objectives": learning_objectives, - "course_vertical": context.course_vertical, - "difficulty_level": context.difficulty_level - } - - except Exception as e: - error_msg = f"Error formatting cached context: {str(e)}" - print(f"\nError: {error_msg}") - raise Exception(error_msg) - - def _format_context_for_llm(self, api_context: Dict) -> Dict: - """Format API context for LLM""" - try: - assignment = api_context["assignment"] - - # Determine course vertical - course_vertical = "General" - if "subject" in assignment and assignment["subject"]: - subject_parts = assignment["subject"].split("-") - if len(subject_parts) > 1: - course_vertical = subject_parts[-1].strip() - - # Parse assignment type correctly - assignment_type = assignment.get("type", "Practical") - valid_types = ["Written", "Practical", "Performance", "Collaborative"] - if not assignment_type or assignment_type not in valid_types: - assignment_type = "Practical" - - # Format learning objectives - learning_objectives = [] - if "learning_objectives" in api_context and api_context["learning_objectives"]: - learning_objectives = [ - { - "objective_id": obj.get("objective", "Unknown"), - "description": obj.get("description", "").strip() - } - for obj in api_context["learning_objectives"] - ] - - return { - "assignment": { - "id": assignment.get("name", ""), # Using name as ID - "name": assignment.get("name", ""), - "type": assignment_type, - "description": assignment.get("description", ""), - "max_score": assignment.get("max_score", "100"), - "reference_image": assignment.get("reference_image", "") - }, - "learning_objectives": learning_objectives, - "course_vertical": course_vertical, - "difficulty_level": "Medium" # Default value - } - - except Exception as e: - error_msg = f"Error formatting API context: {str(e)}" - print(f"\nError: {error_msg}") - raise Exception(error_msg) - async def refresh_cache(self, assignment_id: str) -> None: """Manually refresh cache for an assignment""" try: print(f"\n=== Refreshing Cache for Assignment: {assignment_id} ===") # Force fetch from API - context = await self._fetch_from_api(assignment_id) + context = await self._fetch_assignment_from_api(assignment_id) # Save to cache await self._save_to_cache(assignment_id, context) @@ -290,29 +266,3 @@ async def refresh_cache(self, assignment_id: str) -> None: error_msg = f"Error refreshing cache: {str(e)}" print(f"\nError: {error_msg}") raise Exception(error_msg) - - def verify_settings(self) -> Dict: - """Verify RAG Settings configuration""" - try: - results = { - "base_url": bool(self.settings.base_url), - "api_key": bool(self.settings.api_key), - "api_secret": bool(self.settings.get_password('api_secret')), - "endpoints": bool(self.settings.assignment_context_endpoint), - "cache_config": bool(self.settings.cache_duration_days is not None) - } - - missing = [k for k, v in results.items() if not v] - - return { - "status": "Valid" if not missing else "Invalid", - "missing_settings": missing, - "cache_enabled": self.settings.enable_caching, - "cache_duration": self.settings.cache_duration_days - } - - except Exception as e: - return { - "status": "Error", - "error": str(e) - } diff --git a/rag_service/core/context_fetcher.py 
b/rag_service/core/context_fetcher.py deleted file mode 100644 index d2d6edb..0000000 --- a/rag_service/core/context_fetcher.py +++ /dev/null @@ -1,208 +0,0 @@ -# rag_service/rag_service/core/context_fetcher.py - -import frappe -import json -import httpx -from datetime import datetime, timedelta -from typing import Dict, Optional -from urllib.parse import urljoin - -class AssignmentContextFetcher: - def __init__(self): - self.settings = frappe.get_single("RAG Settings") - self.api_url = urljoin( - self.settings.base_url, - self.settings.assignment_context_endpoint - ) - self.cache_duration = timedelta(days=self.settings.cache_duration_days) - self.enable_caching = self.settings.enable_caching - self.max_retries = 3 - self.retry_delay = 1 # seconds - - def _get_headers(self) -> Dict[str, str]: - """Get request headers with authentication""" - api_key = self.settings.api_key - api_secret = self.settings.get_password('api_secret') - - return { - "Authorization": f"token {api_key}:{api_secret}", - "Content-Type": "application/json" - } - - async def get_assignment_context(self, assignment_id: str) -> Dict: - """ - Get assignment context - first check cache, then fetch from API if needed - """ - try: - # Check cache if enabled - if self.enable_caching: - cached_context = self._get_cached_context(assignment_id) - if cached_context: - frappe.logger().debug(f"Cache hit for assignment {assignment_id}") - return cached_context - - # If not in cache or caching disabled, fetch from API - context_data = await self._fetch_from_api(assignment_id) - - # Cache if enabled - if self.enable_caching: - self._cache_context(assignment_id, context_data) - frappe.logger().debug(f"Cached context for assignment {assignment_id}") - - return context_data - - except Exception as e: - frappe.log_error( - message=f"Error fetching assignment context: {str(e)}", - title="Assignment Context Error" - ) - raise - - async def _fetch_from_api(self, assignment_id: str) -> Dict: - """Fetch context from TAP LMS API with retries""" - last_error = None - headers = self._get_headers() - - for attempt in range(self.max_retries): - try: - async with httpx.AsyncClient() as client: - response = await client.post( - self.api_url, - json={"assignment_id": assignment_id}, - headers=headers, - timeout=30.0 - ) - - if response.status_code == 401: - raise ValueError("Authentication failed - check API key and secret") - - response.raise_for_status() - - context_data = response.json() - if not context_data.get("message"): - raise ValueError("Invalid response format from API") - - frappe.logger().debug(f"Successfully fetched context for assignment {assignment_id}") - return context_data["message"] - - except httpx.HTTPStatusError as e: - last_error = f"HTTP error {e.response.status_code}: {str(e)}" - if e.response.status_code in [401, 403, 404]: # Don't retry auth or not found errors - break - await self._wait_before_retry(attempt) - - except httpx.RequestError as e: - last_error = f"Request error: {str(e)}" - await self._wait_before_retry(attempt) - - except Exception as e: - last_error = str(e) - await self._wait_before_retry(attempt) - - frappe.log_error( - message=f"Failed to fetch context after {self.max_retries} attempts: {last_error}", - title="API Error" - ) - raise Exception(f"Failed to fetch assignment context: {last_error}") - - async def _wait_before_retry(self, attempt: int): - """Exponential backoff for retries""" - import asyncio - wait_time = self.retry_delay * (2 ** attempt) # exponential backoff - await asyncio.sleep(wait_time) 
- - def _get_cached_context(self, assignment_id: str) -> Optional[Dict]: - """Check if we have a valid cached context""" - try: - cached = frappe.get_list( - "Assignment Context", - filters={ - "assignment_id": assignment_id, - "cache_valid_till": [">", datetime.now()], - "last_sync_status": "Success" - }, - order_by="version desc", - limit=1 - ) - - if not cached: - return None - - context_doc = frappe.get_doc("Assignment Context", cached[0].name) - return { - "assignment": { - "name": context_doc.assignment_name, - "description": context_doc.description, - "type": context_doc.assignment_type, - "subject": context_doc.course_vertical, - "submission_guidelines": context_doc.submission_guidelines, - "reference_image": context_doc.reference_image, - "max_score": context_doc.max_score, - }, - "learning_objectives": json.loads(context_doc.learning_objectives) - } - - except Exception as e: - frappe.log_error( - message=f"Error retrieving cached context: {str(e)}", - title="Cache Retrieval Error" - ) - return None - - def _cache_context(self, assignment_id: str, context_data: Dict) -> None: - """Store context in cache""" - try: - assignment = context_data["assignment"] - cache_valid_till = datetime.now() + self.cache_duration - - doc = frappe.get_doc({ - "doctype": "Assignment Context", - "assignment_id": assignment_id, - "assignment_name": assignment["name"], - "course_vertical": assignment["subject"], - "description": assignment["description"], - "submission_guidelines": assignment["submission_guidelines"], - "reference_image": assignment["reference_image"], - "learning_objectives": json.dumps(context_data["learning_objectives"]), - "max_score": assignment["max_score"], - "last_updated": datetime.now(), - "cache_valid_till": cache_valid_till, - "last_sync_status": "Success", - "version": 1 - }) - - existing_docs = frappe.get_list( - "Assignment Context", - filters={"assignment_id": assignment_id} - ) - - if existing_docs: - doc.name = existing_docs[0].name - doc.version = frappe.get_value("Assignment Context", existing_docs[0].name, "version") + 1 - - doc.save() - frappe.db.commit() - - except Exception as e: - frappe.log_error( - message=f"Error caching context: {str(e)}", - title="Cache Error" - ) - raise - - def invalidate_cache(self, assignment_id: str) -> None: - """Manually invalidate cache for an assignment""" - try: - frappe.db.sql(""" - UPDATE `tabAssignment Context` - SET cache_valid_till = NOW() - WHERE assignment_id = %s - """, (assignment_id,)) - frappe.db.commit() - - except Exception as e: - frappe.log_error( - message=f"Error invalidating cache: {str(e)}", - title="Cache Error" - ) - raise diff --git a/rag_service/core/embedding_utils.py b/rag_service/core/embedding_utils.py deleted file mode 100644 index afe81ed..0000000 --- a/rag_service/core/embedding_utils.py +++ /dev/null @@ -1,79 +0,0 @@ -# File: ~/frappe-bench/apps/rag_service/rag_service/core/embedding_utils.py - -import frappe -from sentence_transformers import SentenceTransformer -import numpy as np -import os -from frappe.utils import now_datetime -import json - -class EmbeddingManager: - def __init__(self): - self.model = None - self.model_name = 'all-MiniLM-L6-v2' - self.embedding_dimension = 384 - - def get_model(self): - if self.model is None: - self.model = SentenceTransformer(self.model_name) - return self.model - - def generate_embedding(self, text): - """Generate embedding for given text""" - model = self.get_model() - embedding = model.encode(text) - return embedding - - def save_embedding(self, 
reference_id, content, content_type="Submission"): - """Save embedding to Vector Store""" - try: - # Generate embedding - embedding = self.generate_embedding(content) - - # Create a file path for the embedding - site_path = frappe.get_site_path() - embedding_dir = os.path.join(site_path, 'private', 'files', 'embeddings') - os.makedirs(embedding_dir, exist_ok=True) - - file_path = os.path.join(embedding_dir, f"{content_type}_{reference_id}_{now_datetime().strftime('%Y%m%d_%H%M%S')}.npy") - - # Save the embedding to file - np.save(file_path, embedding) - - # Create Vector Store entry - vector_store = frappe.get_doc({ - "doctype": "Vector Store", - "content_type": content_type, - "reference_id": reference_id, - "content": content, - "embedding_file": os.path.relpath(file_path, site_path), - "created_at": now_datetime() - }) - - vector_store.insert() - frappe.db.commit() - - return vector_store.name - - except Exception as e: - frappe.log_error(f"Error saving embedding: {str(e)}") - raise - - def load_embedding(self, vector_store_name): - """Load embedding from Vector Store""" - try: - vector_store = frappe.get_doc("Vector Store", vector_store_name) - embedding_path = os.path.join(frappe.get_site_path(), vector_store.embedding_file) - - if not os.path.exists(embedding_path): - frappe.throw(f"Embedding file not found: {embedding_path}") - - embedding = np.load(embedding_path) - return embedding - - except Exception as e: - frappe.log_error(f"Error loading embedding: {str(e)}") - raise - -# Create a singleton instance -embedding_manager = EmbeddingManager() diff --git a/rag_service/core/feedback_generator.py b/rag_service/core/feedback_generator.py deleted file mode 100644 index 663db13..0000000 --- a/rag_service/core/feedback_generator.py +++ /dev/null @@ -1,135 +0,0 @@ -# File: ~/frappe-bench/apps/rag_service/rag_service/core/feedback_generator.py - -import frappe -from .embedding_utils import embedding_manager -from .vector_store import faiss_manager -import json -from datetime import datetime - -class FeedbackGenerator: - def __init__(self): - self.feedback_templates = { - "general": """ -Based on the submission content and similar examples, here's the feedback: - -Strengths: -{strengths} - -Areas for Improvement: -{improvements} - -Suggestions: -{suggestions} - -Overall Assessment: -{assessment} - """.strip(), - - "plagiarism_alert": """ -⚠️ Plagiarism Concern: -The submission shows significant similarity ({similarity_score:.2f}%) with existing content. -Please review and ensure original work. 
- -Similar Content Found: -{similar_content} - -Recommendation: -{recommendation} - """.strip() - } - - def generate_structured_feedback(self, submission_content, similar_contents, plagiarism_score=None): - """Generate structured feedback using RAG approach""" - try: - # Analyze strengths - strengths = self._analyze_strengths(submission_content) - - # Analyze areas for improvement - improvements = self._analyze_improvements(submission_content, similar_contents) - - # Generate suggestions - suggestions = self._generate_suggestions(submission_content, similar_contents) - - # Create feedback - feedback = self.feedback_templates["general"].format( - strengths="\n".join(f"- {s}" for s in strengths), - improvements="\n".join(f"- {i}" for i in improvements), - suggestions="\n".join(f"- {s}" for s in suggestions), - assessment=self._create_overall_assessment( - submission_content, - strengths, - improvements - ) - ) - - # Add plagiarism warning if score is high - if plagiarism_score and plagiarism_score > 0.8: - feedback += "\n\n" + self.feedback_templates["plagiarism_alert"].format( - similarity_score=plagiarism_score * 100, - similar_content=self._format_similar_content(similar_contents[:1]), - recommendation="Please revise your submission to ensure originality." - ) - - return { - "feedback": feedback, - "metadata": { - "strengths_count": len(strengths), - "improvements_count": len(improvements), - "suggestions_count": len(suggestions), - "has_plagiarism_warning": plagiarism_score > 0.8 if plagiarism_score else False, - "generated_at": str(datetime.now()) - } - } - - except Exception as e: - frappe.log_error(f"Error generating feedback: {str(e)}") - raise - - def _analyze_strengths(self, content): - """Analyze submission strengths""" - # Placeholder - Implement actual strength analysis - strengths = [ - "Clear presentation of concepts", - "Good structure and organization", - "Effective use of examples" - ] - return strengths - - def _analyze_improvements(self, content, similar_contents): - """Analyze areas for improvement""" - # Placeholder - Implement actual improvement analysis - improvements = [ - "Consider adding more detailed explanations", - "Include more specific examples", - "Expand on key concepts" - ] - return improvements - - def _generate_suggestions(self, content, similar_contents): - """Generate specific suggestions""" - # Placeholder - Implement actual suggestion generation - suggestions = [ - "Reference related topics to strengthen understanding", - "Include practical applications of concepts", - "Add visual representations where applicable" - ] - return suggestions - - def _create_overall_assessment(self, content, strengths, improvements): - """Create overall assessment""" - # Placeholder - Implement actual assessment logic - return "The submission demonstrates good understanding of the concepts while having room for enhancement in specific areas." 
- - def _format_similar_content(self, similar_contents): - """Format similar content for feedback""" - if not similar_contents: - return "No similar content found" - - formatted = [] - for content in similar_contents: - formatted.append(f"- Content: {content['content']}\n Similarity: {content['similarity_score']:.2%}") - - return "\n".join(formatted) - -# Create singleton instance -feedback_generator = FeedbackGenerator() diff --git a/rag_service/handlers/feedback_handler.py b/rag_service/core/feedback_handler.py similarity index 74% rename from rag_service/handlers/feedback_handler.py rename to rag_service/core/feedback_handler.py index b4b2e79..d3722f3 100644 --- a/rag_service/handlers/feedback_handler.py +++ b/rag_service/core/feedback_handler.py @@ -1,18 +1,17 @@ -# rag_service/rag_service/handlers/feedback_handler.py +# rag_service/rag_service/core/feedback_handler.py import frappe import json from datetime import datetime from typing import Dict, Optional -from ..core.langchain_manager import LangChainManager -from ..core.feedback_processor import FeedbackProcessor +from ..core.feedback_service import FeedbackService from ..core.assignment_context_manager import AssignmentContextManager from ..utils.queue_manager import QueueManager +from ..utils.submission_data import build_submission_content, normalize_submission_payload class FeedbackHandler: def __init__(self): - self.langchain_manager = LangChainManager() - self.feedback_processor = FeedbackProcessor() + self.feedback_service = FeedbackService() self.queue_manager = QueueManager() self.assignment_context_manager = AssignmentContextManager() @@ -20,17 +19,15 @@ async def handle_submission(self, message_data: Dict) -> None: """Handle a new submission from plagiarism queue""" request_id = None try: - print("\n=== Processing New Submission ===") - print(f"Submission ID: {message_data.get('submission_id')}") - + submission_data = normalize_submission_payload(message_data) + # Create or update feedback request - request_id = await self.create_feedback_request(message_data) + request_id = await self.create_feedback_request(message_data, submission_data) print(f"\nFeedback Request Created/Updated: {request_id}") # Get assignment context - print(f"\nFetching assignment context for: {message_data['assignment_id']}") assignment_context = await self.assignment_context_manager.get_assignment_context( - message_data["assignment_id"] + message_data["assignment_id"], message_data["student_id"] ) if not assignment_context: @@ -38,15 +35,17 @@ async def handle_submission(self, message_data: Dict) -> None: print("\nGenerating feedback...") # Generate feedback - feedback = await self.langchain_manager.generate_feedback( + feedback, model_used, template_used = await self.feedback_service.generate_feedback( assignment_context=assignment_context, - submission_url=message_data["img_url"], - submission_id=request_id + submission_data=submission_data, + submission_id=request_id, + plagiarism_data=message_data, + feedback_request_id=request_id ) print("\nFeedback generated, processing feedback...") # Process and deliver feedback - await self.feedback_processor.process_feedback(request_id, feedback) + await self.feedback_service.process_feedback(request_id, feedback, model_used, template_used) print("\nFeedback processing completed") except Exception as e: @@ -59,7 +58,7 @@ async def handle_submission(self, message_data: Dict) -> None: await self.mark_request_failed(request_id, str(e)) raise - async def create_feedback_request(self, message_data: Dict) -> 
str: + async def create_feedback_request(self, message_data: Dict, submission_data: Dict) -> str: """Create or update feedback request""" try: print("\n=== Creating/Updating Feedback Request ===") @@ -81,6 +80,10 @@ async def create_feedback_request(self, message_data: Dict) -> str: feedback_request.processing_attempts += 1 feedback_request.status = "Processing" feedback_request.error_log = None # Clear previous errors + feedback_request.submission_type = submission_data["submission_type"] + feedback_request.submission_url = submission_data["submission_url"] + feedback_request.submission_text = submission_data["submission_text"] + feedback_request.submission_content = build_submission_content(submission_data) feedback_request.save() else: @@ -90,9 +93,18 @@ async def create_feedback_request(self, message_data: Dict) -> str: "submission_id": message_data["submission_id"], "student_id": message_data["student_id"], "assignment_id": message_data["assignment_id"], - "submission_content": message_data["img_url"], + "submission_type": submission_data["submission_type"], + "submission_url": submission_data["submission_url"], + "submission_text": submission_data["submission_text"], + "submission_content": build_submission_content(submission_data), "plagiarism_score": message_data.get("plagiarism_score", 0.0), + "is_plagiarized": message_data.get("is_plagiarized", False), + "plagiarism_source": message_data.get("plagiarism_source", "none"), + "match_type": message_data.get("match_type", "original"), + "is_ai_generated": message_data.get("is_ai_generated", False), + "ai_confidence": message_data.get("ai_confidence", 0.0), "similar_sources": json.dumps(message_data.get("similar_sources", [])), + "ai_detection_source": message_data.get("ai_detection_source", "unknown"), "status": "Processing", "created_at": datetime.now(), "processing_attempts": 1 @@ -103,8 +115,6 @@ async def create_feedback_request(self, message_data: Dict) -> str: # Explicitly commit the transaction frappe.db.commit() - - print(f"Feedback Request Created/Updated Successfully: {request_id}") return request_id except Exception as e: @@ -172,40 +182,6 @@ async def get_request_status(self, request_id: str) -> Dict: "status": "Unknown" } - async def retry_failed_request(self, request_id: str) -> None: - """Retry a failed feedback request""" - try: - print(f"\n=== Retrying Failed Request: {request_id} ===") - - feedback_request = frappe.get_doc("Feedback Request", request_id) - - if feedback_request.status != "Failed": - raise ValueError(f"Request {request_id} is not in failed state") - - if feedback_request.processing_attempts >= 3: - raise ValueError(f"Maximum retry attempts reached for request {request_id}") - - # Prepare message data for reprocessing - message_data = { - "submission_id": feedback_request.submission_id, - "student_id": feedback_request.student_id, - "assignment_id": feedback_request.assignment_id, - "img_url": feedback_request.submission_content, - "plagiarism_score": feedback_request.plagiarism_score, - "similar_sources": json.loads(feedback_request.similar_sources or '[]') - } - - # Process the request again - await self.handle_submission(message_data) - - print(f"Request {request_id} retried successfully") - - except Exception as e: - error_msg = f"Error retrying request: {str(e)}" - print(f"\nError: {error_msg}") - frappe.log_error(error_msg, "Request Retry Error") - raise - async def cleanup_old_requests(self, days: int = 30) -> None: """Clean up old completed requests""" try: diff --git 
a/rag_service/core/feedback_processor.py b/rag_service/core/feedback_processor.py deleted file mode 100644 index da459ec..0000000 --- a/rag_service/core/feedback_processor.py +++ /dev/null @@ -1,159 +0,0 @@ -# rag_service/rag_service/core/feedback_processor.py - -import frappe -import json -from datetime import datetime -from typing import Dict, Optional -from ..utils.queue_manager import QueueManager - -class FeedbackProcessor: - def __init__(self): - self.queue_manager = QueueManager() - - async def process_feedback(self, request_id: str, feedback: Dict) -> None: - """Process and store feedback in Feedback Request DocType""" - try: - print(f"\n=== Processing Feedback for Request: {request_id} ===") - - # Get the feedback request document - feedback_request = frappe.get_doc("Feedback Request", request_id) - print(f"Found Feedback Request: {feedback_request.name}") - - # Format feedback for display - formatted_feedback = self.format_feedback_for_display(feedback) - - print("\nUpdating Feedback Request fields...") - # Update document fields using db_set - feedback_request.db_set('status', 'Completed', update_modified=True) - feedback_request.db_set('generated_feedback', json.dumps(feedback, indent=2), update_modified=True) - feedback_request.db_set('feedback_summary', formatted_feedback, update_modified=True) - feedback_request.db_set('completed_at', datetime.now(), update_modified=True) - - # Get and set LLM model info - llm_settings = frappe.get_list( - "LLM Settings", - filters={"is_active": 1}, - limit=1 - ) - if llm_settings: - feedback_request.db_set('model_used', llm_settings[0].name, update_modified=True) - - # FIXED: Get universal template (no assignment_type filtering) - print("Getting universal template...") - try: - # Get any active template (same logic as langchain_manager.py) - templates = frappe.get_list( - "Prompt Template", - filters={"is_active": 1}, # Only filter by active status - order_by="version desc", - limit=1 - ) - - if templates: - feedback_request.db_set('template_used', templates[0].name, update_modified=True) - print(f"Using universal template: {templates[0].name}") - else: - print("No active template found - leaving template_used empty") - feedback_request.db_set('template_used', '', update_modified=True) - - except Exception as template_error: - print(f"Error getting template: {str(template_error)}") - # Don't fail the entire process for template tracking issues - feedback_request.db_set('template_used', '', update_modified=True) - - # Commit changes - frappe.db.commit() - - # Verify the update - updated_doc = frappe.get_doc("Feedback Request", request_id) - print("\nVerification after update:") - print(f"Status: {updated_doc.status}") - print(f"Has Generated Feedback: {bool(updated_doc.generated_feedback)}") - print(f"Has Feedback Summary: {bool(updated_doc.feedback_summary)}") - print(f"Template Used: {updated_doc.template_used}") - - # Prepare and send message to TAP LMS - message = { - "submission_id": feedback_request.submission_id, - "student_id": feedback_request.student_id, - "assignment_id": feedback_request.assignment_id, - "feedback": feedback, - "summary": formatted_feedback, - "generated_at": feedback_request.completed_at.isoformat() if feedback_request.completed_at else datetime.now().isoformat(), - "plagiarism_score": feedback_request.plagiarism_score, - "similar_sources": json.loads(feedback_request.similar_sources or '[]') - } - - # Send to TAP LMS queue - self.queue_manager.send_feedback_to_tap(message) - - print(f"\nFeedback processed and 
sent for request: {request_id}") - - except Exception as e: - error_msg = f"Error processing feedback: {str(e)}" - print(f"\nError: {error_msg}") - - try: - if 'feedback_request' in locals() and feedback_request: - feedback_request.db_set('status', 'Failed', update_modified=True) - feedback_request.db_set('error_log', error_msg, update_modified=True) - frappe.db.commit() - print(f"Request {request_id} marked as failed") - except Exception as save_error: - print(f"Error saving failure status: {str(save_error)}") - - frappe.log_error(error_msg, "Feedback Processing Error") - raise - - def format_feedback_for_display(self, feedback: Dict) -> str: - """Format feedback for human-readable display""" - try: - formatted = [] - - # Standard fields - if "overall_feedback" in feedback: - formatted.append("Overall Feedback:") - formatted.append(feedback["overall_feedback"]) - - if "strengths" in feedback: - formatted.append("\nStrengths:") - for strength in feedback["strengths"]: - formatted.append(f"- {strength}") - - if "areas_for_improvement" in feedback: - formatted.append("\nAreas for Improvement:") - for area in feedback["areas_for_improvement"]: - formatted.append(f"- {area}") - - if "learning_objectives_feedback" in feedback: - formatted.append("\nLearning Objectives Feedback:") - for obj in feedback["learning_objectives_feedback"]: - formatted.append(f"- {obj}") - - if "grade_recommendation" in feedback: - formatted.append(f"\nGrade Recommendation: {feedback['grade_recommendation']}") - - if "encouragement" in feedback: - formatted.append(f"\nEncouragement: {feedback['encouragement']}") - - # Include any additional fields not in the standard format - standard_fields = ["overall_feedback", "strengths", "areas_for_improvement", - "learning_objectives_feedback", "grade_recommendation", - "encouragement", "detected_type", "error"] - - # Process any custom fields in the feedback - for key, value in feedback.items(): - if key not in standard_fields: - formatted.append(f"\n{key.replace('_', ' ').title()}:") - if isinstance(value, list): - for item in value: - formatted.append(f"- {item}") - else: - formatted.append(str(value)) - - return "\n".join(formatted) - - except Exception as e: - error_msg = f"Error formatting feedback: {str(e)}" - print(f"\nError: {error_msg}") - return "Error formatting feedback for display. Please check the JSON feedback data." 
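The LangChainManager and FeedbackProcessor deleted above are folded into the new FeedbackService introduced below. A minimal sketch of the resulting call path (not part of this diff, assuming a running Frappe site; the IDs and the exact queue-message shape are placeholders inferred from the fields feedback_handler.py reads):

```python
# Hypothetical driver for the consolidated pipeline sketched in this diff.
import asyncio

from rag_service.core.feedback_handler import FeedbackHandler

async def demo():
    handler = FeedbackHandler()
    # Shaped after the keys handle_submission/create_feedback_request read;
    # real messages arrive from the plagiarism queue.
    message = {
        "submission_id": "SUB-0001",     # placeholder
        "student_id": "ST00000182",      # placeholder
        "assignment_id": "ASSIGN-0001",  # placeholder
        "plagiarism_score": 0.0,
        "is_plagiarized": False,
        "is_ai_generated": False,
        "match_type": "original",
    }
    # Creates/updates the Feedback Request, fetches assignment and student
    # context, generates feedback via FeedbackService, and queues the result
    # back to TAP LMS.
    await handler.handle_submission(message)

asyncio.run(demo())
```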
diff --git a/rag_service/core/feedback_service.py b/rag_service/core/feedback_service.py new file mode 100644 index 0000000..852728e --- /dev/null +++ b/rag_service/core/feedback_service.py @@ -0,0 +1,335 @@ +# rag_service/rag_service/core/feedback_service.py + +import frappe +import json +from datetime import datetime +from typing import Dict, Tuple +from ..feedback_utils.evaluation_generation import EvaluationGenerator +from ..utils.queue_manager import QueueManager + +class FeedbackService: + def __init__(self): + self.evaluation_generator = EvaluationGenerator(self) + self.queue_manager = QueueManager() + self.model_used = None + + async def generate_feedback(self, assignment_context: Dict, submission_data: Dict, submission_id: str, + plagiarism_data: Dict = None, feedback_request_id: str = None) -> Tuple[Dict, str, str]: + """Generate feedback with plagiarism context""" + + result_status = "Pending" + model_used = "N/A" + template_used = "N/A" + + try: + # Check for plagiarism/AI-generated content first; a missing payload falls through to normal generation + plagiarism_data = plagiarism_data or {} + is_plagiarized = plagiarism_data.get("is_plagiarized", False) + is_ai_generated = plagiarism_data.get("is_ai_generated", False) + + # Handle AI-generated submissions + if is_ai_generated: + result_status = "Success - Flagged" + feedback = self._create_ai_generated_feedback( + plagiarism_data + ) + template_used = "Feedback Template for AI Generated Submission" + + # Handle plagiarized submissions + elif is_plagiarized: + result_status = "Success - Flagged" + feedback = self._create_plagiarism_feedback( + plagiarism_data + ) + template_used = "Feedback Template for Plagiarized Submission" + + # Continue with normal feedback generation for original work + else: + result_status = "Success - Original" + feedback, model_used, template_used = await self.evaluation_generator.generate_ai_feedback( + assignment_context, submission_data, submission_id + ) + + feedback["translation_language"] = assignment_context["student"].get("language", "English") + await self._update_result_status(feedback_request_id, result_status) + return feedback, model_used, template_used + + except Exception as e: + result_status = "Failed" + await self._update_result_status(feedback_request_id, result_status, str(e)) + raise + + async def process_feedback( + self, request_id: str, feedback: Dict, model_used: str, template_used: str + ) -> None: + """Process and store feedback in Feedback Request DocType.""" + try: + print(f"\n=== Processing Feedback for Request: {request_id} ===") + + # Get the feedback request document + feedback_request = frappe.get_doc("Feedback Request", request_id) + print(f"Found Feedback Request: {feedback_request.name}") + + print("\nUpdating Feedback Request fields...") + # Update document fields using db_set + feedback_request.db_set("status", "Completed", update_modified=True) + feedback_request.db_set( + "generated_feedback", + json.dumps(feedback, indent=2, ensure_ascii=False), + update_modified=True, + ) + feedback_request.db_set( + "feedback_summary", feedback["overall_feedback"], update_modified=True + ) + feedback_request.db_set("completed_at", datetime.now(), update_modified=True) + feedback_request.db_set("model_used", model_used, update_modified=True) + feedback_request.db_set("template_used", template_used, update_modified=True) + + # Commit changes + frappe.db.commit() + + # Prepare and send message to TAP LMS + message = {
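+ # Payload mirrors the old FeedbackProcessor message; plagiarism and AI-detection details now travel inside feedback["plagiarism_output"] rather than as top-level keys (see the commented-out fields below). + 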
"submission_id": feedback_request.submission_id, + "student_id": feedback_request.student_id, + "assignment_id": feedback_request.assignment_id, + "feedback": feedback, + # "is_plagiarized": feedback['plagiarism_output']['is_plagiarized'], + # "is_ai_generated": feedback['plagiarism_output']['is_ai_generated'], + # "match_type": feedback['plagiarism_output']['match_type'], + # "plagiarism_source": feedback['plagiarism_output']['plagiarism_source'], + # "similarity_score": feedback['plagiarism_output']['similarity_score'], + # "ai_detection_source": feedback['plagiarism_output']['ai_detection_source'], + # "ai_confidence": feedback['plagiarism_output']['ai_confidence'], + + "generated_at": feedback_request.completed_at.isoformat() + if feedback_request.completed_at + else datetime.now().isoformat(), + } + + # Send to TAP LMS queue + self.queue_manager.send_feedback_to_tap(message) + + print(f"\nFeedback processed and sent for request: {request_id}") + + print("Payload sent to TAP LMS queue:") + print(json.dumps(message, indent=2, ensure_ascii=False)) + + except Exception as e: + error_msg = f"Error processing feedback: {str(e)}" + print(f"\nError: {error_msg}") + + try: + if "feedback_request" in locals() and feedback_request: + feedback_request.db_set("status", "Failed", update_modified=True) + feedback_request.db_set("error_log", error_msg, update_modified=True) + frappe.db.commit() + print(f"Request {request_id} marked as failed") + except Exception as save_error: + print(f"Error saving failure status: {str(save_error)}") + + frappe.log_error(error_msg, "Feedback Processing Error") + raise + + async def _update_result_status(self, feedback_request_id: str, status: str, error_message: str = None): + """Update Feedback Request result_status""" + if not feedback_request_id: + return + + update_data = {"result_status": status} + if error_message: + update_data["error_message"] = error_message[:500] # Truncate long errors + + frappe.db.set_value( + "Feedback Request", + feedback_request_id, + update_data, + update_modified=True + ) + frappe.db.commit() + + def _create_ai_generated_feedback(self, plagiarism_data: Dict) -> Dict: + """Create feedback for AI-generated submissions""" + + ai_source = plagiarism_data.get("ai_detection_source", "unknown") + ai_confidence = plagiarism_data.get("ai_confidence", 0.0) + response = { + "overall_feedback": "Hi Champ, I found you have sent AI created work. Please send your work. I'm excited to see what you made.", + "overall_feedback_translated": "Your submission appears to be generated by an AI tool. \ + At MentorMe, we encourage original creative work that reflects your own learning \ + and artistic development. AI-generated images, while interesting, don't demonstrate \ + the skills and creativity we're looking to nurture. Please submit your own original \ + artwork for this assignment.", + "strengths": ["N/A - AI-generated content detected."], + "areas_for_improvement": ["Submit original artwork created by you", + "Review assignment guidelines for creative direction"], + "learning_objectives_feedback": ["N/A - AI-generated content detected."], + "final_grade": 0, + "encouragement": "We believe in your creative abilities!", + "rubric_evaluations": [{ + "Skill": "Content Knowledge", + "grade_value": 0, + "observation": "N/A - AI-generated content detected." 
+ }], + "plagiarism_output": { + "stock_audio_file": "invalid_submission_ai", + "is_plagiarized": False, + "is_ai_generated": True, + "match_type": "ai_generated", + "plagiarism_source": "none", + "similarity_score": 0.0, + "ai_detection_source": ai_source, + "ai_confidence": ai_confidence, + } + } + + return response + + def _create_plagiarism_feedback(self, plagiarism_data: Dict) -> Dict: + """Create feedback for plagiarized submissions""" + + match_type = plagiarism_data.get("match_type") + plagiarism_source = plagiarism_data.get("plagiarism_source") + similarity_score = plagiarism_data.get("similarity_score", 0.0) + ai_confidence = plagiarism_data.get("ai_confidence", 0.0) + + if plagiarism_source == "peer": + feedback = "Hi Champ, it looks like you sent another student's work. Please send your own work. No cheating this time. Excited to see what you made." + elif plagiarism_source == "self": + feedback = "Hi Champ, it looks like you sent the same work again. Please resend your final work. Excited to see what you made." + elif plagiarism_source == "reference": + feedback = "Hi Champ, your work closely matches the reference materials. Please submit your own original work. I'm excited to see your unique creation!" + else: + feedback = "Hi Champ, your work closely matches existing content. Please submit your own original work. I'm excited to see your unique creation!" + # Respond with structured feedback + response = { + "overall_feedback": feedback, + "overall_feedback_translated": feedback, + "strengths": ["N/A - Submission flagged for similarity"], + "areas_for_improvement": ["Create original artwork for this assignment", + "Review academic integrity guidelines"], + "learning_objectives_feedback": ["N/A - Submission flagged for similarity"], + "final_grade": 0, + "encouragement": "Every artist develops their unique style through practice!", + "rubric_evaluations": [{ + "Skill": "Content Knowledge", + "grade_value": 0, + "observation": "N/A - Submission flagged for similarity." + }, + { + "Skill": "Creativity", + "grade_value": 0, + "observation": "N/A - Submission flagged for similarity." 
+ }], + "plagiarism_output": { + "stock_audio_file": "invalid_submission_ai", + "is_plagiarized": True, + "is_ai_generated": False, + "match_type": match_type, + "plagiarism_source": plagiarism_source, + "similarity_score": similarity_score, + "ai_detection_source": "none", + "ai_confidence": ai_confidence, + } + } + + return response + + def validate_feedback_structure(self, feedback: Dict, expected_format: Dict) -> Dict: + """Ensure feedback has all required fields with correct types""" + # Ensure all expected fields are present + for field in expected_format: + if field not in feedback: + if isinstance(expected_format[field], list): + feedback[field] = ["No information provided"] + elif isinstance(expected_format[field], (int, float)): + feedback[field] = 0 + else: + feedback[field] = "No information provided" + + # Validate final_grade format for TAP LMS compatibility + try: + grade = feedback.get("final_grade", 0) + if isinstance(grade, str): + # Extract numeric part only + grade_clean = ''.join(c for c in grade if c.isdigit() or c == '.') + grade = float(grade_clean) if grade_clean else 0 + feedback["final_grade"] = max(0, min(100, float(grade))) + except (ValueError, TypeError): + feedback["final_grade"] = 0 + + # Ensure list fields are lists + list_fields = ["strengths", "areas_for_improvement", "learning_objectives_feedback"] + for field in list_fields: + if field in feedback and not isinstance(feedback[field], list): + feedback[field] = [str(feedback[field])] + + return feedback + + def create_fallback_feedback(self, expected_format: Dict) -> Dict: + """Create structured fallback when JSON parsing fails""" + + fallback = {} + for field, default_value in expected_format.items(): + if field == "overall_feedback": + fallback[field] = "I encountered a system error while processing your submission. This appears to be a technical issue on our end. Please try resubmitting, and if the issue persists, contact your instructor." + elif field == "overall_feedback_translated": + fallback[field] = "I encountered a system error while processing your submission. This appears to be a technical issue on our end. Please try resubmitting, and if the issue persists, contact your instructor." + elif field == "final_grade": + fallback[field] = 50 # Neutral grade for technical issues + elif field == "rubric_evaluations": + fallback[field] = [ + { + "Skill": "Content Knowledge", + "grade_value": 2, + "observation": "Neutral evaluation due to processing issue" + } + ] + elif isinstance(default_value, list): + if "strength" in field: + fallback[field] = ["Your submission was received and processed"] + elif "improvement" in field: + fallback[field] = ["Please ensure your submission clearly shows your work"] + else: + fallback[field] = ["Unable to provide specific feedback due to processing issue"] + else: + if field == "encouragement": + fallback[field] = "Technical issues don't reflect your effort - please try resubmitting!" + else: + fallback[field] = "Processing issue - please resubmit for detailed feedback" + + return fallback + + def create_error_feedback(self, error_msg: str) -> Dict: + """Create feedback for system errors""" + + feedback = { + "overall_feedback": "I encountered a system error while processing your submission. This appears to be a technical issue on our end. Please try resubmitting, and if the issue persists, contact your instructor.", + "overall_feedback_translated": "I encountered a system error while processing your submission. This appears to be a technical issue on our end. 
Please try resubmitting, and if the issue persists, contact your instructor.", + "strengths": [f"Your submission was received successfully, but the system encountered an error during processing: {error_msg}"], + "areas_for_improvement": ["No issues identified with your submission - this appears to be a technical problem"], + "learning_objectives_feedback": ["Unable to evaluate due to system error - please resubmit"], + "final_grade": 0, + "encouragement": "Technical issues don't reflect your effort or ability - please try again!", + "rubric_evaluations": [{ + "Skill": "Content Knowledge", + "grade_value": 2, + "observation": "Neutral evaluation due to processing issue" + }], + } + plagiarism_output = { + "is_plagiarized": False, + "is_ai_generated": False, + "match_type": "original", + "plagiarism_source": "none", + "similarity_score": 0.0, + "ai_detection_source": "none", + "ai_confidence": 0.0, + "similar_sources": [] + } + feedback["plagiarism_output"] = plagiarism_output + + return feedback diff --git a/rag_service/core/langchain_manager.py b/rag_service/core/langchain_manager.py deleted file mode 100644 index 29b719d..0000000 --- a/rag_service/core/langchain_manager.py +++ /dev/null @@ -1,382 +0,0 @@ -# rag_service/rag_service/core/langchain_manager.py - -import frappe -import json -from typing import Dict, List, Optional, Union -from datetime import datetime -from .llm_providers import create_llm_provider, OpenAIProvider - -class LangChainManager: - def __init__(self): - self.llm = None - self.llm_provider = None - self.setup_llm() - - def setup_llm(self): - """Initialize LLM based on settings""" - try: - llm_settings = frappe.get_list( - "LLM Settings", - filters={"is_active": 1}, - limit=1 - ) - - if not llm_settings: - raise Exception("No active LLM configuration found") - - settings = frappe.get_doc("LLM Settings", llm_settings[0].name) - print("\nUsing LLM Settings:") - print(f"Provider: {settings.provider}") - print(f"Model: {settings.model_name}") - - # Create LLM provider based on settings - self.llm_provider = create_llm_provider( - provider=settings.provider, - api_key=settings.get_password('api_secret'), - model_name=settings.model_name, - temperature=settings.temperature, - max_tokens=settings.max_tokens - ) - - # Keep the llm reference for backward compatibility with OpenAI - if isinstance(self.llm_provider, OpenAIProvider): - self.llm = self.llm_provider.llm - - except Exception as e: - error_msg = f"LLM Setup Error: {str(e)}" - print(f"\nError: {error_msg}") - frappe.log_error(error_msg, "LLM Setup Error") - raise - - def clean_json_response(self, response: str) -> str: - """Clean JSON response from various formats""" - try: - # Remove markdown code blocks if present - if "```json" in response: - response = response.split("```json")[1].split("```")[0].strip() - elif "```" in response: - code_blocks = response.split("```") - if len(code_blocks) >= 3: # At least one code block exists - response = code_blocks[1].strip() - # Check if the extracted content looks like JSON - if not (response.startswith('{') or response.startswith('[')): - # If not, try to find JSON in the original response - json_start = response.find('{') - if json_start >= 0: - response = response[json_start:] - - # Try to extract JSON if response starts with explanation - if not response.strip().startswith('{'): - json_start = response.find('{') - if json_start >= 0: - response = response[json_start:] - - # Check if the response ends properly - if not response.strip().endswith('}'): - json_end = 
response.rfind('}') - if json_end >= 0: - response = response[:json_end+1] - - return response.strip() - except Exception as e: - print(f"Error cleaning JSON: {str(e)}") - return response - - def get_universal_template(self) -> Dict: - """Get any active template - no assignment_type filtering""" - try: - print("\n=== Getting Universal Template ===") - - # REMOVED: assignment_type filtering - get ANY active template - templates = frappe.get_list( - "Prompt Template", - filters={"is_active": 1}, - order_by="version desc", - limit=1 - ) - - if templates: - template = frappe.get_doc("Prompt Template", templates[0].name) - print(f"Using universal template: {template.template_name}") - - # Update the last_used timestamp - template.db_set('last_used', datetime.now()) - frappe.db.commit() - - return template - else: - print("No active template found, using built-in default") - return self.get_builtin_template() - - except Exception as e: - error_msg = f"Template Error: {str(e)}" - print(f"\nError: {error_msg}") - frappe.log_error(error_msg, "Template Error") - return self.get_builtin_template() - - def get_builtin_template(self): - """Return built-in default template as fallback""" - class BuiltinTemplate: - def __init__(self): - self.template_name = "Built-in Universal Template" - self.system_prompt = """You are an expert educational feedback assistant that provides constructive, age-appropriate feedback on student submissions across all subjects and assignment types. You adapt your evaluation criteria and language based on the assignment context provided. - -CRITICAL: You must ALWAYS respond with valid JSON, never plain text.""" - - self.user_prompt = """Assignment Context: -Assignment Name: {assignment_name} -Subject Area: {course_vertical} -Assignment Type: {assignment_type} -Description: {assignment_description} - -Learning Objectives: -{learning_objectives} - -Please analyze this student submission and provide feedback in the required JSON format.""" - - self.response_format = """{ - "overall_feedback": "Comprehensive feedback about the submission", - "strengths": ["Specific strength 1", "Specific strength 2", "Specific strength 3"], - "areas_for_improvement": ["Improvement area 1", "Improvement area 2"], - "learning_objectives_feedback": ["Feedback on learning objective 1"], - "grade_recommendation": 85, - "encouragement": "Encouraging message for the student" -}""" - - return BuiltinTemplate() - - def format_objectives(self, objectives: List[Dict]) -> str: - """Format learning objectives for prompt""" - if not objectives: - return "No specific learning objectives provided for this assignment." - - formatted = [] - for i, obj in enumerate(objectives, 1): - if isinstance(obj, dict): - description = obj.get('description', obj.get('objective_id', 'Unknown objective')) - else: - description = str(obj) - formatted.append(f"{i}. 
{description}") - - return "\n".join(formatted) - - def get_default_response_format(self) -> Dict: - """Get default response format""" - return { - "overall_feedback": "Overall assessment of the submission", - "strengths": ["Strength 1", "Strength 2", "Strength 3"], - "areas_for_improvement": ["Area 1", "Area 2"], - "learning_objectives_feedback": ["Feedback on objective 1"], - "grade_recommendation": 75, - "encouragement": "Encouraging message for the student" - } - - async def generate_feedback(self, assignment_context: Dict, submission_url: str, submission_id: str) -> Dict: - """Generate feedback using universal template approach""" - try: - print("\n=== Starting Universal Feedback Generation ===") - - # Get universal template (no assignment_type filtering) - template = self.get_universal_template() - print("Template loaded successfully") - - # Get expected response format from template or use default - try: - if hasattr(template, 'response_format') and template.response_format: - expected_format = json.loads(template.response_format) - print("Using template-defined response format") - else: - expected_format = self.get_default_response_format() - print("Using default response format") - except json.JSONDecodeError: - expected_format = self.get_default_response_format() - print("Failed to parse template response format, using default") - - # Format learning objectives - learning_objectives = self.format_objectives(assignment_context.get("learning_objectives", [])) - - # SIMPLIFIED: Use template directly without complex modifications - # The universal template handles all subject types internally - - # Format user prompt with assignment context - user_prompt_vars = { - "assignment_name": assignment_context["assignment"].get("name", ""), - "assignment_description": assignment_context["assignment"].get("description", ""), - "course_vertical": assignment_context.get("course_vertical", "General"), - "assignment_type": assignment_context["assignment"].get("type", "Practical"), - "learning_objectives": learning_objectives - } - - # Format the user prompt with available variables - formatted_user_prompt = template.user_prompt - for key, value in user_prompt_vars.items(): - placeholder = "{" + key + "}" - if placeholder in formatted_user_prompt: - formatted_user_prompt = formatted_user_prompt.replace(placeholder, str(value)) - - # Use template system prompt as-is (it already handles JSON requirement) - system_prompt = template.system_prompt - - # Prepare messages for the LLM provider - messages = self.llm_provider.format_messages( - system_prompt=system_prompt, - user_prompt=formatted_user_prompt, - image_url=submission_url - ) - - print(f"\nAssignment: {assignment_context['assignment'].get('name', 'Unknown')}") - print(f"Subject: {assignment_context.get('course_vertical', 'General')}") - print(f"Type: {assignment_context['assignment'].get('type', 'Unknown')}") - print("\nSending request to LLM...") - - # Generate feedback - SINGLE LLM CALL (no separate validation) - raw_text = await self.llm_provider.generate_with_vision(messages) - print(f"\nRaw LLM Response: {raw_text}") - - try: - # Clean up the response text - cleaned_text = self.clean_json_response(raw_text) - print(f"\nCleaned Response Text: {cleaned_text}") - - feedback = json.loads(cleaned_text) - print("\nSuccessfully parsed JSON response") - - # Validate and ensure required fields - feedback = self.validate_feedback_structure(feedback, expected_format) - - except json.JSONDecodeError as e: - print(f"\nJSON Parse Error: {str(e)}") - 
print("Using fallback feedback format") - - # Create structured fallback response - feedback = self.create_fallback_feedback(assignment_context, expected_format) - - print("\n=== Feedback Generation Completed Successfully ===") - return feedback - - except Exception as e: - error_msg = f"Error generating feedback for submission {submission_id}: {str(e)}" - print(f"\nError: {error_msg}") - frappe.log_error(message=error_msg, title="Feedback Generation Error") - - # Return structured error response - return self.create_error_feedback(assignment_context) - - def validate_feedback_structure(self, feedback: Dict, expected_format: Dict) -> Dict: - """Ensure feedback has all required fields with correct types""" - # Ensure all expected fields are present - for field in expected_format: - if field not in feedback: - if isinstance(expected_format[field], list): - feedback[field] = ["No information provided"] - elif isinstance(expected_format[field], (int, float)): - feedback[field] = 0 - else: - feedback[field] = "No information provided" - - # Validate grade_recommendation format for TAP LMS compatibility - try: - grade = feedback.get("grade_recommendation", 0) - if isinstance(grade, str): - # Extract numeric part only - grade_clean = ''.join(c for c in grade if c.isdigit() or c == '.') - grade = float(grade_clean) if grade_clean else 0 - feedback["grade_recommendation"] = max(0, min(100, float(grade))) - except (ValueError, TypeError): - feedback["grade_recommendation"] = 0 - - # Ensure list fields are lists - list_fields = ["strengths", "areas_for_improvement", "learning_objectives_feedback"] - for field in list_fields: - if field in feedback and not isinstance(feedback[field], list): - feedback[field] = [str(feedback[field])] - - return feedback - - def create_fallback_feedback(self, assignment_context: Dict, expected_format: Dict) -> Dict: - """Create structured fallback when JSON parsing fails""" - assignment_name = assignment_context["assignment"].get("name", "this assignment") - - fallback = {} - for field, default_value in expected_format.items(): - if field == "overall_feedback": - fallback[field] = f"I encountered a formatting issue while processing your submission for {assignment_name}. This appears to be a technical problem on our end. Please try resubmitting if this issue persists." - elif field == "grade_recommendation": - fallback[field] = 50 # Neutral grade for technical issues - elif isinstance(default_value, list): - if "strength" in field: - fallback[field] = ["Your submission was received and processed"] - elif "improvement" in field: - fallback[field] = ["Please ensure your submission clearly shows your work"] - else: - fallback[field] = ["Unable to provide specific feedback due to processing issue"] - else: - if field == "encouragement": - fallback[field] = "Technical issues don't reflect your effort - please try resubmitting!" - else: - fallback[field] = "Processing issue - please resubmit for detailed feedback" - - return fallback - - def create_error_feedback(self, assignment_context: Dict) -> Dict: - """Create feedback for system errors""" - assignment_name = assignment_context["assignment"].get("name", "this assignment") - - return { - "overall_feedback": f"I encountered a system error while processing your submission for {assignment_name}. This appears to be a technical issue on our end. 
Please try resubmitting, and if the issue persists, contact your instructor.", - "strengths": ["Your submission was received successfully"], - "areas_for_improvement": ["No issues identified with your submission - this appears to be a technical problem"], - "learning_objectives_feedback": ["Unable to evaluate due to system error - please resubmit"], - "grade_recommendation": 0, - "encouragement": "Technical issues don't reflect your effort or ability - please try again!" - } - - @staticmethod - def format_feedback_for_display(feedback: Dict) -> str: - """Format feedback for human-readable display""" - try: - formatted = [] - - if "overall_feedback" in feedback: - formatted.append("Overall Feedback:") - formatted.append(feedback["overall_feedback"]) - - if "strengths" in feedback: - formatted.append("\nStrengths:") - for strength in feedback["strengths"]: - formatted.append(f"- {strength}") - - if "areas_for_improvement" in feedback: - formatted.append("\nAreas for Improvement:") - for area in feedback["areas_for_improvement"]: - formatted.append(f"- {area}") - - if "learning_objectives_feedback" in feedback: - formatted.append("\nLearning Objectives Feedback:") - for obj in feedback["learning_objectives_feedback"]: - formatted.append(f"- {obj}") - - if "grade_recommendation" in feedback: - formatted.append(f"\nGrade Recommendation: {feedback['grade_recommendation']}") - - if "encouragement" in feedback: - formatted.append(f"\nEncouragement: {feedback['encouragement']}") - - return "\n".join(formatted) - - except Exception as e: - error_msg = f"Error formatting feedback: {str(e)}" - print(f"\nError: {error_msg}") - return "Error formatting feedback for display. Please check the JSON feedback data." - - def get_current_config(self) -> Dict: - """Get current LLM configuration""" - if not self.llm_provider: - return {"status": "not_configured"} - - return { - "provider": self.llm_provider.__class__.__name__, - "model": self.llm_provider.model_name, - "temperature": self.llm_provider.temperature, - "max_tokens": self.llm_provider.max_tokens - } diff --git a/rag_service/core/langchain_manager.py.bak b/rag_service/core/langchain_manager.py.bak deleted file mode 100644 index f51ce20..0000000 --- a/rag_service/core/langchain_manager.py.bak +++ /dev/null @@ -1,466 +0,0 @@ -# rag_service/rag_service/core/langchain_manager.py - -import frappe -from langchain_openai import ChatOpenAI -from langchain.schema import HumanMessage, SystemMessage -import json -from typing import Dict, List, Optional, Union -import httpx -from datetime import datetime - -class LangChainManager: - def __init__(self): - self.llm = None - self.setup_llm() - - def setup_llm(self): - """Initialize LLM based on settings""" - try: - llm_settings = frappe.get_list( - "LLM Settings", - filters={"is_active": 1}, - limit=1 - ) - - if not llm_settings: - raise Exception("No active LLM configuration found") - - settings = frappe.get_doc("LLM Settings", llm_settings[0].name) - print("\nUsing LLM Settings:") - print(f"Provider: {settings.provider}") - print(f"Model: {settings.model_name}") - - if settings.provider == "OpenAI": - self.llm = ChatOpenAI( - model_name=settings.model_name, - openai_api_key=settings.get_password('api_secret'), - temperature=settings.temperature, - max_tokens=settings.max_tokens - ) - elif settings.provider == "Anthropic": - # Add Anthropic model initialization if needed - raise Exception("Anthropic provider not yet implemented") - else: - raise Exception(f"Unsupported LLM provider: {settings.provider}") - - 
except Exception as e: - error_msg = f"LLM Setup Error: {str(e)}" - print(f"\nError: {error_msg}") - frappe.log_error(error_msg, "LLM Setup Error") - raise - - def clean_json_response(self, response: str) -> str: - """Clean JSON response from various formats""" - try: - # Remove markdown code blocks if present - if "```json" in response: - response = response.split("```json")[1].split("```")[0].strip() - elif "```" in response: - code_blocks = response.split("```") - if len(code_blocks) >= 3: # At least one code block exists - response = code_blocks[1].strip() - # Check if the extracted content looks like JSON - if not (response.startswith('{') or response.startswith('[')): - # If not, try to find JSON in the original response - json_start = response.find('{') - if json_start >= 0: - response = response[json_start:] - - # Try to extract JSON if response starts with explanation - if not response.strip().startswith('{'): - json_start = response.find('{') - if json_start >= 0: - response = response[json_start:] - - # Check if the response ends properly - if not response.strip().endswith('}'): - json_end = response.rfind('}') - if json_end >= 0: - response = response[:json_end+1] - - return response.strip() - except Exception as e: - print(f"Error cleaning JSON: {str(e)}") - return response - - async def validate_submission_image(self, image_url: str, assignment_type: str) -> Dict: - """Pre-validate if image appears to be appropriate for the assignment type""" - try: - print(f"\n=== Validating Submission Image ===") - print(f"URL: {image_url}") - print(f"Assignment Type: {assignment_type}") - - validation_prompt = f"""You are an artwork submission validator. -Analyze the image and determine if it is a valid submission for a {assignment_type} assignment. -You must respond ONLY with a JSON object containing these exact fields: -{{ - "is_valid": boolean, - "reason": "detailed explanation of why the image is valid or invalid", - "detected_type": "specific description of what type of image this appears to be" -}}""" - - messages = [ - SystemMessage(content=validation_prompt), - HumanMessage(content=[{ - "type": "image_url", - "image_url": {"url": image_url} - }]) - ] - - response = await self.llm.agenerate([messages]) - result = response.generations[0][0].text.strip() - print(f"\nRaw Validation Response: {result}") - - # Clean and parse the response - cleaned_result = self.clean_json_response(result) - print(f"\nCleaned Validation Response: {cleaned_result}") - - try: - validation_result = json.loads(cleaned_result) - print(f"\nParsed Validation Result: {json.dumps(validation_result, indent=2)}") - return validation_result - except json.JSONDecodeError as e: - print(f"JSON Decode Error: {str(e)}") - return { - "is_valid": True, # Default to True to avoid false negatives - "reason": "Failed to validate image format, proceeding with analysis", - "detected_type": "unvalidated_submission" - } - - except Exception as e: - error_msg = f"Image validation failed: {str(e)}" - print(f"\nError: {error_msg}") - return { - "is_valid": True, # Default to True to avoid false negatives - "reason": error_msg, - "detected_type": "error_during_validation" - } - - def format_objectives(self, objectives: List[Dict]) -> str: - """Format learning objectives for prompt""" - if not objectives: - return "No specific learning objectives provided for this assignment." 
- - return "\n".join([ - f"- {obj.get('description', obj.get('objective_id', 'Unknown objective'))}" - for obj in objectives - ]) - - async def get_image_content(self, image_url: str) -> Dict: - """Get image content in format required by GPT-4V""" - print(f"\nPreparing image content from URL: {image_url}") - return { - "type": "image_url", - "image_url": { - "url": image_url, - "detail": "high" - } - } - - def get_prompt_template(self, assignment_type: str) -> Dict: - """Get active prompt template for assignment type""" - try: - # First try to get an exact match for the assignment type - templates = frappe.get_list( - "Prompt Template", - filters={ - "assignment_type": assignment_type, - "is_active": 1 - }, - order_by="version desc", - limit=1 - ) - - # If no exact match found, try to get a generic template - if not templates: - print(f"\nNo specific template found for {assignment_type}, looking for generic template") - templates = frappe.get_list( - "Prompt Template", - filters={ - "is_active": 1 - }, - order_by="version desc", - limit=1 - ) - - if not templates: - raise Exception(f"No active prompt template found for {assignment_type}") - - template = frappe.get_doc("Prompt Template", templates[0].name) - print(f"\nUsing template: {template.template_name}") - - # Update the last_used timestamp - template.db_set('last_used', datetime.now()) - frappe.db.commit() - - return template - - except Exception as e: - error_msg = f"Prompt Template Error: {str(e)}" - print(f"\nError: {error_msg}") - frappe.log_error(error_msg, "Prompt Template Error") - raise - - def get_default_response_format(self, assignment_type: str) -> Dict: - """Get default response format if template doesn't define one""" - # Default format based on assignment type - default_formats = { - "Written": { - "overall_feedback": "Overall assessment of the written work", - "strengths": ["Strength 1", "Strength 2"], - "areas_for_improvement": ["Area 1", "Area 2"], - "learning_objectives_feedback": ["Feedback on objective 1"], - "grade_recommendation": "Numerical grade", - "encouragement": "Encouraging message for the student" - }, - "Practical": { - "overall_feedback": "Overall assessment of the practical work", - "strengths": ["Strength 1", "Strength 2"], - "areas_for_improvement": ["Area 1", "Area 2"], - "learning_objectives_feedback": ["Feedback on objective 1"], - "grade_recommendation": "Numerical grade", - "encouragement": "Encouraging message for the student" - }, - "Performance": { - "overall_feedback": "Overall assessment of the performance", - "strengths": ["Strength 1", "Strength 2"], - "areas_for_improvement": ["Area 1", "Area 2"], - "learning_objectives_feedback": ["Feedback on objective 1"], - "grade_recommendation": "Numerical grade", - "encouragement": "Encouraging message for the student" - }, - "Collaborative": { - "overall_feedback": "Overall assessment of the collaborative work", - "strengths": ["Strength 1", "Strength 2"], - "areas_for_improvement": ["Area 1", "Area 2"], - "learning_objectives_feedback": ["Feedback on objective 1"], - "grade_recommendation": "Numerical grade", - "encouragement": "Encouraging message for the student" - } - } - - # Return type-specific format or a generic one - return default_formats.get(assignment_type, default_formats["Practical"]) - - async def generate_feedback(self, assignment_context: Dict, submission_url: str, submission_id: str) -> Dict: - """Generate feedback using LangChain and GPT-4V""" - try: - print("\n=== Starting Feedback Generation ===") - - # Get assignment type 
from context - assignment_type = assignment_context["assignment"]["type"] - - # Get prompt template based on assignment type - template = self.get_prompt_template(assignment_type) - print("\nTemplate loaded successfully") - - # Get expected response format from template or use default - try: - if hasattr(template, 'response_format') and template.response_format: - expected_format = json.loads(template.response_format) - print("\nUsing template-defined response format") - else: - expected_format = self.get_default_response_format(assignment_type) - print("\nUsing default response format for assignment type:", assignment_type) - except json.JSONDecodeError: - expected_format = self.get_default_response_format(assignment_type) - print("\nFailed to parse template response format, using default") - - # Format expected format as JSON string - response_format_str = json.dumps(expected_format, indent=2) - - # Validate image first (keep this generic) - validation_result = await self.validate_submission_image( - submission_url, - assignment_type - ) - - # Process based on validation result - if validation_result.get("is_valid", False): - print("\nValid submission detected - generating feedback") - - # Format learning objectives if they exist - learning_objectives = "" - if assignment_context.get("learning_objectives") and len(assignment_context["learning_objectives"]) > 0: - learning_objectives = self.format_objectives(assignment_context["learning_objectives"]) - - # Prepare system prompt with expected format - enhanced_system_prompt = f""" - {template.system_prompt} - - IMPORTANT: You must ALWAYS respond with a valid JSON object matching this format exactly: - {response_format_str} - - If the image does not appear to be related to this assignment context, set the "overall_feedback" field to EXACTLY: - "Something went wrong—It looks like there's an issue from our end or your submission is incorrect! I am not able to provide feedback for your submission." - - Do not include any additional text, explanation, or markdown formatting outside the JSON object. - Return ONLY the JSON object, nothing else. 
- """ - - # Format variables for the user prompt - user_prompt_vars = { - "assignment_description": assignment_context["assignment"]["description"], - "learning_objectives": learning_objectives, - "assignment_type": assignment_type, - "assignment_name": assignment_context["assignment"]["name"] - } - - # Try to apply any additional variables from the template - if hasattr(template, 'variables') and template.variables: - for var in template.variables: - if var.variable_name in assignment_context: - user_prompt_vars[var.variable_name] = assignment_context[var.variable_name] - - # Format the user prompt with available variables - formatted_user_prompt = template.user_prompt - for key, value in user_prompt_vars.items(): - placeholder = "{" + key + "}" - if placeholder in formatted_user_prompt: - formatted_user_prompt = formatted_user_prompt.replace(placeholder, str(value)) - - # Prepare text content - text_content = { - "type": "text", - "text": formatted_user_prompt - } - - # Prepare image content - image_content = await self.get_image_content(submission_url) - - # Prepare messages - messages = [ - SystemMessage(content=enhanced_system_prompt), - HumanMessage(content=[text_content, image_content]) - ] - - print("\nSending request to LLM...") - - # Generate feedback - response = await self.llm.agenerate([messages]) - raw_text = response.generations[0][0].text.strip() - print("\nRaw LLM Response:") - print(raw_text) - - try: - # Clean up the response text - cleaned_text = self.clean_json_response(raw_text) - print("\nCleaned Response Text:") - print(cleaned_text) - - feedback = json.loads(cleaned_text) - print("\nSuccessfully parsed JSON response") - - # Verify that all expected fields are present - for field in expected_format: - if field not in feedback: - if isinstance(expected_format[field], list): - feedback[field] = ["No information provided"] - else: - feedback[field] = f"No information provided for {field}" - - except json.JSONDecodeError as e: - print(f"\nJSON Parse Error: {str(e)}") - print("Using fallback feedback format") - - # Create a fallback response matching the expected format - feedback = {} - for field in expected_format: - if isinstance(expected_format[field], list): - feedback[field] = ["Unable to generate proper feedback due to processing error"] - else: - feedback[field] = "Unable to generate proper feedback due to processing error" - - feedback["error"] = f"JSON parsing error: {str(e)}" - else: - print("\nInvalid submission detected - returning error feedback") - # Create an error feedback matching the expected format - feedback = {} - - # Set the special error message for overall_feedback - feedback["overall_feedback"] = "Something went wrong—It looks like there's an issue from our end or your submission is incorrect! I am not able to provide feedback for your submission." 
- - # Fill in other required fields - for field in expected_format: - if field != "overall_feedback": # Skip overall_feedback as we've already set it - if isinstance(expected_format[field], list): - feedback[field] = ["Please ensure your submission matches the assignment requirements"] - else: - feedback[field] = "Please ensure your submission matches the assignment requirements" - - # Add detected type information - feedback["detected_type"] = validation_result.get('detected_type', 'unknown') - feedback["reason"] = validation_result.get('reason', 'Unknown issue with submission') - - print("\n=== Feedback Generation Completed Successfully ===") - return feedback - - except Exception as e: - error_msg = f"Error generating feedback for submission {submission_id}: {str(e)}" - print(f"\nError: {error_msg}") - frappe.log_error(message=error_msg, title="Feedback Generation Error") - raise - - @staticmethod - def format_feedback_for_display(feedback: Dict) -> str: - """Format feedback for human-readable display""" - try: - formatted = [] - - if "overall_feedback" in feedback: - formatted.append("Overall Feedback:") - formatted.append(feedback["overall_feedback"]) - - if "strengths" in feedback: - formatted.append("\nStrengths:") - for strength in feedback["strengths"]: - formatted.append(f"- {strength}") - - if "areas_for_improvement" in feedback: - formatted.append("\nAreas for Improvement:") - for area in feedback["areas_for_improvement"]: - formatted.append(f"- {area}") - - if "learning_objectives_feedback" in feedback: - formatted.append("\nLearning Objectives Feedback:") - for obj in feedback["learning_objectives_feedback"]: - formatted.append(f"- {obj}") - - if "grade_recommendation" in feedback: - formatted.append(f"\nGrade Recommendation: {feedback['grade_recommendation']}") - - if "encouragement" in feedback: - formatted.append(f"\nEncouragement: {feedback['encouragement']}") - - # Include any additional fields not in the standard format - standard_fields = ["overall_feedback", "strengths", "areas_for_improvement", - "learning_objectives_feedback", "grade_recommendation", - "encouragement", "detected_type", "error"] - - for key, value in feedback.items(): - if key not in standard_fields: - formatted.append(f"\n{key.replace('_', ' ').title()}:") - if isinstance(value, list): - for item in value: - formatted.append(f"- {item}") - else: - formatted.append(str(value)) - - return "\n".join(formatted) - - except Exception as e: - error_msg = f"Error formatting feedback: {str(e)}" - print(f"\nError: {error_msg}") - return "Error formatting feedback" - - def get_current_config(self) -> Dict: - """Get current LLM configuration""" - if not self.llm: - return {"status": "not_configured"} - - return { - "provider": self.llm.__class__.__name__, - "model": self.llm.model_name, - "temperature": self.llm.temperature, - "max_tokens": self.llm.max_tokens - } diff --git a/rag_service/core/llm_interface.py b/rag_service/core/llm_interface.py index ff04b15..bb788f3 100644 --- a/rag_service/core/llm_interface.py +++ b/rag_service/core/llm_interface.py @@ -2,7 +2,6 @@ from abc import ABC, abstractmethod from typing import List, Dict, Optional -import json class BaseLLMInterface(ABC): """Base interface for all LLM providers""" @@ -22,6 +21,12 @@ async def generate(self, messages: List[Dict]) -> str: async def generate_with_vision(self, messages: List[Dict]) -> str: """Generate response from vision-enabled LLM""" pass + + async def generate_with_video(self, video_source, prompt: str, mime_type: Optional[str] = 
None): + raise NotImplementedError("Video generation is not supported by this provider") + + async def generate_with_audio(self, audio_source, prompt: str, mime_type: Optional[str] = None): + raise NotImplementedError("Audio generation is not supported by this provider") def format_messages(self, system_prompt: str, user_prompt: str, image_url: Optional[str] = None) -> List[Dict]: """Format messages for the specific LLM provider""" diff --git a/rag_service/core/llm_providers.py b/rag_service/core/llm_providers.py index e409b9e..a9573da 100644 --- a/rag_service/core/llm_providers.py +++ b/rag_service/core/llm_providers.py @@ -1,16 +1,19 @@ # rag_service/rag_service/core/llm_providers.py -import aiohttp -import asyncio -from typing import List, Dict, Optional +from together import Together +import json +from typing import Any, List, Dict, Optional from langchain_openai import ChatOpenAI from langchain.schema import HumanMessage, SystemMessage from .llm_interface import BaseLLMInterface +import vertexai +from google.oauth2 import service_account +from vertexai.generative_models import GenerativeModel, Part class OpenAIProvider(BaseLLMInterface): """OpenAI provider using LangChain""" - def __init__(self, api_key: str, model_name: str, temperature: float = 0.7, max_tokens: int = 2000): + def __init__(self, api_key: str, model_name: str, temperature: float = 0.7, max_tokens: int = 2000, settings: Any = None): super().__init__(api_key, model_name, temperature, max_tokens) self.llm = ChatOpenAI( model_name=model_name, @@ -38,66 +41,40 @@ async def generate_with_vision(self, messages: List[Dict]) -> str: class TogetherAIProvider(BaseLLMInterface): """Together AI provider optimized for Llama 3.2 90B Vision""" - def __init__(self, api_key: str, model_name: str, temperature: float = 0.7, max_tokens: int = 2000): + def __init__(self, api_key: str, model_name: str, temperature: float = 0.7, max_tokens: int = 15000, settings: Any = None): super().__init__(api_key, model_name, temperature, max_tokens) - self.base_url = "https://api.together.xyz/v1/chat/completions" - - # Llama 3.2 90B specific configurations - self.is_llama_32_90b = "Llama-3.2-90B" in model_name - if self.is_llama_32_90b: - # Optimize for Llama 3.2 90B - self.timeout = 90 # Longer timeout for larger model - self.max_retries = 3 - - async def generate(self, messages: List[Dict]) -> str: - headers = { - "Authorization": f"Bearer {self.api_key}", - "Content-Type": "application/json" - } - - data = { - "model": self.model_name, - "messages": messages, - "temperature": self.temperature, - "max_tokens": self.max_tokens, - # Llama 3.2 90B specific parameters - "top_p": 0.95, - "repetition_penalty": 1.1, - "stream": False - } - - # Add specific parameters for Llama 3.2 90B - if self.is_llama_32_90b: - data.update({ - "frequency_penalty": 0.0, - "presence_penalty": 0.0, - "stop": ["", "<|eot_id|>"] # Llama 3.2 stop tokens - }) - - timeout = aiohttp.ClientTimeout(total=self.timeout if hasattr(self, 'timeout') else 60) - - async with aiohttp.ClientSession(timeout=timeout) as session: - for attempt in range(self.max_retries if hasattr(self, 'max_retries') else 1): - try: - async with session.post(self.base_url, headers=headers, json=data) as response: - if response.status != 200: - error_text = await response.text() - if attempt < (self.max_retries - 1) if hasattr(self, 'max_retries') else 0: - await asyncio.sleep(2 ** attempt) # Exponential backoff - continue - raise Exception(f"Together AI API error: {response.status} - {error_text}") - - result = 
await response.json()
-                        return result["choices"][0]["message"]["content"]
-                except asyncio.TimeoutError:
-                    if attempt < (self.max_retries - 1) if hasattr(self, 'max_retries') else 0:
-                        await asyncio.sleep(2 ** attempt)
-                        continue
-                    raise Exception("Request timeout - Llama 3.2 90B may need more processing time")
-
-    async def generate_with_vision(self, messages: List[Dict]) -> str:
+        self.client = Together(api_key=api_key)
+
+    async def generate(self, system_prompt: str, user_prompt: str) -> str:
+        try:
+            messages = self.format_messages(system_prompt, user_prompt)
+            # The Together SDK client is synchronous, so the call is not awaited
+            response = self.client.chat.completions.create(
+                model=self.model_name,
+                messages=messages,
+                temperature=self.temperature
+            )
+            return response.choices[0].message.content
+        except Exception as e:
+            print(f"Error during Together AI generation: {e}")
+            raise Exception(f"Error during Together AI generation: {e}")
+
+    async def generate_with_vision(self, image_url: str, system_prompt: str, user_prompt: str = ""):
         # Llama 3.2 90B Vision handles image URLs in the message content
-        return await self.generate(messages)
+        try:
+            messages = self.format_messages(system_prompt, user_prompt, image_url)
+            # print(f"\nFormatted messages for Together AI:\n{json.dumps(messages, indent=2)}")
+            response = self.client.chat.completions.create(
+                # reasoning={"enabled": False},
+                reasoning_effort="low",
+                model=self.model_name,
+                messages=messages,
+                temperature=self.temperature,
+                max_tokens=self.max_tokens,
+            )
+            # Return the full response object so callers can compute cost from usage data
+            return response
+        except Exception as e:
+            print(f"Error during Together AI vision generation: {e}")
+            raise Exception(f"Error during Together AI vision generation: {e}")
 
     def format_messages(self, system_prompt: str, user_prompt: str, image_url: Optional[str] = None) -> List[Dict]:
         """Format messages specifically for Llama 3.2 90B Vision"""
@@ -118,8 +95,7 @@ def format_messages(self, system_prompt: str, user_prompt: str, image_url: Optio
                     {
                         "type": "image_url",
                         "image_url": {
-                            "url": image_url,
-                            "detail": "high"  # Llama 3.2 90B can handle high detail
+                            "url": image_url
                         }
                     }
                 ]
@@ -131,19 +107,281 @@ def format_messages(self, system_prompt: str, user_prompt: str, image_url: Optio
             ]
         return messages
+
+    def calculate_cost(self, response):
+        """
+        Calculates the cost of a Together AI API call from its usage data.
+        Per-1M-token input and output rates are looked up from the Together
+        models list for the model named in the response.
+
+        :param response: The chat completion response as a dict.
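+
+        Example (hypothetical token counts and model name; per-model rates
+        are fetched from the models list at call time):
+            response = {"model": "meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo",
+                        "usage": {"prompt_tokens": 1200, "completion_tokens": 300}}
+            cost = (1200 / 1_000_000) * input_rate + (300 / 1_000_000) * output_rate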
+ """ + # Extract token counts from the usage attribute + model_name = response["model"] + all_models = self.client.models.list() + input_cost, output_cost = 0.0, 0.0 + for model in all_models: + if model.id == model_name: + input_cost = model.pricing.input + output_cost = model.pricing.output + break + + prompt_tokens = response["usage"]["prompt_tokens"] + completion_tokens = response["usage"]["completion_tokens"] + + # Calculate costs (Standardizing to price per token by dividing by 1,000,000) + total_input_cost = (prompt_tokens / 1_000_000) * input_cost + total_output_cost = (completion_tokens / 1_000_000) * output_cost + + return total_input_cost + total_output_cost + + +class GeminiProvider(BaseLLMInterface): + """Gemini provider using Vertex AI.""" + + _vertex_init = {"key_data_id": None, "project_id": None, "location": None} + + def __init__( + self, + api_key: str, + model_name: str, + temperature: float = 0.7, + max_tokens: int = 2000, + settings: Any = None, + ): + super().__init__(api_key, model_name, temperature, max_tokens) + + self.key_data = self._resolve_service_account_credentials(settings) + self.location = settings.location + self.project_id = settings.project_id + self._ensure_vertex_init() + + def _resolve_service_account_credentials(self, settings: Any) -> Optional[Dict]: + raw_key = settings.get("credentials_json") + + if isinstance(raw_key, dict): + return raw_key + if isinstance(raw_key, str): + raw_key = raw_key.strip() + if raw_key: + try: + return json.loads(raw_key) + except json.JSONDecodeError: + return None + + return None + + def _key_data_id(self) -> Optional[tuple]: + if not self.key_data: + return None + return ( + self.key_data.get("project_id"), + self.key_data.get("client_email"), + self.key_data.get("private_key_id"), + ) + + def _ensure_vertex_init(self) -> None: + init_state = GeminiProvider._vertex_init + if not self.key_data: + raise ValueError("Gemini service account key JSON is required") + + key_data_id = self._key_data_id() + resolved_project_id = self.project_id or self.key_data.get("project_id") + if ( + init_state["key_data_id"] == key_data_id + and init_state["project_id"] == resolved_project_id + and init_state["location"] == self.location + ): + return + + credentials = service_account.Credentials.from_service_account_info(self.key_data) + if not resolved_project_id: + raise ValueError("Project ID is required for Gemini provider initialization") + + vertexai.init(project=resolved_project_id, location=self.location, credentials=credentials) + GeminiProvider._vertex_init = { + "key_data_id": key_data_id, + "project_id": resolved_project_id, + "location": self.location, + } + + def _combine_messages(self, messages: List[Dict]) -> str: + parts = [] + for msg in messages: + content = msg.get("content", "") + if isinstance(content, list): + for item in content: + if isinstance(item, dict) and item.get("type") == "text": + parts.append(item.get("text", "")) + else: + parts.append(str(content)) + return "\n\n".join([p for p in parts if p]) + + async def generate(self, messages: List[Dict]) -> str: + prompt = self._combine_messages(messages) + model = GenerativeModel(self.model_name) + response = model.generate_content( + prompt, + generation_config={ + "temperature": self.temperature, + }, + ) + cost = self.calculate_cost(response.to_dict()) + return response.text, cost or "", 0.0 + + def _build_media_part(self, media_source, mime_type: Optional[str] = None, default_kind: str = "application") -> Part: + if isinstance(media_source, dict): + 
media_bytes = media_source.get("content") + resolved_mime_type = mime_type or media_source.get("mime_type") + if media_bytes is not None: + return Part.from_data(data=media_bytes, mime_type=resolved_mime_type or f"{default_kind}/octet-stream") + media_url = media_source.get("submission_url") or media_source.get("url") + resolved_mime_type = resolved_mime_type or self._infer_mime_type(media_url) + return Part.from_uri(uri=self._normalize_media_uri(media_url), mime_type=resolved_mime_type) + + if isinstance(media_source, (bytes, bytearray)): + return Part.from_data(data=bytes(media_source), mime_type=mime_type or f"{default_kind}/octet-stream") + + media_url = str(media_source) + return Part.from_uri( + uri=self._normalize_media_uri(media_url), + mime_type=mime_type or self._infer_mime_type(media_url), + ) + + async def _generate_with_media(self, media_source, prompt: str, mime_type: Optional[str] = None, default_kind: str = "application") -> str: + media_part = self._build_media_part(media_source, mime_type=mime_type, default_kind=default_kind) + model = GenerativeModel(self.model_name) + response = model.generate_content( + [media_part, prompt], + generation_config={ + "temperature": self.temperature, + "response_mime_type": "application/json", + }, + ) + return response + + async def generate_with_vision(self, image_source, prompt: str, mime_type: Optional[str] = None) -> str: + try: + return await self._generate_with_media( + image_source, + prompt, + mime_type=mime_type, + default_kind="image", + ) + except Exception as e: + raise Exception(f"Error during vision generation: {e}") + + + async def generate_with_video(self, video_source, prompt: str, mime_type: Optional[str] = None) -> str: + try: + return await self._generate_with_media( + video_source, + prompt, + mime_type=mime_type, + default_kind="video", + ) + except Exception as e: + print(f"Error during video generation: {e}") + raise Exception(f"Error during video generation: {e}") + + async def generate_with_audio(self, audio_source, prompt: str, mime_type: Optional[str] = None) -> str: + try: + return await self._generate_with_media( + audio_source, + prompt, + mime_type=mime_type, + default_kind="audio", + ) + except Exception as e: + print(f"Error during audio generation: {e}") + raise Exception(f"Error during audio generation: {e}") + + def _infer_mime_type(self, url: str) -> str: + if url.endswith((".png", ".jpg", ".jpeg", ".bmp", ".gif")): + return f"image/{url.split('.')[-1]}" + if url.endswith((".mp4", ".avi", ".mov")): + return f"video/{url.split('.')[-1]}" + if url.endswith((".mp3", ".wav", ".ogg", ".aac", ".m4a", ".flac")): + extension = url.split(".")[-1] + if extension == "mp3": + return "audio/mpeg" + if extension == "m4a": + return "audio/mp4" + return f"audio/{extension}" + return "application/octet-stream" + + def _normalize_media_uri(self, media_url: str) -> str: + if media_url.startswith("https://storage.googleapis.com/"): + return media_url.replace("https://storage.googleapis.com/", "gs://", 1) + return media_url + + def calculate_cost(self, response): + """ + Calculates the cost of a Gemini API call based on usage_metadata. + Supports Gemini 2.5 Pro (tiered), 2.5 Flash, and 3.1 Pro/Flash. 
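+
+        Worked example (hypothetical counts; flat Flash rates from the table below):
+            gemini-2.5-flash, 10,000 input tokens and 1,000 output tokens:
+            cost = 10_000 * (0.30 / 1e6) + 1_000 * (2.50 / 1e6) = 0.0055 USD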
+ """ + metadata = response.get("usage_metadata") + model = response.get("model_version", "gemini-2.5-pro") + + + # Token counts + # For images/multimodal, 'total_token_count' includes the media tokens + output_tokens = metadata.get("candidates_token_count", 0) + input_tokens = metadata.get("total_token_count", 0) - output_tokens + + # Pricing per 1 Million Tokens (as of March 2026) + pricing = { + "gemini-2.5-pro": { + "input_std": 1.25, "output_std": 10.00, + "input_long": 2.50, "output_long": 15.00 + }, + "gemini-3.1-pro": { + "input_std": 2.00, "output_std": 12.00, + "input_long": 4.00, "output_long": 18.00 + }, + "gemini-2.5-flash": {"input": 0.30, "output": 2.50}, + "gemini-2.5-flash-lite": {"input": 0.10, "output": 0.40}, + "gemini-3-flash": {"input": 0.50, "output": 3.00} + } + + # Determine rate based on model and context length + if "2.5-pro" in model or "3.1-pro" in model: + base_model = "gemini-2.5-pro" if "2.5" in model else "gemini-3.1-pro" + # 200k token threshold for tiered pricing + if input_tokens <= 200_000: + in_rate = pricing[base_model]["input_std"] + out_rate = pricing[base_model]["output_std"] + else: + in_rate = pricing[base_model]["input_long"] + out_rate = pricing[base_model]["output_long"] + else: + # Flash models usually have flat pricing + rates = pricing.get(model, pricing["gemini-2.5-flash"]) # Default to Flash + in_rate = rates["input"] + out_rate = rates["output"] + # Calculate final cost + cost = (input_tokens * (in_rate / 1_000_000)) + (output_tokens * (out_rate / 1_000_000)) + return round(cost, 6) -# Factory function to create LLM providers -def create_llm_provider(provider: str, api_key: str, model_name: str, - temperature: float = 0.7, max_tokens: int = 2000) -> BaseLLMInterface: +def create_llm_provider( + provider: str, + api_key: str, + model_name: str, + temperature: float = 0.7, + max_tokens: int = 2000, + **kwargs, +) -> BaseLLMInterface: """Factory function to create LLM provider instances""" providers = { "OpenAI": OpenAIProvider, "Together AI": TogetherAIProvider, + "Gemini": GeminiProvider, } if provider not in providers: raise ValueError(f"Unsupported provider: {provider}") - return providers[provider](api_key, model_name, temperature, max_tokens) + return providers[provider](api_key, model_name, temperature, max_tokens, **kwargs) diff --git a/rag_service/core/rag_utils.py b/rag_service/core/rag_utils.py deleted file mode 100644 index d4fb093..0000000 --- a/rag_service/core/rag_utils.py +++ /dev/null @@ -1,94 +0,0 @@ -# File: ~/frappe-bench/apps/rag_service/rag_service/core/rag_utils.py - -import frappe -import json -from .embedding_utils import embedding_manager -from .vector_store import faiss_manager -from .feedback_generator import feedback_generator - -def process_submission(submission_id, content): - """Process a new submission through the RAG pipeline""" - try: - # Generate and store embedding - vector_store_name = embedding_manager.save_embedding( - reference_id=submission_id, - content=content, - content_type="submission" - ) - - # Add to FAISS index - faiss_manager.add_vector(vector_store_name) - - # Find similar submissions - embedding = embedding_manager.load_embedding(vector_store_name) - similar_submissions = faiss_manager.search_similar(embedding) - - return { - "vector_store_name": vector_store_name, - "similar_submissions": similar_submissions, - "plagiarism_score": min([s['distance'] for s in similar_submissions]) if similar_submissions else 0 - } - - except Exception as e: - frappe.log_error(f"Error processing 
submission: {str(e)}") - raise - -def find_similar_content(query_text, k=5): - """Find similar content for given text""" - try: - # Generate embedding for query - query_embedding = embedding_manager.generate_embedding(query_text) - - # Search for similar content - similar_content = faiss_manager.search_similar(query_embedding, k) - - # Get full content for results - results = [] - for item in similar_content: - vector_store = frappe.get_doc("Vector Store", item["vector_store"]) - results.append({ - "content": vector_store.content, - "content_type": vector_store.content_type, - "reference_id": vector_store.reference_id, - "similarity_score": item["distance"] - }) - - return results - - except Exception as e: - frappe.log_error(f"Error finding similar content: {str(e)}") - raise - -def generate_feedback(submission_id, content): - """Generate feedback for a submission""" - try: - # Process submission and get similar content - process_result = process_submission(submission_id, content) - - # Get similar contents with their full details - similar_contents = find_similar_content(content) - - # Generate feedback - feedback_result = feedback_generator.generate_structured_feedback( - content, - similar_contents, - plagiarism_score=process_result.get("plagiarism_score", None) - ) - - # Store feedback in Vector Store - feedback_store_name = embedding_manager.save_embedding( - reference_id=f"feedback_{submission_id}", - content=feedback_result["feedback"], - content_type="feedback" - ) - - return { - "feedback": feedback_result["feedback"], - "metadata": feedback_result["metadata"], - "similar_contents": similar_contents[:3], # Top 3 similar contents - "feedback_id": feedback_store_name - } - - except Exception as e: - frappe.log_error(f"Error generating feedback: {str(e)}") - raise diff --git a/rag_service/core/vector_store.py b/rag_service/core/vector_store.py deleted file mode 100644 index 5729d29..0000000 --- a/rag_service/core/vector_store.py +++ /dev/null @@ -1,86 +0,0 @@ -# File: rag_service/rag_service/core/vector_store.py - -import frappe -import faiss -import numpy as np -import os -from .embedding_utils import embedding_manager - -class FAISSManager: - def __init__(self): - self.index = None - self.dimension = embedding_manager.embedding_dimension - self.vector_ids = [] # To maintain mapping between FAISS and Vector Store - - def initialize_index(self): - """Initialize FAISS index""" - if self.index is None: - self.index = faiss.IndexFlatL2(self.dimension) - self._load_existing_vectors() - - def _load_existing_vectors(self): - """Load existing vectors from Vector Store into FAISS index""" - try: - vector_stores = frappe.get_all( - "Vector Store", - fields=["name", "embedding_file"] - ) - - vectors = [] - for vs in vector_stores: - try: - embedding = embedding_manager.load_embedding(vs.name) - vectors.append(embedding) - self.vector_ids.append(vs.name) - except Exception as e: - frappe.log_error(f"Error loading vector {vs.name}: {str(e)}") - - if vectors: - vectors_array = np.array(vectors).astype('float32') - self.index.add(vectors_array) - - except Exception as e: - frappe.log_error(f"Error loading existing vectors: {str(e)}") - - def add_vector(self, vector_store_name): - """Add a new vector to the index""" - try: - self.initialize_index() - - embedding = embedding_manager.load_embedding(vector_store_name) - self.index.add(embedding.reshape(1, -1).astype('float32')) - self.vector_ids.append(vector_store_name) - - except Exception as e: - frappe.log_error(f"Error adding vector to FAISS: 
{str(e)}") - raise - - def search_similar(self, query_vector, k=5): - """Search for similar vectors""" - try: - self.initialize_index() - - distances, indices = self.index.search( - query_vector.reshape(1, -1).astype('float32'), - k - ) - - results = [] - for i, idx in enumerate(indices[0]): - if idx < len(self.vector_ids): - vector_store = frappe.get_doc("Vector Store", self.vector_ids[idx]) - results.append({ - "vector_store": vector_store.name, - "reference_doctype": vector_store.reference_doctype, - "reference_name": vector_store.reference_name, - "distance": float(distances[0][i]) - }) - - return results - - except Exception as e: - frappe.log_error(f"Error searching similar vectors: {str(e)}") - raise - -# Create a singleton instance -faiss_manager = FAISSManager() diff --git a/rag_service/feedback_generator.py b/rag_service/feedback_generator.py deleted file mode 100644 index b66c55c..0000000 --- a/rag_service/feedback_generator.py +++ /dev/null @@ -1,54 +0,0 @@ -import frappe -import json - -def get_student_context(student_id): - try: - return frappe.get_doc("Student Context", {"student_id": student_id}) - except frappe.DoesNotExistError: - frappe.log_error(f"Student Context not found for student_id: {student_id}") - return None - -def get_assignment_context(assignment_id): - try: - return frappe.get_doc("Assignment Context", {"assignment_id": assignment_id}) - except frappe.DoesNotExistError: - frappe.log_error(f"Assignment Context not found for assignment_id: {assignment_id}") - return None - -def generate_feedback(submission_data): - """ - Initial basic feedback generation - """ - try: - # Log the received data - frappe.logger().info(f"Generating feedback for submission: {submission_data}") - - # Extract basic information - student_id = submission_data.get("student_id") - assignment_id = submission_data.get("assignment_id") - plagiarism_score = submission_data.get("plagiarism_score", 0) - - # Get contexts - student_context = get_student_context(student_id) - assignment_context = get_assignment_context(assignment_id) - - # Generate basic feedback - feedback = { - "status": "completed", - "plagiarism_assessment": { - "score": plagiarism_score, - "flag": "high" if plagiarism_score > 0.8 else "low" - }, - "feedback_text": "Submission received and processed.", - "timestamp": frappe.utils.now_datetime() - } - - return feedback - - except Exception as e: - frappe.log_error(frappe.get_traceback(), "Error in generate_feedback") - return { - "status": "error", - "error_message": str(e), - "timestamp": frappe.utils.now_datetime() - } diff --git a/rag_service/feedback_utils/audio_evaluation.py b/rag_service/feedback_utils/audio_evaluation.py new file mode 100644 index 0000000..566a11a --- /dev/null +++ b/rag_service/feedback_utils/audio_evaluation.py @@ -0,0 +1,68 @@ +# rag_service/rag_service/feedback_utils/audio_evaluation.py + +from typing import Dict, Tuple + +import frappe + +from .evaluation_generation import EvaluationGenerator +from ..utils.gcp_service_client import GCPServiceClient + + +class AudioEvaluationGenerator(EvaluationGenerator): + """Generate AI feedback for audio submissions.""" + + async def generate_feedback( + self, assignment_context: Dict, submission_data: Dict, submission_id: str + ) -> Tuple[Dict, str, str]: + media_service = None + media_asset = None + try: + print("\n=== Starting AI Feedback Generation (Audio) ===") + + llm_provider, model_used = self._create_llm_provider("Gemini") + activity_type = assignment_context["assignment"].get("activity_type") + 
course_vertical = assignment_context["assignment"].get("course_vertical") + + template = self.get_prompt_template("audio", "both", activity_type, course_vertical) + expected_format = self._get_expected_format(template) + system_prompt, formatted_user_prompt = self._format_prompts( + template, + assignment_context, + submission_data, + [], + ) + combined_prompt = f"{system_prompt}\n\n{formatted_user_prompt}" + + media_service = GCPServiceClient() + media_asset = media_service.download_media(submission_data["submission_url"]) + response = await llm_provider.generate_with_audio( + media_asset, + combined_prompt, + mime_type=media_asset["mime_type"], + ) + raw_text = response.text + self.cost = llm_provider.calculate_cost(response.to_dict()) + self.log_prob_feedback = response.to_dict().get("candidates", [{}])[0].get("avg_logprobs", None) + + feedback = self._parse_feedback(raw_text, expected_format) + feedback = self._attach_plagiarism_defaults(feedback) + feedback = self._attach_default_fileds(feedback) + feedback["strengths"] = [ + f"cost:{self.cost}", + f"Feedback_LP:{self.log_prob_feedback}", + "Eval_LP:0.0", + ] + + return feedback, model_used, self._template_used_name(template) + + except Exception as e: + error_msg = f"Error generating audio feedback for submission {submission_id}: {str(e)}" + print(f"\nError: {error_msg}") + frappe.log_error(message=error_msg, title="Audio Feedback Generation Error") + error_feedback = self.feedback_service.create_error_feedback(str(e)) + error_feedback = self._attach_plagiarism_defaults(error_feedback) + error_feedback["strengths"] = ["cost:1", "Feedback_LP:0.89", "Eval_LP:0.78"] + return error_feedback, "N/A", "Built-in Universal Template for Error" + finally: + if media_service: + media_service.cleanup(media_asset) diff --git a/rag_service/feedback_utils/evaluation_generation.py b/rag_service/feedback_utils/evaluation_generation.py new file mode 100644 index 0000000..e8f8159 --- /dev/null +++ b/rag_service/feedback_utils/evaluation_generation.py @@ -0,0 +1,445 @@ +# rag_service/rag_service/feedback_utils/evaluation_generation.py + +import json +from datetime import datetime +from typing import Any, Dict, List, Tuple + +import frappe +from ..core.llm_providers import create_llm_provider +from ..utils.submission_data import ( + MEDIA_SUBMISSION_TYPES, + TEXT_SUBMISSION_TYPES, + format_submission_text_for_prompt, +) + +class EvaluationGenerator: + """Shared evaluation generation utilities for different media types.""" + + IMAGE_EXTENSIONS = { + "jpg", + "jpeg", + "png", + "gif", + "webp", + "bmp", + "tiff", + "heic", + } + VIDEO_EXTENSIONS = { + "mp4", + "mov", + "webm", + "mkv", + "avi", + "mpeg", + "mpg", + } + AUDIO_EXTENSIONS = { + "mp3", + "wav", + "m4a", + "aac", + "ogg", + "flac", + } + + def __init__(self, feedback_service: Any): + self.feedback_service = feedback_service + + def detect_media_type(self, submission_data: Dict) -> str: + """Detect media type using submission_type first, then URL extension as fallback.""" + submission_type = (submission_data.get("submission_type") or "").lower() + if submission_type in MEDIA_SUBMISSION_TYPES or submission_type in TEXT_SUBMISSION_TYPES: + return "text" if submission_type in TEXT_SUBMISSION_TYPES else submission_type + + submission_url = submission_data.get("submission_url") or "" + if not submission_url: + return "image" + + url_without_query = submission_url.split("?", 1)[0].lower() + if "." 
in url_without_query: + ext = url_without_query.rsplit(".", 1)[-1] + if ext in self.IMAGE_EXTENSIONS: + return "image" + if ext in self.VIDEO_EXTENSIONS: + return "video" + if ext in self.AUDIO_EXTENSIONS: + return "audio" + + if "video" in url_without_query: + return "video" + if "audio" in url_without_query: + return "audio" + return "image" + + def clean_json_response(self, response: str) -> str: + """Clean JSON response from various formats.""" + try: + # Remove markdown code blocks if present + if "```json" in response: + response = response.split("```json")[1].split("```")[0].strip() + elif "```" in response: + code_blocks = response.split("```") + if len(code_blocks) >= 3: # At least one code block exists + response = code_blocks[1].strip() + # Check if the extracted content looks like JSON + if not (response.startswith("{") or response.startswith("[")): + # If not, try to find JSON in the original response + json_start = response.find("{") + if json_start >= 0: + response = response[json_start:] + + # Try to extract JSON if response starts with explanation + if not response.strip().startswith("{"): + json_start = response.find("{") + if json_start >= 0: + response = response[json_start:] + + # Check if the response ends properly + if not response.strip().endswith("}"): + json_end = response.rfind("}") + if json_end >= 0: + response = response[: json_end + 1] + + return response.strip() + except Exception as e: + print(f"Error cleaning JSON: {str(e)}") + return response + + def format_objectives(self, objectives: List[Dict]) -> str: + """Format learning objectives for prompt.""" + if not objectives: + return "No specific learning objectives provided for this assignment." + + formatted = [] + for i, obj in enumerate(objectives, 1): + if isinstance(obj, dict): + description = obj.get("description", obj.get("objective_id", "Unknown objective")) + else: + description = str(obj) + formatted.append(f"{i}. 
{description}") + + return "\n".join(formatted) + + def format_rubrics(self, rubrics) -> str: + """Format rubric criteria for prompt.""" + prompt = "" + if isinstance(rubrics, str): + rubrics = json.loads(rubrics) + # if "Creativity" in rubrics: + # del rubrics["Creativity"] + for criterion, grades_list in rubrics.items(): + prompt += f"\n{criterion}:\n" + for grade_item in grades_list: + prompt += ( + f" Grade {grade_item['grade_value']}: {grade_item['grade_description']}\n" + ) + + return prompt + + # def get_default_response_format(self) -> Dict: + # """Get default response format.""" + # return { + # "rubric_evaluations": [ + # { + # "skill": "Skill Name", + # "grade_value": 2, + # "observation": "specific evidence from submission", + # }, + # { + # "skill": "Skill Name", + # "grade_value": 2, + # "observation": "specific evidence from submission", + # }, + # ], + # "strengths": ["Strength 1", "Strength 2", "Strength 3"], + # "areas_for_improvement": ["Area 1", "Area 2"], + # "encouragement": "Encouraging message for the student", + # "overall_feedback": "Overall assessment of the submission", + # "overall_feedback_translated": "Translation of overall_feedback.", + # "learning_objectives_feedback": ["Feedback on objective 1"], + # "final_grade": 75, + # } + + # def get_default_evaluation_response_format(self) -> Dict: + # """Get default response format for rubric-only evaluation.""" + # return { + # "rubric_evaluations": [ + # { + # "skill": "Skill Name", + # "grade_value": 2, + # "observation": "specific evidence from submission", + # }, + # { + # "skill": "Skill Name", + # "grade_value": 2, + # "observation": "specific evidence from submission", + # }, + # ] + # } + + def get_prompt_template(self, media_type: str, prompt_type: str, activity_type: str, course_vertical: str): + """Get active template for the given media type.""" + try: + print(f"\n=== Getting Prompt Template for {prompt_type}===") + + print(f"Media Type: {media_type}") + print(f"Course Vertical: {course_vertical}") + print(f"prompt_type Type: {prompt_type}") + print(f"activity_type: {activity_type}") + + + + templates = frappe.get_list( + "Prompt Template", + filters={"is_active": 1, "media_type": media_type, "prompt_type": prompt_type, + "activity_type": activity_type, "course_vertical": course_vertical }, + order_by="version desc", + limit=1, + ) + + template = frappe.get_doc("Prompt Template", templates[0].name) + print(f"Using {media_type} {prompt_type} template: {template.template_name}") + + template.db_set("last_used", datetime.now()) + self._touch_prompt_segment(template.system_segment) + self._touch_prompt_segment(template.grading_segment) + self._touch_prompt_segment(template.subject_segment) + self._touch_prompt_segment(template.output_segment) + frappe.db.commit() + return template + + except Exception as e: + error_msg = f"Template Error: {str(e)}" + print(f"\nError: {error_msg}") + frappe.log_error("Template Error", error_msg) + raise Exception("No active template found") + + + def _get_expected_format(self, template: Any, prompt_type: str = "feedback") -> Dict: + try: + if hasattr(template, "response_format") and template.response_format: + return json.loads(template.response_format) + except Exception as e: + print(f"Invalid JSON in template response format: {e}") + raise Exception("Active template has invalid response format") + + + def _render_prompt_content(self, content: str, prompt_vars: Dict[str, Any]) -> str: + rendered = content or "" + for key, value in prompt_vars.items(): + placeholder = "{" 
+ key + "}" + if placeholder in rendered: + rendered = rendered.replace(placeholder, str(value)) + return rendered + + def _build_prompt_vars( + self, + assignment_context: Dict, + submission_data: Dict, + rubric_evaluations: Dict, + ) -> Dict[str, Any]: + learning_objectives = self.format_objectives( + assignment_context.get("learning_objectives", []) + ) + + rubric_criteria = self.format_rubrics( + assignment_context.get("assignment", {}).get("rubrics", "") + ) + + return { + "assignment_name": assignment_context.get("assignment", {}).get("assignment_name", ""), + "assignment_description": assignment_context.get("assignment", {}).get("description", ""), + "course_vertical": assignment_context.get("assignment", {}).get("course_vertical", ""), + "assignment_type": assignment_context.get("assignment", {}).get("assignment_type", "Practical"), + "learning_objectives": learning_objectives, + "rubric_evaluations": rubric_evaluations, + "rubric_criteria": rubric_criteria, + "Language": assignment_context.get("student", {}).get("language", "English"), + "Grade_Level": assignment_context.get("student", {}).get("grade", "1"), + "submission_type": submission_data.get("submission_type", ""), + "submission_text": submission_data.get("submission_text", "") or "", + "submission_text_context": format_submission_text_for_prompt(submission_data), + "submission_rules": assignment_context.get("assignment", {}).get("submission_rules", []), + "expected_submission_type": submission_data.get("expected_submission_type", ""), + "archetype": submission_data.get("archetype"), + "current_week": submission_data.get("current_week"), + "escalation_step_at_submit": submission_data.get("escalation_step_at_submit"), + } + + def _format_prompts( + self, + template: Any, + assignment_context: Dict, + submission_data: Dict, + rubric_evaluations: Dict, + ) -> Tuple[str, str]: + try: + prompt_vars = self._build_prompt_vars( + assignment_context, + submission_data, + rubric_evaluations, + ) + # print("##############") + # print(prompt_vars) + # print("##############") + + system_segment = frappe.get_doc("Prompt Segment", template.system_segment) + grading_segment = frappe.get_doc("Prompt Segment", template.grading_segment) + subject_segment = frappe.get_doc("Prompt Segment", template.subject_segment) + output_segment = frappe.get_doc("Prompt Segment", template.output_segment) + + system_prompt = self._render_prompt_content(system_segment.content, prompt_vars) + user_prompt_sections = [ + self._render_prompt_content(grading_segment.content, prompt_vars), + self._render_prompt_content(subject_segment.content, prompt_vars), + self._render_prompt_content(output_segment.content, prompt_vars), + ] + + if submission_data.get("submission_type") in TEXT_SUBMISSION_TYPES: + user_prompt_sections.append(prompt_vars["submission_text_context"]) + + formatted_user_prompt = "\n\n".join(section for section in user_prompt_sections if section) + print("#"*80) + print(system_prompt) + print(formatted_user_prompt) + print("#"*80) + return system_prompt, formatted_user_prompt + except Exception as e: + print(f"Error formatting prompt: {e}") + raise Exception("Failed to assemble prompt with specified variables") + + def _parse_rubric_evaluations(self, raw_text: str) -> Dict: + cleaned_text = self.clean_json_response(raw_text or "") + try: + rubric_evaluations = json.loads(cleaned_text) + return rubric_evaluations + except Exception as e: + print(f"Error parsing rubric evaluations: {e}") + raise Exception("Failed to parse rubric evaluations from LLM 
response") + + def _parse_grade_value_feedback(self, grade_value) -> int: + feedback = { + "overall_feedback": "Good job", + "overall_feedback_translated": "Good job", + "final_grade": 0, + "rubric_evaluations": [ + { + "Skill": "Content Knowledge", + "grade_value": grade_value, + "observation": "Neutral" + } + ] + } + return feedback + + def _parse_feedback(self, raw_text: str, expected_format: Dict) -> Dict: + cleaned_text = self.clean_json_response(raw_text or "") + try: + feedback = json.loads(cleaned_text) + feedback = self.feedback_service.validate_feedback_structure(feedback, expected_format) + except json.JSONDecodeError: + feedback = self.feedback_service.create_fallback_feedback(expected_format) + + return feedback + + def _attach_plagiarism_defaults(self, feedback: Dict) -> Dict: + feedback["plagiarism_output"] = { + "is_plagiarized": False, + "is_ai_generated": False, + "match_type": "original", + "plagiarism_source": "none", + "similarity_score": 0.0, + "ai_detection_source": "none", + "ai_confidence": 0.0, + "similar_sources": [], + } + return feedback + + def _attach_evaluation_to_feedback(self, feedback: Dict, evaluation_result: Dict) -> Dict: + feedback["rubric_evaluations"] = evaluation_result.get("rubric_evaluations", []) + return feedback + + def _attach_default_fileds(self, feedback: Dict) -> Dict: + feedback.setdefault("strengths", []) + feedback.setdefault("areas_for_improvement", []) + feedback.setdefault("encouragement", "") + return feedback + + def _template_used_name(self, template: Any) -> str: + try: + if hasattr(template, "name"): + return template.name + except Exception: + pass + return "Built-in Universal Template" + + def _touch_prompt_segment(self, segment_name: str) -> None: + if not segment_name: + return + frappe.db.set_value("Prompt Segment", segment_name, "last_used", datetime.now(), update_modified=False) + + def _create_llm_provider(self, llm_provider_name: str = "Gemini") -> Tuple[Any, str]: + llm_settings = frappe.get_list( + "LLM Settings", + filters={"is_active": 1, "provider": llm_provider_name}, + limit=1, + ) + + if not llm_settings: + raise Exception(f"No active {llm_provider_name} configuration found") + + settings = frappe.get_doc("LLM Settings", llm_settings[0].name) + model_used = llm_settings[0].name + + llm_provider = create_llm_provider( + provider=llm_provider_name, + api_key="", + model_name=settings.model_name, + temperature=settings.temperature or 0, + max_tokens=settings.max_tokens or 2000, + settings=settings, + ) + return llm_provider, model_used + + async def generate_ai_feedback( + self, assignment_context: Dict, submission_data: Dict, submission_id: str + ) -> Tuple[Dict, str, str]: + media_type = self.detect_media_type(submission_data) + if media_type == "video": + from .video_evaluation import VideoEvaluationGenerator + + return await VideoEvaluationGenerator(self.feedback_service).generate_feedback( + assignment_context, submission_data, submission_id + ) + + if media_type == "audio": + from .audio_evaluation import AudioEvaluationGenerator + + return await AudioEvaluationGenerator(self.feedback_service).generate_feedback( + assignment_context, submission_data, submission_id + ) + + if media_type == "text": + from .text_evaluation import TextEvaluationGenerator + + return await TextEvaluationGenerator(self.feedback_service).generate_feedback( + assignment_context, submission_data, submission_id + ) + + from .image_evaluation_both import ImageEvaluationGenerator + + return await 
+        from .image_evaluation_both import ImageEvaluationGenerator
+
+        return await ImageEvaluationGenerator(self.feedback_service).generate_feedback(
+            assignment_context, submission_data, submission_id
+        )
+
+
+    # async def generate_ai_feedback(
+    #     self, assignment_context: Dict, submission_url: str, submission_id: str
+    # ) -> Tuple[Dict, str, str]:
+
+    #     from .video_evaluation import VideoEvaluationGenerator
+
+    #     return await VideoEvaluationGenerator(self.feedback_service).generate_feedback(
+    #         assignment_context, submission_url, submission_id
+    #     )
diff --git a/rag_service/feedback_utils/image_evaluation.py b/rag_service/feedback_utils/image_evaluation.py
new file mode 100644
index 0000000..7d3d675
--- /dev/null
+++ b/rag_service/feedback_utils/image_evaluation.py
@@ -0,0 +1,124 @@
+# rag_service/rag_service/feedback_utils/image_evaluation.py
+
+import json
+from typing import Any, Dict, Optional, Tuple
+
+import frappe
+
+from .evaluation_generation import EvaluationGenerator
+from ..core.llm_providers import create_llm_provider
+
+
+class ImageEvaluationGenerator(EvaluationGenerator):
+    """Generate AI feedback for image submissions."""
+
+    def _create_llm_provider(self, llm_provider_name) -> Tuple[Any, str]:
+        llm_settings = frappe.get_list("LLM Settings",
+            filters={"is_active": 1, "provider": llm_provider_name}, limit=1)
+        if not llm_settings:
+            raise Exception(f"No active {llm_provider_name} configuration found")
+
+        settings = frappe.get_doc("LLM Settings", llm_settings[0].name)
+        model_used = llm_settings[0].name
+
+        llm_provider = create_llm_provider(
+            provider=llm_provider_name,
+            api_key="",
+            model_name=settings.model_name,
+            temperature=settings.temperature or 0,
+            max_tokens=settings.max_tokens or 2000,
+            settings=settings,
+        )
+
+        return llm_provider, model_used
+
+    async def generate_feedback(
+        self, assignment_context: Dict, submission_url: str, submission_id: str
+    ) -> Tuple[Dict, str, str]:
+        try:
+            print("\n=== Starting AI Feedback Generation (Image) ===")
+            submission_data = {
+                "submission_type": "image",
+                "submission_url": submission_url,
+                "submission_text": None,
+            }
+            activity_type = assignment_context["assignment"].get("activity_type")
+            course_vertical = assignment_context["assignment"].get("course_vertical")
+
+            llm_provider_name = "Gemini"
+            llm_provider, model_used = self._create_llm_provider(llm_provider_name)
+
+            evaluation_result = await self.generate_evaluation(llm_provider, submission_url, assignment_context)
+
+            template = self.get_prompt_template("image", "feedback", activity_type, course_vertical)
+            expected_format = self._get_expected_format(template)
+
+            system_prompt, formatted_user_prompt = self._format_prompts(
+                template,
+                assignment_context,
+                submission_data,
+                evaluation_result.get("rubric_evaluations", []),
+            )
+            combined_prompt = f"{system_prompt}\n\n{formatted_user_prompt}"
+
+            response = await llm_provider.generate_with_vision(submission_url, combined_prompt)
+            raw_text = response.text
+            cost = llm_provider.calculate_cost(response.to_dict())
+
+            self.log_prob_feedback = response.to_dict().get("candidates", [{}])[0].get("avg_logprobs", None)
+            self.cost = self.cost + cost
+
+            print(f"\nRaw LLM Output:\n{raw_text}")
+            feedback = self._parse_feedback(raw_text, expected_format)
+            feedback = self._attach_plagiarism_defaults(feedback)
+            feedback = self._attach_evaluation_to_feedback(feedback, evaluation_result)
+            feedback = self._attach_default_fileds(feedback)
+            feedback['strengths'] = [f"cost:{self.cost}", f"Feedback_LP:{self.log_prob_feedback}", f"Eval_LP:{self.log_prob_eval}"]
+
+            template_used = 
self._template_used_name(template) + + print("\n=== Image Feedback Generation Completed ===") + return feedback, model_used, template_used + + except Exception as e: + error_msg = f"Error generating image feedback for submission {submission_id}: {str(e)}" + print(f"\nError: {error_msg}") + frappe.log_error(message=error_msg, title="Image Feedback Generation Error") + + template_used = "Built-in Universal Template for Error" + error_feedback = self.feedback_service.create_error_feedback(str(e)) + error_feedback = self._attach_plagiarism_defaults(error_feedback) + error_feedback['strengths'] = ["cost:1", f"Feedback_LP:0.89", f"Eval_LP:0.78"] + return error_feedback, "N/A", template_used + + async def generate_evaluation(self, llm_provider: Any, submission_url: str, assignment_context: Dict) -> Dict: + try: + print("\n=== Starting Rubric Evaluation Generation (Image) ===") + submission_data = { + "submission_type": "image", + "submission_url": submission_url, + "submission_text": None, + } + activity_type = assignment_context["assignment"].get("activity_type") + course_vertical = assignment_context["assignment"].get("course_vertical") + + template = self.get_prompt_template("image", "evaluation", activity_type, course_vertical) + system_prompt, formatted_user_prompt = self._format_prompts( + template, + assignment_context, + submission_data, + [], + ) + combined_prompt = f"{system_prompt}\n\n{formatted_user_prompt}" + + response = await llm_provider.generate_with_vision(submission_url, combined_prompt) + raw_text = response.text + cost = llm_provider.calculate_cost(response.to_dict()) + self.log_prob_eval = response.to_dict().get("candidates", [{}])[0].get("avg_logprobs", None) + self.cost = cost + evaluation_result = self._parse_rubric_evaluations(raw_text) + + return evaluation_result + except Exception as e: + print(f"\nError during rubric evaluation generation: {str(e)}") + raise Exception("Failed to generate rubric evaluation for image submission") diff --git a/rag_service/feedback_utils/image_evaluation_both.py b/rag_service/feedback_utils/image_evaluation_both.py new file mode 100644 index 0000000..edf4586 --- /dev/null +++ b/rag_service/feedback_utils/image_evaluation_both.py @@ -0,0 +1,76 @@ +# rag_service/rag_service/feedback_utils/image_evaluation_both.py + +from typing import Dict, Tuple + +import frappe + +from .evaluation_generation import EvaluationGenerator +from ..utils.gcp_service_client import GCPServiceClient + + +class ImageEvaluationGenerator(EvaluationGenerator): + """Generate AI feedback for image submissions.""" + + + async def generate_feedback( + self, assignment_context: Dict, submission_data: Dict, submission_id: str + ) -> Tuple[Dict, str, str]: + media_service = None + media_asset = None + try: + print("\n=== Starting AI Feedback Generation (Image) ===") + + llm_provider, model_used = self._create_llm_provider("Gemini") + + activity_type = assignment_context["assignment"].get("activity_type") + course_vertical = assignment_context["assignment"].get("course_vertical") + + + template = self.get_prompt_template("image", "both", activity_type, course_vertical) + expected_format = self._get_expected_format(template) + + system_prompt, formatted_user_prompt = self._format_prompts( + template, + assignment_context, + submission_data, + [], + ) + combined_prompt = f"{system_prompt}\n\n{formatted_user_prompt}" + + media_service = GCPServiceClient() + media_asset = media_service.download_media(submission_data["submission_url"]) + + response = await 
llm_provider.generate_with_vision( + media_asset, + combined_prompt, + mime_type=media_asset["mime_type"], + ) + raw_text = response.text + self.cost = llm_provider.calculate_cost(response.to_dict()) + + self.log_prob_feedback = response.to_dict().get("candidates", [{}])[0].get("avg_logprobs", None) + + print(f"\nRaw LLM Output:\n{raw_text}") + feedback = self._parse_feedback(raw_text, expected_format) + feedback = self._attach_plagiarism_defaults(feedback) + feedback = self._attach_default_fileds(feedback) + feedback['strengths'] = [f"cost:{self.cost}", f"Feedback_LP:{self.log_prob_feedback}", f"Eval_LP:{0.0}"] + + template_used = self._template_used_name(template) + + print("\n=== Image Feedback Generation Completed ===") + return feedback, model_used, template_used + + except Exception as e: + error_msg = f"Error generating image feedback for submission {submission_id}: {str(e)}" + print(f"\nError: {error_msg}") + frappe.log_error(message=error_msg, title="Image Feedback Generation Error") + + template_used = "Built-in Universal Template for Error" + error_feedback = self.feedback_service.create_error_feedback(str(e)) + error_feedback = self._attach_plagiarism_defaults(error_feedback) + error_feedback['strengths'] = ["cost:1", f"Feedback_LP:0.89", f"Eval_LP:0.78"] + return error_feedback, "N/A", template_used + finally: + if media_service: + media_service.cleanup(media_asset) diff --git a/rag_service/feedback_utils/image_evaluation_grade_response.py b/rag_service/feedback_utils/image_evaluation_grade_response.py new file mode 100644 index 0000000..18a6882 --- /dev/null +++ b/rag_service/feedback_utils/image_evaluation_grade_response.py @@ -0,0 +1,91 @@ +# rag_service/rag_service/feedback_utils/image_evaluation.py + +import json +from typing import Any, Dict, Optional, Tuple + +import frappe + +from .evaluation_generation import EvaluationGenerator +from ..core.llm_providers import create_llm_provider + + +class ImageEvaluationGenerator(EvaluationGenerator): + """Generate AI feedback for image submissions.""" + + def _create_llm_provider(self,llm_provider_name) -> Tuple[Any, str]: + llm_settings = frappe.get_list("LLM Settings", + filters={"is_active": 1, "provider": llm_provider_name}, limit=1) + + if not llm_settings: + raise Exception(f"No active {llm_provider_name} configuration found") + + settings = frappe.get_doc("LLM Settings", llm_settings[0].name) + model_used = llm_settings[0].name + + llm_provider = create_llm_provider( + provider=llm_provider_name, + api_key="", + model_name=settings.model_name, + temperature=settings.temperature or 0, + max_tokens=settings.max_tokens or 2000, + settings=settings, + ) + + return llm_provider, model_used + + + async def generate_feedback( + self, assignment_context: Dict, submission_url: str, submission_id: str + ) -> Tuple[Dict, str, str]: + try: + print("\n=== Starting AI Feedback Generation (Image) ===") + submission_data = { + "submission_type": "image", + "submission_url": submission_url, + "submission_text": None, + } + activity_type = assignment_context["assignment"].get("activity_type") + course_vertical = assignment_context["assignment"].get("course_vertical") + + llm_provider_name = "Gemini" + llm_provider, model_used = self._create_llm_provider(llm_provider_name) + + template = self.get_prompt_template("image", "evaluation", activity_type, course_vertical) + + print(assignment_context) + system_prompt, formatted_user_prompt = self._format_prompts( + template, + assignment_context, + submission_data, + [], + ) + combined_prompt = 
f"{system_prompt}\n\n{formatted_user_prompt}" + print(f"\nCombined Prompt Sent to LLM:\n{combined_prompt}") + + response = await llm_provider.generate_with_vision(submission_url, combined_prompt) + raw_text = response.text + self.cost = llm_provider.calculate_cost(response.to_dict()) + + self.log_prob_feedback = response.to_dict().get("candidates", [{}])[0].get("avg_logprobs", None) + + print(f"\nRaw LLM Output:\n{raw_text}") + feedback = self._parse_grade_value_feedback(raw_text) + feedback = self._attach_plagiarism_defaults(feedback) + feedback = self._attach_default_fileds(feedback) + feedback['strengths'] = [f"cost:{self.cost}", f"Feedback_LP:{self.log_prob_feedback}", f"Eval_LP:{0.0}"] + + template_used = self._template_used_name(template) + + print("\n=== Image Feedback Generation Completed ===") + return feedback, model_used, template_used + + except Exception as e: + error_msg = f"Error generating image feedback for submission {submission_id}: {str(e)}" + print(f"\nError: {error_msg}") + frappe.log_error(message=error_msg, title="Image Feedback Generation Error") + + template_used = "Built-in Universal Template for Error" + error_feedback = self.feedback_service.create_error_feedback(str(e)) + error_feedback = self._attach_plagiarism_defaults(error_feedback) + error_feedback['strengths'] = ["cost:0.0", f"Feedback_LP:0.89", f"Eval_LP:0.78"] + return error_feedback, "N/A", template_used diff --git a/rag_service/feedback_utils/image_evaluation_no_rubric.py b/rag_service/feedback_utils/image_evaluation_no_rubric.py new file mode 100644 index 0000000..fbd74f9 --- /dev/null +++ b/rag_service/feedback_utils/image_evaluation_no_rubric.py @@ -0,0 +1,137 @@ +# rag_service/rag_service/feedback_utils/image_evaluation.py + +import json +from typing import Any, Dict, Optional, Tuple + +import frappe + +from .evaluation_generation import EvaluationGenerator +from ..core.llm_providers import create_llm_provider + + +class ImageEvaluationGenerator(EvaluationGenerator): + """Generate AI feedback for image submissions.""" + + def _create_llm_provider(self,llm_provider_name) -> Tuple[Any, str]: + llm_settings = frappe.get_list("LLM Settings", + filters={"is_active": 1, "provider": llm_provider_name}, limit=1) + + if not llm_settings: + raise Exception(f"No active {llm_provider_name} configuration found") + + settings = frappe.get_doc("LLM Settings", llm_settings[0].name) + model_used = llm_settings[0].name + + llm_provider = create_llm_provider( + provider=llm_provider_name, + api_key="", + model_name=settings.model_name, + temperature=settings.temperature or 0, + max_tokens=settings.max_tokens or 2000, + settings=settings, + ) + + return llm_provider, model_used + + + async def generate_feedback( + self, assignment_context: Dict, submission_url: str, submission_id: str + ) -> Tuple[Dict, str, str]: + try: + print("\n=== Starting AI Feedback Generation (Image) ===") + submission_data = { + "submission_type": "image", + "submission_url": submission_url, + "submission_text": None, + } + activity_type = assignment_context["assignment"].get("activity_type") + course_vertical = assignment_context["assignment"].get("course_vertical") + + llm_provider_name = "Gemini" + llm_provider, model_used = self._create_llm_provider(llm_provider_name) + + template = self.get_prompt_template("image", "both", activity_type, course_vertical) + expected_format = self._get_expected_format(template) + assignment_context["assignment"]["rubrics"] = { + "Content Knowledge": [ + { + "grade_value": 1, + "grade_description": 
"Invalid or random submission — task is blank, off-topic, or unrelated. No link to concept seen." + }, + { + "grade_value": 2, + "grade_description": "Some link to topic visible but full of mistakes or confusion. The student likely didn't understand all steps." + }, + { + "grade_value": 3, + "grade_description": "Main idea is correct and partly applied. Student is trying to use the concept but not yet fully correct." + }, + { + "grade_value": 4, + "grade_description": "Work is accurate, clear, and independently done. Student can apply the concept correctly as shown." + }, + { + "grade_value": 5, + "grade_description": "Work extends the concept meaningfully — connects it to real-life or shows creative application." + } + ], + "Creativity": [ + { + "grade_value": 1, + "grade_description": "Invalid or no submission. Task blank or repeated exactly as taught, showing no original input." + }, + { + "grade_value": 2, + "grade_description": "Minor variation without reason. Adds one small change (color, word, step, example) but without any creative link." + }, + { + "grade_value": 3, + "grade_description": "Begins to combine ideas. Connects two or more taught concepts or introduces a small improvement that shows personal thought." + }, + { + "grade_value": 4, + "grade_description": "Applies imagination with purpose. Adjusts or redesigns task elements to make it clearer, more effective, or more interesting." + }, + { + "grade_value": 5, + "grade_description": "Generates and improves ideas. Produces original and relevant solutions by combining, evaluating, or refining ideas; connects learning to real-world or cross-topic contexts." + } + ] +} + print(assignment_context) + system_prompt, formatted_user_prompt = self._format_prompts( + template, + assignment_context, + submission_data, + [], + ) + combined_prompt = f"{system_prompt}\n\n{formatted_user_prompt}" + print(f"\nCombined Prompt Sent to LLM:\n{combined_prompt}") + + response = await llm_provider.generate_with_vision(submission_url, combined_prompt) + raw_text = response.text + self.cost = llm_provider.calculate_cost(response.to_dict()) + + self.log_prob_feedback = response.to_dict().get("candidates", [{}])[0].get("avg_logprobs", None) + + print(f"\nRaw LLM Output:\n{raw_text}") + feedback = self._parse_feedback(raw_text, expected_format) + feedback = self._attach_plagiarism_defaults(feedback) + feedback = self._attach_default_fileds(feedback) + feedback['strengths'] = [f"cost:{self.cost}", f"Feedback_LP:{self.log_prob_feedback}", f"Eval_LP:{0.0}"] + + template_used = self._template_used_name(template) + + print("\n=== Image Feedback Generation Completed ===") + return feedback, model_used, template_used + + except Exception as e: + error_msg = f"Error generating image feedback for submission {submission_id}: {str(e)}" + print(f"\nError: {error_msg}") + frappe.log_error(message=error_msg, title="Image Feedback Generation Error") + + template_used = "Built-in Universal Template for Error" + error_feedback = self.feedback_service.create_error_feedback(str(e)) + error_feedback = self._attach_plagiarism_defaults(error_feedback) + error_feedback['strengths'] = ["cost:1", f"Feedback_LP:0.89", f"Eval_LP:0.78"] + return error_feedback, "N/A", template_used diff --git a/rag_service/feedback_utils/image_evaluation_only.py b/rag_service/feedback_utils/image_evaluation_only.py new file mode 100644 index 0000000..469470b --- /dev/null +++ b/rag_service/feedback_utils/image_evaluation_only.py @@ -0,0 +1,157 @@ +# 
rag_service/rag_service/feedback_utils/image_evaluation.py + +import json +from typing import Any, Dict, Optional, Tuple + +import frappe + +from .evaluation_generation import EvaluationGenerator +from ..core.llm_providers import create_llm_provider + + +class ImageEvaluationGenerator(EvaluationGenerator): + """Generate AI feedback for image submissions.""" + + def _resolve_service_account_credentials(self, settings: Any) -> Optional[Dict]: + raw_key = settings.get("credentials_json") + + if isinstance(raw_key, dict): + return raw_key + if isinstance(raw_key, str): + raw_key = raw_key.strip() + if raw_key: + try: + return json.loads(raw_key) + except json.JSONDecodeError: + return None + + return None + + def _create_llm_provider(self) -> Tuple[Any, str]: + gemini_settings = frappe.get_list("Gemini Settings", filters={"is_active": 1}, limit=1) + if not gemini_settings: + raise Exception("No active Gemini configuration found") + + settings = frappe.get_doc("Gemini Settings", gemini_settings[0].name) + model_used = gemini_settings[0].name + + key_data = self._resolve_service_account_credentials(settings) + if not key_data: + raise Exception("Gemini service account key JSON is required") + + llm_provider = create_llm_provider( + provider="Gemini", + api_key="", + model_name=settings.model_name, + temperature=settings.temperature or 0, + max_tokens=settings.max_tokens or 2000, + key_data=key_data, + location=settings.location or "us-central1", + project_id=settings.project_id or None, + ) + + return llm_provider, model_used + + async def generate_feedback( + self, assignment_context: Dict, submission_url: str, submission_id: str + ) -> Tuple[Dict, str, str]: + try: + print("\n=== Starting AI Feedback Generation (Image) ===") + submission_data = { + "submission_type": "image", + "submission_url": submission_url, + "submission_text": None, + } + activity_type = assignment_context["assignment"].get("activity_type") + course_vertical = assignment_context["assignment"].get("course_vertical") + + llm_provider, model_used = self._create_llm_provider() + + evaluation_result = await self.generate_evaluation(llm_provider, submission_url, assignment_context) + + template = self.get_prompt_template("image", "feedback", activity_type, course_vertical) + expected_format = self._get_expected_format(template) + + system_prompt, formatted_user_prompt = self._format_prompts( + template, + assignment_context, + submission_data, + evaluation_result.get("rubric_evaluations", []), + ) + combined_prompt = f"{system_prompt}\n\n{formatted_user_prompt}" + + print("\nGenerated Prompt for LLM:") + print(combined_prompt) + + # response = await llm_provider.generate_with_vision(submission_url, combined_prompt) + # raw_text = response.text + # cost = llm_provider.calculate_cost(response.to_dict()) + + # self.log_prob_feedback = response.to_dict().get("candidates", [{}])[0].get("avg_logprobs", None) + # self.cost = self.cost + cost + + # print(f"\nRaw LLM Output:\n{raw_text}") + # feedback = self._parse_feedback(raw_text, expected_format) + + feedback = { + "overall_feedback": "Great job! 
Keep up the creative work!", + "overall_feedback_translated": "बहुत अच्छा काम किया, इसे जारी रखें!", + "final_grade": 40 + } + self.log_prob_feedback = -1.23 # Example log probability for feedback generation + print(f"\nParsed Feedback:\n{feedback}") + feedback = self._attach_plagiarism_defaults(feedback) + print(f"\nModel Used: {model_used}") + feedback = self._attach_evaluation_to_feedback(feedback, evaluation_result) + feedback = self._attach_default_fileds(feedback) + feedback['strengths'] = [f"cost:{self.cost}", f"Feedback_LP:{self.log_prob_feedback}", f"Eval_LP:{self.log_prob_eval}"] + print(f"\nParsed Feedback:\n{feedback}") + + template_used = self._template_used_name(template) + + print("\n=== Image Feedback Generation Completed ===") + return feedback, model_used, template_used + + except Exception as e: + error_msg = f"Error generating image feedback for submission {submission_id}: {str(e)}" + print(f"\nError: {error_msg}") + frappe.log_error(message=error_msg, title="Image Feedback Generation Error") + + template_used = "Built-in Universal Template for Error" + error_feedback = self.feedback_service.create_error_feedback(str(e)) + error_feedback = self._attach_plagiarism_defaults(error_feedback) + error_feedback['strengths'] = ["cost:1", f"Feedback_LP:0.89", f"Eval_LP:0.78"] + return error_feedback, "N/A", template_used + + async def generate_evaluation(self, llm_provider: Any, submission_url: str, assignment_context: Dict) -> Dict: + try: + print("\n=== Starting Rubric Evaluation Generation (Image) ===") + submission_data = { + "submission_type": "image", + "submission_url": submission_url, + "submission_text": None, + } + activity_type = assignment_context["assignment"].get("activity_type") + course_vertical = assignment_context["assignment"].get("course_vertical") + + template = self.get_prompt_template("image", "evaluation", activity_type, course_vertical) + system_prompt, formatted_user_prompt = self._format_prompts( + template, + assignment_context, + submission_data, + [], + ) + combined_prompt = f"{system_prompt}\n\n{formatted_user_prompt}" + + response = await llm_provider.generate_with_vision(submission_url, combined_prompt) + raw_text = response.text + cost = llm_provider.calculate_cost(response.to_dict()) + self.log_prob_eval = response.to_dict().get("candidates", [{}])[0].get("avg_logprobs", None) + self.cost = cost + evaluation_result = self._parse_rubric_evaluations(raw_text) + + print("\n=== Rubric Evaluation Generation Completed (Image) ===") + return evaluation_result + except Exception as e: + print(f"\nError during rubric evaluation generation: {str(e)}") + raise Exception("Failed to generate rubric evaluation for image submission") diff --git a/rag_service/feedback_utils/image_evaluation_together_ai.py b/rag_service/feedback_utils/image_evaluation_together_ai.py new file mode 100644 index 0000000..937a7e3 --- /dev/null +++ b/rag_service/feedback_utils/image_evaluation_together_ai.py @@ -0,0 +1,93 @@ +# rag_service/rag_service/feedback_utils/image_evaluation.py + +import json +from typing import Any, Dict, Optional, Tuple + +import frappe + +from .evaluation_generation import EvaluationGenerator +from ..core.llm_providers import create_llm_provider + + +class ImageEvaluationGenerator(EvaluationGenerator): + """Generate AI feedback for image submissions.""" + + def _create_llm_provider(self,llm_provider_name, model_name) -> Tuple[Any, str]: + llm_settings = frappe.get_list("LLM Settings", + filters={"is_active": 1, "provider": llm_provider_name}, limit=1) + + 
if not llm_settings: + raise Exception(f"No active {llm_provider_name} configuration found") + + settings = frappe.get_doc("LLM Settings", llm_settings[0].name) + model_used = llm_settings[0].name + + llm_provider = create_llm_provider( + provider=llm_provider_name, + api_key=settings.api_key, + model_name=model_name, + temperature=settings.temperature or 0, + max_tokens=settings.max_tokens or 2000 + ) + + return llm_provider, model_used + + + async def generate_feedback( + self, assignment_context: Dict, submission_url: str, submission_id: str + ) -> Tuple[Dict, str, str]: + try: + print("\n=== Starting AI Feedback Generation (Image) ===") + submission_data = { + "submission_type": "image", + "submission_url": submission_url, + "submission_text": None, + } + activity_type = assignment_context["assignment"].get("activity_type") + course_vertical = assignment_context["assignment"].get("course_vertical") + + llm_provider_name = "Together AI" + # model_name = "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8" + # model_name = "google/gemma-3n-E4B-it" + model_name = "Qwen/Qwen3.5-9B" + # model_name = "ServiceNow-AI/Apriel-1.6-15b-Thinker" + + llm_provider, model_used = self._create_llm_provider(llm_provider_name, model_name) + + template = self.get_prompt_template("image", "both", activity_type, course_vertical) + expected_format = self._get_expected_format(template) + + system_prompt, formatted_user_prompt = self._format_prompts( + template, + assignment_context, + submission_data, + [], + ) + response = await llm_provider.generate_with_vision(submission_url, system_prompt, formatted_user_prompt) + print(f"\nRaw LLM Output:\n{response}") + raw_text = response.choices[0].message.content + print(f"\nLLM Output:\n{raw_text}") + self.cost = llm_provider.calculate_cost(response.to_dict()) + self.log_prob_feedback = model_name + + + feedback = self._parse_feedback(raw_text, expected_format) + feedback = self._attach_plagiarism_defaults(feedback) + feedback = self._attach_default_fileds(feedback) + feedback['strengths'] = [f"cost:{self.cost}", f"Feedback_LP:{self.log_prob_feedback}", f"Eval_LP:{0.0}"] + + template_used = self._template_used_name(template) + + print("\n=== Image Feedback Generation Completed ===") + return feedback, model_used, template_used + + except Exception as e: + error_msg = f"Error generating image feedback for submission {submission_id}: {str(e)}" + print(f"\nError: {error_msg}") + frappe.log_error(message=error_msg, title="Image Feedback Generation Error") + + template_used = "Built-in Universal Template for Error" + error_feedback = self.feedback_service.create_error_feedback(str(e)) + error_feedback = self._attach_plagiarism_defaults(error_feedback) + error_feedback['strengths'] = ["cost:0", f"Feedback_LP:0.0", f"Eval_LP:0.0"] + return error_feedback, "N/A", template_used diff --git a/rag_service/feedback_utils/text_evaluation.py b/rag_service/feedback_utils/text_evaluation.py new file mode 100644 index 0000000..1c0104c --- /dev/null +++ b/rag_service/feedback_utils/text_evaluation.py @@ -0,0 +1,57 @@ +# rag_service/rag_service/feedback_utils/text_evaluation.py + +from typing import Dict, Tuple + +import frappe +import traceback + +from .evaluation_generation import EvaluationGenerator + + +class TextEvaluationGenerator(EvaluationGenerator): + """Generate AI feedback for text and emoji submissions.""" + + async def generate_feedback( + self, assignment_context: Dict, submission_data: Dict, submission_id: str + ) -> Tuple[Dict, str, str]: + try: + print("\n=== Starting AI 
Feedback Generation (Text) ===") + + llm_provider, model_used = self._create_llm_provider("Gemini") + activity_type = assignment_context["assignment"].get("activity_type") + course_vertical = assignment_context["assignment"].get("course_vertical") + + template = self.get_prompt_template("text", "both", activity_type, course_vertical) + expected_format = self._get_expected_format(template) + system_prompt, formatted_user_prompt = self._format_prompts( + template, + assignment_context, + submission_data, + [], + ) + + + response, cost, _ = await llm_provider.generate( + [ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": formatted_user_prompt}, + ] + ) + + feedback = self._parse_feedback(response, expected_format) + feedback = self._attach_plagiarism_defaults(feedback) + feedback = self._attach_default_fileds(feedback) + feedback["strengths"] = [f"cost:{cost}", "Feedback_LP:0.0", "Eval_LP:0.0"] + print("$$$$$$$$$$$$$$") + print(feedback) + + return feedback, model_used, self._template_used_name(template) + + except Exception as e: + error_msg = f"Error generating text feedback for submission {submission_id}: {str(e)}\n{traceback.format_exc()}" + print(f"\nError: {error_msg}") + frappe.log_error(message=error_msg, title="Text Feedback Generation Error") + error_feedback = self.feedback_service.create_error_feedback(str(e)) + error_feedback = self._attach_plagiarism_defaults(error_feedback) + error_feedback["strengths"] = ["cost:0", "Feedback_LP:0.0", "Eval_LP:0.0"] + return error_feedback, "N/A", "Built-in Universal Template for Error" diff --git a/rag_service/feedback_utils/video_evaluation.py b/rag_service/feedback_utils/video_evaluation.py new file mode 100644 index 0000000..ff49c7e --- /dev/null +++ b/rag_service/feedback_utils/video_evaluation.py @@ -0,0 +1,76 @@ +# rag_service/rag_service/feedback_utils/video_evaluation.py + +from typing import Dict, Tuple + +import frappe + +from .evaluation_generation import EvaluationGenerator +from ..utils.gcp_service_client import GCPServiceClient + + +class VideoEvaluationGenerator(EvaluationGenerator): + """Generate AI feedback for video submissions.""" + + async def generate_feedback( + self, assignment_context: Dict, submission_data: Dict, submission_id: str + ) -> Tuple[Dict, str, str]: + media_service = None + media_asset = None + try: + print("\n=== Starting AI Feedback Generation (Image) ===") + + llm_provider, model_used = self._create_llm_provider("Gemini") + + activity_type = assignment_context["assignment"].get("activity_type") + course_vertical = assignment_context["assignment"].get("course_vertical") + print(f"Activity Type: {activity_type}, Course Vertical: {course_vertical}") + + + template = self.get_prompt_template("video", "both", activity_type, course_vertical) + expected_format = self._get_expected_format(template) + + system_prompt, formatted_user_prompt = self._format_prompts( + template, + assignment_context, + submission_data, + [], + ) + combined_prompt = f"{system_prompt}\n\n{formatted_user_prompt}" + + media_service = GCPServiceClient() + media_asset = media_service.download_media(submission_data["submission_url"]) + + response = await llm_provider.generate_with_video( + media_asset, + combined_prompt, + mime_type=media_asset["mime_type"], + ) + raw_text = response.text + self.cost = llm_provider.calculate_cost(response.to_dict()) + + self.log_prob_feedback = response.to_dict().get("candidates", [{}])[0].get("avg_logprobs", None) + + print(f"\nRaw LLM Output:\n{raw_text}") + feedback = 
+            feedback = self._parse_feedback(raw_text, expected_format)
+            feedback = self._attach_plagiarism_defaults(feedback)
+            feedback = self._attach_default_fileds(feedback)
+            feedback['strengths'] = [f"cost:{self.cost}", f"Feedback_LP:{self.log_prob_feedback}", "Eval_LP:0.0"]
+
+            template_used = self._template_used_name(template)
+
+            print("\n=== Video Feedback Generation Completed ===")
+            return feedback, model_used, template_used
+
+        except Exception as e:
+            error_msg = f"Error generating video feedback for submission {submission_id}: {str(e)}"
+            print(f"\nError: {error_msg}")
+            frappe.log_error(message=error_msg, title="Video Feedback Generation Error")
+
+            template_used = "Built-in Universal Template for Error"
+            error_feedback = self.feedback_service.create_error_feedback(str(e))
+            error_feedback = self._attach_plagiarism_defaults(error_feedback)
+            error_feedback['strengths'] = ["cost:1", "Feedback_LP:0.89", "Eval_LP:0.78"]
+            return error_feedback, "N/A", template_used
+        finally:
+            if media_service:
+                media_service.cleanup(media_asset)
diff --git a/rag_service/handlers/__init__.py b/rag_service/handlers/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/rag_service/rabbitmq_utils.py b/rag_service/rabbitmq_utils.py
deleted file mode 100644
index a7f026c..0000000
--- a/rag_service/rabbitmq_utils.py
+++ /dev/null
@@ -1,72 +0,0 @@
-# File: rag_service/rag_service/rabbitmq_utils.py
-
-import frappe
-import pika
-import json
-from .core.rag_utils import process_submission, find_similar_content
-
-def process_message(ch, method, properties, body):
-    try:
-        message_data = json.loads(body)
-
-        # Process the submission
-        result = process_submission(
-            message_data.get("submission_id"),
-            message_data.get("content")
-        )
-
-        # Create feedback request
-        feedback_request = frappe.get_doc({
-            "doctype": "Feedback Request",
-            "request_id": message_data.get("submission_id"),
-            "student_id": message_data.get("student_id"),
-            "assignment_id": message_data.get("assignment_id"),
-            "submission_content": message_data.get("content"),
-            "plagiarism_score": message_data.get("plagiarism_score"),
-            "status": "Processing",
-            "similar_submissions": json.dumps(result.get("similar_submissions"))
-        })
-
-        feedback_request.insert()
-        frappe.db.commit()
-
-    except Exception as e:
-        frappe.log_error(frappe.get_traceback(), "Error processing RabbitMQ message")
-
-
-def start_consuming():
-    try:
-        settings = get_rabbitmq_settings()
-
-        # Log the connection attempt
-        frappe.logger().info(f"Connecting to RabbitMQ at {settings.host}:{settings.port}")
-
-        # Create connection
-        credentials = pika.PlainCredentials(settings.username, settings.password)
-        parameters = pika.ConnectionParameters(
-            host=settings.host,
-            port=settings.port,
-            virtual_host=settings.virtual_host,
-            credentials=credentials
-        )
-
-        connection = pika.BlockingConnection(parameters)
-        channel = connection.channel()
-
-        # Ensure queue exists
-        channel.queue_declare(queue=settings.plagiarism_results_queue, durable=True)
-
-        # Set up consumer
-        channel.basic_consume(
-            queue=settings.plagiarism_results_queue,
-            on_message_callback=process_message,
-            auto_ack=True
-        )
-
-        frappe.logger().info("Started consuming messages...")
-        channel.start_consuming()
-
-    except Exception as e:
-        frappe.log_error(frappe.get_traceback(), "Error in RabbitMQ consumer")
-        raise
-
diff --git a/rag_service/rag_service/doctype/assignment_context/assignment_context.json b/rag_service/rag_service/doctype/assignment_context/assignment_context.json
index b7dceba..bcca660 100644
--- a/rag_service/rag_service/doctype/assignment_context/assignment_context.json +++ b/rag_service/rag_service/doctype/assignment_context/assignment_context.json @@ -7,11 +7,16 @@ "field_order": [ "assignment_id", "assignment_name", + "program_name", "course_vertical", + "activity_type", "description", - "rubric", + "rubrics", "learning_objectives", "difficulty_level", + "difficulty_tier", + "submission_guidelines", + "submission_rules", "last_updated", "version", "cache_valid_till", @@ -36,8 +41,13 @@ "fieldtype": "Data", "label": "Course Vertical" }, +{ + "fieldname": "activity_type", + "fieldtype": "Data", + "label": "Activity Type" +}, { - "fieldname": "rubric", + "fieldname": "rubrics", "fieldtype": "Text", "label": "Rubric" }, @@ -88,6 +98,26 @@ "fieldtype": "Data", "label": "Max Score" }, + { + "fieldname": "program_name", + "fieldtype": "Data", + "label": "Program Name" + }, + { + "fieldname": "difficulty_tier", + "fieldtype": "Data", + "label": "Difficulty Tier" + }, + { + "fieldname": "submission_guidelines", + "fieldtype": "Text", + "label": "Submission Guidelines" + }, + { + "fieldname": "submission_rules", + "fieldtype": "Text", + "label": "Submission Rules" + }, { "fieldname": "description", "fieldtype": "Text", diff --git a/rag_service/rag_service/doctype/feedback_request/feedback_request.js b/rag_service/rag_service/doctype/feedback_request/feedback_request.js index 2def124..4724608 100644 --- a/rag_service/rag_service/doctype/feedback_request/feedback_request.js +++ b/rag_service/rag_service/doctype/feedback_request/feedback_request.js @@ -1,8 +1,27 @@ -// Copyright (c) 2024, TAP and contributors -// For license information, please see license.txt +frappe.listview_settings['Feedback Request'] = { + add_fields: ["result_status", "is_plagiarized", "is_ai_generated"], -// frappe.ui.form.on("Feedback Request", { -// refresh(frm) { + get_indicator: function(doc) { + const status_map = { + "Pending": ["orange", "Pending"], + "Success - Original": ["green", "Original"], + "Success - Flagged": ["red", "Flagged"], + "Failed": ["darkgrey", "Failed"] + }; -// }, -// }); + const [color, label] = status_map[doc.result_status] || ["grey", "Unknown"]; + return [__(label), color, `result_status,=,${doc.result_status}`]; + }, + + formatters: { + result_status: function(value) { + const badges = { + "Pending": 'Pending', + "Success - Original": '✓ Original', + "Success - Flagged": '⚠ Flagged', + "Failed": '✗ Failed' + }; + return badges[value] || value; + } + } +}; diff --git a/rag_service/rag_service/doctype/feedback_request/feedback_request.json b/rag_service/rag_service/doctype/feedback_request/feedback_request.json index 12aef79..c426cd5 100644 --- a/rag_service/rag_service/doctype/feedback_request/feedback_request.json +++ b/rag_service/rag_service/doctype/feedback_request/feedback_request.json @@ -9,6 +9,9 @@ "student_id", "assignment_id", "submission_id", + "submission_type", + "submission_url", + "submission_text", "submission_content", "plagiarism_score", "similar_sources", @@ -21,7 +24,14 @@ "model_used", "processing_attempts", "error_log", - "is_archived" + "is_archived", + "result_status", + "is_plagiarized", + "match_type", + "plagiarism_source", + "is_ai_generated", + "ai_detection_source", + "ai_confidence" ], "fields": [ { @@ -39,6 +49,22 @@ "fieldtype": "Data", "label": "Submission ID" }, + { + "fieldname": "submission_type", + "fieldtype": "Select", + "label": "Submission Type", + "options": "\nimage\nvideo\naudio\ntext\nemoji" + }, + { + "fieldname": "submission_url", + 
"fieldtype": "Data", + "label": "Submission URL" + }, + { + "fieldname": "submission_text", + "fieldtype": "Small Text", + "label": "Submission Text" + }, { "fieldname": "submission_content", "fieldtype": "Text", @@ -82,15 +108,13 @@ }, { "fieldname": "template_used", - "fieldtype": "Link", - "label": "Template Used", - "options": "Prompt Template" + "fieldtype": "Text", + "label": "Template Used" }, { "fieldname": "model_used", - "fieldtype": "Link", - "label": "Model Used", - "options": "LLM Settings" + "fieldtype": "Text", + "label": "Model Used" }, { "fieldname": "processing_attempts", @@ -107,6 +131,48 @@ "fieldname": "is_archived", "fieldtype": "Check", "label": "Is Archived" + }, + { + "fieldname": "result_status", + "fieldtype": "Select", + "label": "Result Status", + "options": "Pending\nSuccess - Original\nSuccess - Flagged\nFailed", + "default": "Pending", + "in_list_view": 1, + "in_standard_filter": 1, + "description": "Overall feedback generation result status" + }, + { + "fieldname": "is_plagiarized", + "fieldtype": "Check", + "label": "Is Plagiarized" + }, + { + "fieldname": "match_type", + "fieldtype": "Select", + "label": "Match Type", + "options": "\noriginal\nexact_duplicate\nnear_duplicate\nsemantic_match\nai_generated\nresubmission_allowed" + }, + { + "fieldname": "plagiarism_source", + "fieldtype": "Select", + "label": "Plagiarism Source", + "options": "\nnone\npeer\npeer_collusion\nself_cross_assignment\nself_late_resubmission\nreference\nai_generated" + }, + { + "fieldname": "is_ai_generated", + "fieldtype": "Check", + "label": "Is AI Generated" + }, + { + "fieldname": "ai_detection_source", + "fieldtype": "Data", + "label": "AI Detection Source" + }, + { + "fieldname": "ai_confidence", + "fieldtype": "Float", + "label": "AI Confidence" } ], "index_web_pages_for_search": 1, @@ -135,4 +201,4 @@ "sort_field": "modified", "sort_order": "DESC", "states": [] -} \ No newline at end of file +} diff --git a/rag_service/rag_service/doctype/gcs_settings/__init__.py b/rag_service/rag_service/doctype/gcs_settings/__init__.py new file mode 100644 index 0000000..8980646 --- /dev/null +++ b/rag_service/rag_service/doctype/gcs_settings/__init__.py @@ -0,0 +1,2 @@ +# Copyright (c) 2026, TAP and contributors + diff --git a/rag_service/rag_service/doctype/gcs_settings/gcs_settings.js b/rag_service/rag_service/doctype/gcs_settings/gcs_settings.js new file mode 100644 index 0000000..98d0b79 --- /dev/null +++ b/rag_service/rag_service/doctype/gcs_settings/gcs_settings.js @@ -0,0 +1 @@ +frappe.ui.form.on("GCS Settings", {}); diff --git a/rag_service/rag_service/doctype/gcs_settings/gcs_settings.json b/rag_service/rag_service/doctype/gcs_settings/gcs_settings.json new file mode 100644 index 0000000..c5cfc04 --- /dev/null +++ b/rag_service/rag_service/doctype/gcs_settings/gcs_settings.json @@ -0,0 +1,56 @@ +{ + "actions": [], + "allow_rename": 1, + "creation": "2026-04-27 00:00:00.000000", + "doctype": "DocType", + "engine": "InnoDB", + "field_order": [ + "project_id", + "credentials_json", + "description" + ], + "fields": [ + { + "fieldname": "project_id", + "fieldtype": "Data", + "label": "Project ID" + }, + { + "fieldname": "credentials_json", + "fieldtype": "Code", + "label": "Service Account Credentials (JSON)", + "options": "JSON", + "reqd": 1 + }, + { + "fieldname": "description", + "fieldtype": "Small Text", + "label": "Description" + } + ], + "index_web_pages_for_search": 1, + "issingle": 1, + "links": [], + "modified": "2026-04-27 00:00:00.000000", + "modified_by": 
"Administrator", + "module": "Rag Service", + "name": "GCS Settings", + "owner": "Administrator", + "permissions": [ + { + "create": 1, + "delete": 1, + "email": 1, + "export": 1, + "print": 1, + "read": 1, + "report": 1, + "role": "System Manager", + "share": 1, + "write": 1 + } + ], + "sort_field": "modified", + "sort_order": "DESC", + "states": [] +} diff --git a/rag_service/rag_service/doctype/gcs_settings/gcs_settings.py b/rag_service/rag_service/doctype/gcs_settings/gcs_settings.py new file mode 100644 index 0000000..f75f644 --- /dev/null +++ b/rag_service/rag_service/doctype/gcs_settings/gcs_settings.py @@ -0,0 +1,8 @@ +# Copyright (c) 2026, TAP and contributors +# For license information, please see license.txt + +from frappe.model.document import Document + + +class GCSSettings(Document): + pass diff --git a/rag_service/rag_service/doctype/gcs_settings/test_gcs_settings.py b/rag_service/rag_service/doctype/gcs_settings/test_gcs_settings.py new file mode 100644 index 0000000..f5458d5 --- /dev/null +++ b/rag_service/rag_service/doctype/gcs_settings/test_gcs_settings.py @@ -0,0 +1,7 @@ +# Copyright (c) 2026, TAP and contributors + +from frappe.tests.utils import FrappeTestCase + + +class TestGCSSettings(FrappeTestCase): + pass diff --git a/rag_service/rag_service/doctype/gemini_settings/__init__.py b/rag_service/rag_service/doctype/gemini_settings/__init__.py new file mode 100644 index 0000000..272296f --- /dev/null +++ b/rag_service/rag_service/doctype/gemini_settings/__init__.py @@ -0,0 +1,2 @@ +# Copyright (c) 2026, TAP and contributors +# For license information, please see license.txt diff --git a/rag_service/rag_service/doctype/gemini_settings/gemini_settings.js b/rag_service/rag_service/doctype/gemini_settings/gemini_settings.js new file mode 100644 index 0000000..8a42017 --- /dev/null +++ b/rag_service/rag_service/doctype/gemini_settings/gemini_settings.js @@ -0,0 +1,4 @@ +// Copyright (c) 2026, TAP and contributors +// For license information, please see license.txt + +frappe.ui.form.on("Gemini Settings", {}); diff --git a/rag_service/rag_service/doctype/gemini_settings/gemini_settings.json b/rag_service/rag_service/doctype/gemini_settings/gemini_settings.json new file mode 100644 index 0000000..42c14df --- /dev/null +++ b/rag_service/rag_service/doctype/gemini_settings/gemini_settings.json @@ -0,0 +1,97 @@ +{ + "actions": [], + "allow_rename": 1, + "creation": "2026-02-06 10:00:00.000000", + "doctype": "DocType", + "engine": "InnoDB", + "field_order": [ + "model_name", + "location", + "project_id", + "credentials_json", + "temperature", + "max_tokens", + "is_active", + "is_default", + "description" + ], + "fields": [ + { + "fieldname": "model_name", + "fieldtype": "Data", + "label": "Model Name" + }, + { + "default": "us-central1", + "fieldname": "location", + "fieldtype": "Data", + "label": "Location" + }, + { + "fieldname": "project_id", + "fieldtype": "Data", + "label": "Project ID" + }, + { + "fieldname": "credentials_json", + "fieldtype": "Code", + "in_list_view": 1, + "label": "Service Account Credentials (JSON)", + "options": "JSON", + "reqd": 1 + }, + { + "default": "0", + "fieldname": "temperature", + "fieldtype": "Float", + "label": "Temperature" + }, + { + "default": "2000", + "fieldname": "max_tokens", + "fieldtype": "Int", + "label": "Max Tokens" + }, + { + "default": "0", + "fieldname": "is_active", + "fieldtype": "Check", + "label": "Is Active" + }, + { + "default": "0", + "fieldname": "is_default", + "fieldtype": "Check", + "label": "Is Default" + }, + { + 
"fieldname": "description", + "fieldtype": "Small Text", + "label": "Description" + } + ], + "index_web_pages_for_search": 1, + "links": [], + "modified": "2026-02-06 10:00:00.000000", + "modified_by": "Administrator", + "module": "Rag Service", + "name": "Gemini Settings", + "owner": "Administrator", + "permissions": [ + { + "create": 1, + "delete": 1, + "email": 1, + "export": 1, + "print": 1, + "read": 1, + "report": 1, + "role": "System Manager", + "share": 1, + "write": 1 + } + ], + "sort_field": "modified", + "sort_order": "DESC", + "states": [] +} diff --git a/rag_service/rag_service/doctype/gemini_settings/gemini_settings.py b/rag_service/rag_service/doctype/gemini_settings/gemini_settings.py new file mode 100644 index 0000000..086c765 --- /dev/null +++ b/rag_service/rag_service/doctype/gemini_settings/gemini_settings.py @@ -0,0 +1,8 @@ +# Copyright (c) 2026, TAP and contributors +# For license information, please see license.txt + +from frappe.model.document import Document + + +class GeminiSettings(Document): + pass diff --git a/rag_service/rag_service/doctype/gemini_settings/test_gemini_settings.py b/rag_service/rag_service/doctype/gemini_settings/test_gemini_settings.py new file mode 100644 index 0000000..95d1e94 --- /dev/null +++ b/rag_service/rag_service/doctype/gemini_settings/test_gemini_settings.py @@ -0,0 +1,9 @@ +# Copyright (c) 2026, TAP and contributors +# For license information, please see license.txt + +# import frappe +from frappe.tests.utils import FrappeTestCase + + +class TestGeminiSettings(FrappeTestCase): + pass diff --git a/rag_service/rag_service/doctype/llm_settings/llm_settings.json b/rag_service/rag_service/doctype/llm_settings/llm_settings.json index 35aba59..fa59620 100644 --- a/rag_service/rag_service/doctype/llm_settings/llm_settings.json +++ b/rag_service/rag_service/doctype/llm_settings/llm_settings.json @@ -7,20 +7,23 @@ "field_order": [ "provider", "model_name", - "description", "temperature", "max_tokens", + "api_key", + "api_secret", + "location", + "project_id", + "credentials_json", "is_active", "is_default", - "api_key", - "api_secret" + "description" ], "fields": [ { "fieldname": "provider", "fieldtype": "Select", "label": "LLM Provider", - "options": "OpenAI\nAnthropic\nTogether AI\nCustom" + "options": "OpenAI\nAnthropic\nTogether AI\nGemini\nCustom" }, { "fieldname": "model_name", @@ -44,6 +47,24 @@ "fieldname": "max_tokens", "fieldtype": "Int", "label": "Max Tokens" + }, + { + "default": "us-central1", + "fieldname": "location", + "fieldtype": "Data", + "label": "Location" + }, + { + "fieldname": "project_id", + "fieldtype": "Data", + "label": "Project ID" + }, + { + "fieldname": "credentials_json", + "fieldtype": "Code", + "in_list_view": 1, + "label": "Service Account Credentials (JSON)", + "options": "JSON" }, { "default": "0", diff --git a/rag_service/rag_service/doctype/prompt_segment/__init__.py b/rag_service/rag_service/doctype/prompt_segment/__init__.py new file mode 100644 index 0000000..8980646 --- /dev/null +++ b/rag_service/rag_service/doctype/prompt_segment/__init__.py @@ -0,0 +1,2 @@ +# Copyright (c) 2026, TAP and contributors + diff --git a/rag_service/rag_service/doctype/prompt_segment/prompt_segment.js b/rag_service/rag_service/doctype/prompt_segment/prompt_segment.js new file mode 100644 index 0000000..1577e89 --- /dev/null +++ b/rag_service/rag_service/doctype/prompt_segment/prompt_segment.js @@ -0,0 +1 @@ +frappe.ui.form.on("Prompt Segment", {}); diff --git 
a/rag_service/rag_service/doctype/prompt_segment/prompt_segment.json b/rag_service/rag_service/doctype/prompt_segment/prompt_segment.json new file mode 100644 index 0000000..23d2bef --- /dev/null +++ b/rag_service/rag_service/doctype/prompt_segment/prompt_segment.json @@ -0,0 +1,88 @@ +{ + "actions": [], + "allow_rename": 1, + "autoname": "field:segment_name", + "creation": "2026-04-27 00:00:00.000000", + "doctype": "DocType", + "engine": "InnoDB", + "field_order": [ + "segment_name", + "segment_type", + "content", + "is_active", + "version", + "description", + "last_used" + ], + "fields": [ + { + "fieldname": "segment_name", + "fieldtype": "Data", + "in_list_view": 1, + "label": "Segment Name", + "reqd": 1 + }, + { + "fieldname": "segment_type", + "fieldtype": "Select", + "in_list_view": 1, + "label": "Segment Type", + "options": "system\ngrading\nsubject\noutput", + "reqd": 1 + }, + { + "fieldname": "content", + "fieldtype": "Text", + "label": "Content", + "options": "Text", + "reqd": 1 + }, + { + "default": "0", + "fieldname": "is_active", + "fieldtype": "Check", + "label": "Is Active" + }, + { + "fieldname": "version", + "fieldtype": "Int", + "label": "Version" + }, + { + "fieldname": "description", + "fieldtype": "Small Text", + "label": "Description" + }, + { + "fieldname": "last_used", + "fieldtype": "Datetime", + "label": "Last Used" + } + ], + "index_web_pages_for_search": 1, + "links": [], + "modified": "2026-04-27 00:00:00.000000", + "modified_by": "Administrator", + "module": "Rag Service", + "name": "Prompt Segment", + "owner": "Administrator", + "permissions": [ + { + "create": 1, + "delete": 1, + "email": 1, + "export": 1, + "print": 1, + "read": 1, + "report": 1, + "role": "System Manager", + "share": 1, + "write": 1 + } + ], + "search_fields": "segment_name,segment_type,description", + "sort_field": "modified", + "sort_order": "DESC", + "states": [], + "title_field": "segment_name" +} diff --git a/rag_service/rag_service/doctype/prompt_segment/prompt_segment.py b/rag_service/rag_service/doctype/prompt_segment/prompt_segment.py new file mode 100644 index 0000000..e64f03f --- /dev/null +++ b/rag_service/rag_service/doctype/prompt_segment/prompt_segment.py @@ -0,0 +1,10 @@ +# Copyright (c) 2026, TAP and contributors +# For license information, please see license.txt + +from frappe.model.document import Document + + +class PromptSegment(Document): + def validate(self): + if not self.version: + self.version = 1 diff --git a/rag_service/rag_service/doctype/prompt_segment/test_prompt_segment.py b/rag_service/rag_service/doctype/prompt_segment/test_prompt_segment.py new file mode 100644 index 0000000..6c419aa --- /dev/null +++ b/rag_service/rag_service/doctype/prompt_segment/test_prompt_segment.py @@ -0,0 +1,7 @@ +# Copyright (c) 2026, TAP and contributors + +from frappe.tests.utils import FrappeTestCase + + +class TestPromptSegment(FrappeTestCase): + pass diff --git a/rag_service/rag_service/doctype/prompt_template/prompt_template.json b/rag_service/rag_service/doctype/prompt_template/prompt_template.json index 0176106..1275720 100644 --- a/rag_service/rag_service/doctype/prompt_template/prompt_template.json +++ b/rag_service/rag_service/doctype/prompt_template/prompt_template.json @@ -1,14 +1,21 @@ { "actions": [], "allow_rename": 1, + "autoname": "field:template_name", "creation": "2024-11-01 13:45:08.976278", "doctype": "DocType", "engine": "InnoDB", "field_order": [ "template_name", "assignment_type", - "system_prompt", - "user_prompt", + "course_vertical", + "media_type", + 
"prompt_type", + "activity_type", + "system_segment", + "grading_segment", + "subject_segment", + "output_segment", "response_format", "variables", "is_active", @@ -34,14 +41,66 @@ "reqd": 1 }, { - "fieldname": "system_prompt", - "fieldtype": "Text", - "label": "System Prompt" + "fieldname": "course_vertical", + "fieldtype": "Select", + "in_list_view": 1, + "label": "Course Vertical", + "options": "Arts\nDance\nCoding\nScience Lab\nElectronics\nFinancial Literacy\nCommerce", + "reqd": 1 }, { - "fieldname": "user_prompt", - "fieldtype": "Text", - "label": "User Prompt Template" + "default": "image", + "fieldname": "media_type", + "fieldtype": "Select", + "in_list_view": 1, + "label": "Media Type", + "options": "image\nvideo\naudio\ntext", + "reqd": 1 + }, + { + "fieldname": "prompt_type", + "default": "feedback", + "fieldtype": "Select", + "in_list_view": 1, + "label": "Prompt Type", + "options": "evaluation\nfeedback\nboth", + "reqd": 1 + }, + { + "fieldname": "activity_type", + "fieldtype": "Select", + "in_list_view": 1, + "label": "Activity Type", + "options": "Regular\nChallenge", + "reqd": 1 + }, + { + "fieldname": "system_segment", + "fieldtype": "Link", + "label": "System Segment", + "options": "Prompt Segment", + "reqd": 1 + }, + { + "fieldname": "grading_segment", + "fieldtype": "Link", + "label": "Grading Segment", + "options": "Prompt Segment", + "reqd": 1 + }, + { + "fieldname": "subject_segment", + "fieldtype": "Link", + "label": "Subject Segment", + "options": "Prompt Segment", + "reqd": 1 + }, + { + "fieldname": "output_segment", + "fieldtype": "Link", + "label": "Output Segment", + "options": "Prompt Segment", + "reqd": 1 }, { "fieldname": "response_format", @@ -103,7 +162,9 @@ "write": 1 } ], + "search_fields": "template_name,media_type,prompt_type,course_vertical,activity_type,category,tags", "sort_field": "modified", "sort_order": "DESC", - "states": [] -} \ No newline at end of file + "states": [], + "title_field": "template_name" +} diff --git a/rag_service/rag_service/doctype/prompt_template/prompt_template.py b/rag_service/rag_service/doctype/prompt_template/prompt_template.py index 29ffa5d..fb8ece2 100644 --- a/rag_service/rag_service/doctype/prompt_template/prompt_template.py +++ b/rag_service/rag_service/doctype/prompt_template/prompt_template.py @@ -1,9 +1,29 @@ # Copyright (c) 2024, TAP and contributors # For license information, please see license.txt -# import frappe +import frappe +from frappe import _ from frappe.model.document import Document class PromptTemplate(Document): - pass + def validate(self): + expected_segment_types = { + "system_segment": "system", + "grading_segment": "grading", + "subject_segment": "subject", + "output_segment": "output", + } + + for fieldname, expected_type in expected_segment_types.items(): + segment_name = self.get(fieldname) + if not segment_name: + continue + + segment_type = frappe.db.get_value("Prompt Segment", segment_name, "segment_type") + if segment_type != expected_type: + frappe.throw( + _("{0} must reference a Prompt Segment of type '{1}'").format( + self.meta.get_label(fieldname), expected_type + ) + ) diff --git a/rag_service/scripts/assignment b/rag_service/scripts/assignment new file mode 100644 index 0000000..86bcad3 --- /dev/null +++ b/rag_service/scripts/assignment @@ -0,0 +1 @@ +"assignment": {'name': 'VA_L2_CA1-Basic','description': '"Have you ever noticed how colorful walls, buses, shops, and even market stalls look? 
Things like movie posters, cricket match ads, or the signs on sweet shops are all in bright colors and bold designs. These ads grab your attention really quickly, right?\n\nThis idea was made popular by a famous 20th-century artist named Andy Warhol with something called Pop Art. He turned everyday things like bottles, soup cans, and pictures of famous people into bright and bold artworks using bright colors and repeating patterns.\n\nPop Art is when we take ordinary objects and everyday items and turn them into bold, fun, and eye-catching art!\n\nToday, we\'re going to be inspired by this idea and design a product in a colorful and vibrant Pop Art style. Just like Andy Warhol, we will advertise our product in a fun way. \n\nChoose a simple product — like a cup, a food item, or maybe something from your school supplies. Draw the same product repeatedly, fill it with bright and eye-catching colors, and use different lines and patterns for details.\n\nHere are the important steps to follow so your project is complete:\n- Your artwork should clearly show a product.\n- The colors in your artwork should immediately attract the audience.\n- The product should be drawn repeatedly, like Andy Warhol did.\n- There should be some details shown with lines and patterns.\n\nShare a picture of your final artwork with TAP Buddy. And don\'t forget to take the quiz!\n\nRemember, kids, art is not just a hobby — it\'s a way to express your ideas and imagination. So, explore your creativity and see how simple objects can become vibrant and exciting art! Keep creating and have fun with colors!"', 'assignment_type': 'Practical', 'subject': 'Arts', 'rubric_grades': [{'name': '1-Content Knowledge-Arts-Level 2-Arts-C0141', 'idx': 1, 'rubric_name': None, 'grade_value': 1, 'grade_name': 'Novice', 'grade_description': '- Invalid or no submission. 
\n- No product drawn', 'skill_name': 'Content Knowledge', 'course_vertical': 'Arts', 'course_level': 'Level 2-Arts-C0141', 'parent': 'VA_L2_CA1-Basic', 'parentfield': 'rubric_grades', 'parenttype': 'Assignment', 'doctype': 'Rubric Grade'}, {'name': '2-Content Knowledge-Arts-Level 2-Arts-C0141', 'idx': 2, 'rubric_name': None, 'grade_value': 2, 'grade_name': 'Beginner', 'grade_description': '- Random product drawn; no repetition \n- Colored randomly without referring to color wheel', 'skill_name': 'Content Knowledge', 'course_vertical': 'Arts', 'course_level': 'Level 2-Arts-C0141', 'parent': 'VA_L2_CA1-Basic', 'parentfield': 'rubric_grades', 'parenttype': 'Assignment', 'doctype': 'Rubric Grade'}, {'name': '3-Content Knowledge-Arts-Level 2-Arts-C0141', 'idx': 3, 'rubric_name': None, 'grade_value': 3, 'grade_name': 'Emerging', 'grade_description': '- A thoughtful product drawn; repeated a few times, but not andy warhol style \n- Colors are bold, but lacks contrast', 'skill_name': 'Content Knowledge', 'course_vertical': 'Arts', 'course_level': 'Level 2-Arts-C0141', 'parent': 'VA_L2_CA1-Basic', 'parentfield': 'rubric_grades', 'parenttype': 'Assignment', 'doctype': 'Rubric Grade'}, {'name': '4-Content Knowledge-Arts-Level 2-Arts-C0141', 'idx': 4, 'rubric_name': None, 'grade_value': 4, 'grade_name': 'Proficient', 'grade_description': '- Product is repeated in a grid pattern, similar to Andy Warhol \n- Colors are bold, contrasting, and visually engaging.', 'skill_name': 'Content Knowledge', 'course_vertical': 'Arts', 'course_level': 'Level 2-Arts-C0141', 'parent': 'VA_L2_CA1-Basic', 'parentfield': 'rubric_grades', 'parenttype': 'Assignment', 'doctype': 'Rubric Grade'}, {'name': '5-Content Knowledge-Arts-Level 2-Arts-C0141', 'idx': 5, 'rubric_name': None, 'grade_value': 5, 'grade_name': 'Expert', 'grade_description': '- Product is repeated in a grid pattern, similar to Andy Warhol \n- Choice of color matches the mood of the product, and has been enhanced with the use lines & patterns', 'skill_name': 'Content Knowledge', 'course_vertical': 'Arts', 'course_level': 'Level 2-Arts-C0141', 'parent': 'VA_L2_CA1-Basic', 'parentfield': 'rubric_grades', 'parenttype': 'Assignment', 'doctype': 'Rubric Grade'}, {'name': '1-Creativity-Arts-Level 2-Arts-C0141', 'idx': 6, 'rubric_name': None, 'grade_value': 1, 'grade_name': 'Novice', 'grade_description': '- Invalid or no submission. \n- No product drawn', 'skill_name': 'Creativity', 'course_vertical': 'Arts', 'course_level': 'Level 2-Arts-C0141', 'parent': 'VA_L2_CA1-Basic', 'parentfield': 'rubric_grades', 'parenttype': 'Assignment', 'doctype': 'Rubric Grade'}, {'name': '2-Creativity-Arts-Level 2-Arts-C0141', 'idx': 7, 'rubric_name': None, 'grade_value': 2, 'grade_name': 'Beginner', 'grade_description': '- Chosen product is basic & way too common. Eg: A cup, A pencil \n- Dull irrevelant colors are chosen. Eg: Orange & Pink for a pencil', 'skill_name': 'Creativity', 'course_vertical': 'Arts', 'course_level': 'Level 2-Arts-C0141', 'parent': 'VA_L2_CA1-Basic', 'parentfield': 'rubric_grades', 'parenttype': 'Assignment', 'doctype': 'Rubric Grade'}, {'name': '3-Creativity-Arts-Level 2-Arts-C0141', 'idx': 8, 'rubric_name': None, 'grade_value': 3, 'grade_name': 'Emerging', 'grade_description': '- Chosen product is common, but has been presented differently than usual.\n- Product has been repeated, but is not cohesive. 
\n- Basic bright colors have been choosen', 'skill_name': 'Creativity', 'course_vertical': 'Arts', 'course_level': 'Level 2-Arts-C0141', 'parent': 'VA_L2_CA1-Basic', 'parentfield': 'rubric_grades', 'parenttype': 'Assignment', 'doctype': 'Rubric Grade'}, {'name': '4-Creativity-Arts-Level 2-Arts-C0141', 'idx': 9, 'rubric_name': None, 'grade_value': 4, 'grade_name': 'Proficient', 'grade_description': '- Chosen product is an interesting pop art related object. \n- Product has been repeated in a grid pattern\n- Bright, complementary colors as per color wheel have been used \n\nExample: A shoe or a coffee cup, repeated in a grid filled with distinct patterns (e.g., stripes, dots)', 'skill_name': 'Creativity', 'course_vertical': 'Arts', 'course_level': 'Level 2-Arts-C0141', 'parent': 'VA_L2_CA1-Basic', 'parentfield': 'rubric_grades', 'parenttype': 'Assignment', 'doctype': 'Rubric Grade'}, {'name': '5-Creativity-Arts-Level 2-Arts-C0141', 'idx': 10, 'rubric_name': None, 'grade_value': 5, 'grade_name': 'Expert', 'grade_description': '- Chosen product is imaginary & unique \n- Repeated in a grid pattern\n- Brightly colored detailed with multiple patterns and lines \n\nExample: An imaginative creature like an alien, monster, or fictional character with unique details, bold lines, and multiple creative patterns like swirls, zigzags, geometric shapes', 'skill_name': 'Creativity', 'course_vertical': 'Arts', 'course_level': 'Level 2-Arts-C0141', 'parent': 'VA_L2_CA1-Basic', 'parentfield': 'rubric_grades', 'parenttype': 'Assignment', 'doctype': 'Rubric Grade'}]} diff --git a/rag_service/scripts/check_active_llms.py b/rag_service/scripts/check_active_llms.py deleted file mode 100644 index 502b9e0..0000000 --- a/rag_service/scripts/check_active_llms.py +++ /dev/null @@ -1,66 +0,0 @@ -#!/usr/bin/env python - -import sys -import os - -# Add frappe to path -sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'apps')) - -def check_active_llms(): - """Check all active LLM settings and which one would be selected""" - - # Get all active settings - all_active = frappe.get_list( - "LLM Settings", - filters={"is_active": 1}, - fields=["name", "provider", "model_name", "modified", "creation"], - order_by="modified desc" - ) - - print(f"\n=== Active LLM Settings ({len(all_active)} found) ===\n") - - for i, setting in enumerate(all_active): - status = "✓ WOULD BE SELECTED" if i == 0 else " Would be ignored" - print(f"{status}: {setting.provider} - {setting.model_name}") - print(f" Name: {setting.name}") - print(f" Modified: {setting.modified}") - print(f" Created: {setting.creation}") - print() - - if len(all_active) > 1: - print("⚠️ WARNING: Multiple active LLM settings found!") - print(" Only the first one will be used.") - print(" Please deactivate all but one to avoid confusion.") - elif len(all_active) == 0: - print("❌ No active LLM settings found!") - print(" Please activate at least one LLM setting.") - - # Show what setup_llm would select - selected = frappe.get_list( - "LLM Settings", - filters={"is_active": 1}, - limit=1, - order_by="modified desc" - ) - - if selected: - settings = frappe.get_doc("LLM Settings", selected[0].name) - print(f"\n✓ The system will use: {settings.provider} - {settings.model_name}") - print(f" Name: {settings.name}") - -if __name__ == "__main__": - import frappe - - # Use the site name from command line or default - site_name = sys.argv[1] if len(sys.argv) > 1 else 'rag-dev.localhost' - - try: - frappe.init(site=site_name) - frappe.connect() - check_active_llms() - except 
Exception as e: - print(f"Error: {str(e)}") - print(f"Make sure the site '{site_name}' exists") - finally: - if frappe and frappe.db: - frappe.destroy() diff --git a/rag_service/scripts/console_consumer.py b/rag_service/scripts/console_consumer.py new file mode 100644 index 0000000..22509d2 --- /dev/null +++ b/rag_service/scripts/console_consumer.py @@ -0,0 +1,5 @@ +from rag_service.utils.rabbitmq_consumer import RabbitMQConsumer +consumer = RabbitMQConsumer(debug=True) +if consumer.test_connection(): + print("Starting RabbitMQ consumer...") + consumer.start_consuming() \ No newline at end of file diff --git a/rag_service/scripts/sample output.json b/rag_service/scripts/sample output.json new file mode 100644 index 0000000..ef3da90 --- /dev/null +++ b/rag_service/scripts/sample output.json @@ -0,0 +1,41 @@ +{ + "submission_id": "IMSUB-2603081790", + "student_id": "ST00000206", + "assignment_id": "VA_L2_CA1-Basic", + "feedback": { + "overall_feedback": "Wow, this drawing of french fries looks delicious! You've used bright yellow and orange colors, which really captures the fun feeling of Pop Art. For your next piece, remember that Pop Art often repeats the same image. Try drawing these fries multiple times to make your artwork even more powerful. Keep creating!", + "overall_feedback_translated": "वाह, आपके बनाए हुए फ्रेंच फ्राइज़ बहुत स्वादिष्ट लग रहे हैं! आपने चमकीले पीले और नारंगी रंगों का बहुत अच्छा इस्तेमाल किया है, जो पॉप आर्ट वाली मज़ेदार फीलिंग देता है। अगली बार, यह याद रखें कि पॉप आर्ट में अक्सर एक ही चीज़ को बार-बार बनाया जाता है। अपनी कला को और भी प्रभावशाली बनाने के लिए इन फ्राइज़ को कई बार बनाने की कोशिश करें। बनाते रहें!", + "final_grade": 60, + "plagiarism_output": { + "is_plagiarized": false, + "is_ai_generated": false, + "match_type": "original", + "plagiarism_source": "none", + "similarity_score": 0.0, + "ai_detection_source": "none", + "ai_confidence": 0.0, + "similar_sources": [] + }, + "rubric_evaluations": [ + { + "Skill": "Content Knowledge", + "grade_value": 2, + "observation": "Evidence for Grade 2: The submission meets the criterion 'An object is clearly drawn and colored but not in a grid layout'. An object, which appears to be French fries in a carton, is drawn and colored. Why not Grade 3: The criterion 'Object is atleast clearly drawn in a grid layout' is not met. The artwork consists of a single object, not the same object repeated in a grid. Why not Grade 1: The submission exceeds Grade 1 as a clearly drawn object is present and the image is fully visible." + }, + { + "Skill": "Creativity", + "grade_value": 4, + "observation": "Evidence for Grade 4: The submission meets the criteria for this level. The criteria 'The object is drawn and colored neatly within its boundary/outline' and 'The object should not be a cup, banana, popcorn, cold drink can or soup can' are both satisfied. The object drawn, French fries, is not on the prohibited list, and it is both drawn and colored. Why not Grade 5: The criterion 'Creative patterns like swirls, zigzags, geometric shapes, lines... are also drawn in the object or in the background' is not met. The vertical lines on the carton are considered natural object details, and the background coloring consists of strokes rather than an intentional pattern. Why not Grade 3: The submission exceeds Grade 3 because the criterion 'An object is drawn but not colored' is not applicable, as the object in the artwork is colored." 
+ } + ], + "strengths": [ + "cost:0.017661", + "Feedback_LP:-0.8927426180996738", + "Eval_LP:-0.6946198258153099" + ], + "areas_for_improvement": [], + "encouragement": "", + "translation_language": "Hindi" + }, + "generated_at": "2026-03-07T19:10:11.239538" +} \ No newline at end of file diff --git a/rag_service/api/__init__.py b/rag_service/scripts/service_account_key.json similarity index 100% rename from rag_service/api/__init__.py rename to rag_service/scripts/service_account_key.json diff --git a/rag_service/scripts/test_consumer_payload.py b/rag_service/scripts/test_consumer_payload.py new file mode 100644 index 0000000..e3aa82f --- /dev/null +++ b/rag_service/scripts/test_consumer_payload.py @@ -0,0 +1,169 @@ +#!/usr/bin/env python3 +""" +Test script to send payloads directly to the RabbitMQ consumer for testing. +Simply modify the payload dictionary below and run: python test_consumer_payload.py +""" + +import sys +import json +import pika +from datetime import datetime +from pathlib import Path + +# Add the apps directory to the path +sys.path.insert(0, str(Path(__file__).parent.parent)) + +image_5 = "https://storage.googleapis.com/bucket_tap_1/uploads/21/09/23_Sibling_issue/20251105064001_C107277_F32580_M18081088.png" +image_1 = "https://storage.googleapis.com/bucket_tap_1/uploads/added_in_coll/20251007160901_C107788_F32580_M16096985.png" +image_1_digital = "https://storage.googleapis.com/bucket_tap_1/uploads/11/AugProccess/20251007114200_C128977_F32580_M16044526.png" +image_1_cropped = "https://storage.googleapis.com/bucket_tap_1/uploads/21/09/23_Sibling_issue/20251008024900_C132450_F32580_M16113857.png" + +video_original = "https://storage.googleapis.com/bucket_tap_1/uploads/Activity2_B1_Issue(Retrigger)/20251104121000_C445224_F32580_M17966712.mp4" + +# ============================================================================ +# EDIT THE PAYLOAD BELOW TO TEST DIFFERENT DATA +# ============================================================================ +PAYLOAD = { + "submission_id": "SUB-2605112507", + "student_id": "ST00002495", + "submission_type": "text", + "submission_text": "where to?", + "submission_url": None, + "program_enrollment": "1povvk2ga4", + "week": 1, + "is_primary": 1, + "escalation_step_at_submit": 0, + "archetype": "Dormant", + "experiment_arm": "default", + "expected_submission_type": "image", + "language": "Hindi", + "batch": "1-BT0205", + "current_week": 1, + "current_path": "Core", + "current_tier": "Basic", + "course_level": "Level 1-Arts-C0070", + "created_at": "2026-05-11 23:01:31.374768", + "similar_sources": [], + "similarity_score": 0.0, + "is_plagiarized": False, + "match_type": "original", + "assignment_id": "VA_L1_CA1-Basic", + "is_ai_generated": False, + "ai_detection_source": "", + "ai_confidence": 0.0, + "plagiarism_source": "" +} +# ============================================================================ + + + +def get_rabbitmq_settings(): + """Get RabbitMQ settings from Frappe""" + try: + return { + "host": "rabbit-01.lmq.cloudamqp.com", + "port": "5672", + "username": "aoafhbrm", + "password": "****", + "virtual_host": "aoafhbrm", + "queue": "plg_result_q", + } + except Exception as e: + print(f"Error fetching RabbitMQ settings: {e}") + return None + + +def connect_to_rabbitmq(settings): + """Establish connection to RabbitMQ""" + try: + credentials = pika.PlainCredentials(settings["username"], settings["password"]) + parameters = pika.ConnectionParameters( + host=settings["host"], + port=settings["port"], + 
virtual_host=settings["virtual_host"], + credentials=credentials, + heartbeat=600, + blocked_connection_timeout=300, + ) + connection = pika.BlockingConnection(parameters) + channel = connection.channel() + print(f"\n✓ Connected to RabbitMQ at {settings['host']}:{settings['port']}") + return connection, channel + except Exception as e: + print(f"\n✗ RabbitMQ Connection Error: {e}") + return None, None + + +def send_payload(connection, channel, queue_name, payload): + """Send a payload to RabbitMQ queue""" + try: + # Declare queue to ensure it exists + channel.queue_declare(queue=queue_name, durable=True) + + # Send message + channel.basic_publish( + exchange="", + routing_key=queue_name, + body=json.dumps(payload, ensure_ascii=False), + properties=pika.BasicProperties( + delivery_mode=2, # persistent + content_type="application/json", + ), + ) + print(f"\n✓ Payload sent to queue '{queue_name}'") + print(f" Submission ID: {payload.get('submission_id')}") + print(f" Student ID: {payload.get('student_id')}") + print(f" Assignment ID: {payload.get('assignment_id')}") + return True + except Exception as e: + print(f"\n✗ Error sending payload: {e}") + return False + finally: + if connection and not connection.is_closed: + connection.close() + + +def main(): + # Get RabbitMQ settings + settings = get_rabbitmq_settings() + if not settings: + print("\n✗ Unable to retrieve RabbitMQ settings") + return + + print(f"\n=== RabbitMQ Consumer Test Script ===") + print(f"Host: {settings['host']}") + print(f"Port: {settings['port']}") + print(f"Queue: {settings['queue']}") + + # Connect to RabbitMQ + connection, channel = connect_to_rabbitmq(settings) + if not connection or not channel: + return + + try: + # Validate required fields + required_fields = [ + "submission_id", + "student_id", + "assignment_id", + "submission_type", + "submission_url", + "submission_text", + ] + missing = [f for f in required_fields if f not in PAYLOAD] + if missing: + print(f"\n✗ Missing required fields: {', '.join(missing)}") + return + + print(f"\n--- Sending Payload ---") + print(f"Payload:\n{json.dumps(PAYLOAD, indent=2)}") + send_payload(connection, channel, settings["queue"], PAYLOAD) + + finally: + if connection and not connection.is_closed: + connection.close() + print("\n✓ Disconnected from RabbitMQ") + + +if __name__ == "__main__": + main() diff --git a/rag_service/scripts/test_feedback_prompt.py b/rag_service/scripts/test_feedback_prompt.py new file mode 100644 index 0000000..b91c5e6 --- /dev/null +++ b/rag_service/scripts/test_feedback_prompt.py @@ -0,0 +1,117 @@ +message_data_plg = { + "submission_id": "IMSUB-2601150154", + "student_id": "2724532", + "submission_type": "image", + "submission_url": "https://storage.googleapis.com/tap-lms-submissions/submissions/IMSUB-2601150154_IMSUB-25121730259_image.jpg", + "submission_text": None, + "created_at": "2026-01-15 18:26:12.316528", + "similar_sources": [ + { + "submission_id": "2e7ef3cb-aa66-4eb2-9235-b43698e9e92d", + "student_id": "2724533", + "assignment_id": "fun-faces-1313", + "image_url": "https://storage.googleapis.com/tap-lms-submissions/submissions/IMSUB-2601010088_IMSUB-25121730259_image.jpg", + "similarity_score": 1.0, + "role": "peer_exact_match" + }, + { + "submission_id": "ca593d80-015a-4195-ab60-627b43969352", + "student_id": "2724533", + "assignment_id": "fun-faces-1313", + "image_url": "https://storage.googleapis.com/tap-lms-submissions/submissions/IMSUB-2601090012_IMSUB-25121730259_image.jpg", + "similarity_score": 1.0, + "role": "peer_exact_match" + } 
+ ], + "similarity_score": 1.0, + "is_plagiarized": True, + "match_type": "exact_duplicate", + "assignment_id": "SC_L4_CA1-Basic", + "is_ai_generated": False, + "ai_detection_source": "", + "ai_confidence": 0.0, + "plagiarism_source": "peer_collusion" +} + +message_data = { + "submission_id": "IMSUB-2601160155", + "student_id": "2724532", + "submission_type": "image", + "submission_url": "https://storage.googleapis.com/tap-lms-submissions/submissions/IMSUB-2601160155_20251105064001_C107277_F32580_M18081088.png", + "submission_text": None, + "created_at": "2026-01-16 10:49:13.515436", + "similar_sources": "null", + "similarity_score": "null", + "is_plagiarized": False, + "match_type": "original", + "assignment_id": "SC_L4_CA1-Basic", + "is_ai_generated": False, + "ai_detection_source": "", + "ai_confidence": 0.0, + "plagiarism_source": "" +} + +message_data = {'submission_id': 'IMSUB-2601160156', 'student_id': '27245334', +'submission_type': 'image', +'submission_url': 'https://storage.googleapis.com/bucket_tap_1/uploads/11/AugProccess/20251104143002_C5095389_F32580_M18009048.png', +'submission_text': None, + 'created_at': '2026-01-16 13:03:49.054548', 'similar_sources': None, 'similarity_score': None, 'is_plagiarized': False, 'match_type': 'original', 'assignment_id': 'SC_L4_CA1-Basic', + 'is_ai_generated': False, 'ai_detection_source': '', 'ai_confidence': 0.0, 'plagiarism_source': ''} + + + + +assignment_context_sc = { + 'assignment': { + 'name': 'SC_L4_CA1', + 'description': "Hello coders!\nHave you ever shared your basic details for sports or school events? Did you also think that if you put these details in a proper format, it would become so easy to share them?\n\nSo let’s solve this problem and complete a challenge.\nYou have to create a bio-data program with the help of the Pydroid 3 application, which prints your Name, Class, Age, Tap Course and Favourite Activity on the screen.\nYou can easily complete this challenge using simple print statements.\n\nI also created my bio-data program in the same way and printed all the details on the output screen!\nFirst, I displayed the Name – Riya Roy,\nthen I displayed the Class – 11th, Age – 16, Tap course – Python, and Favourite Activity – Playing Chess.\n\nYou too create your own bio-data in the coding area and take a screenshot of the output window. Then share your output screenshot with your Tap Buddy on WhatsApp.\nAnd don’t forget to attend the quiz questions after completing the activity.\nSo see you in the next activity! Till then, keep creating, keep innovating", + 'type': 'Practical', + 'subject': 'Coding', + 'submission_guidelines': None, + 'reference_images': [], + 'max_score': '100', + 'rubrics': { + 'Content Knowledge': [ + {'grade_value': 1, 'grade_description': "Blank screenshot, unrelated screenshot, or no visible code/output.\nNo project-related content. The screenshot does not show the code or output window."}, + {'grade_value': 2, 'grade_description': "Output is present but lacks necessary details or contains incorrect formatting/ code is present only a few lines but not complete. The program attempts to categorize grades but is incomplete, with missing conditions or incorrect output."}, + {'grade_value': 3, 'grade_description': "The program categorizes grades based on marks but may have minor issues in logic or output formatting.\nShows some logic using if-else conditional statements."}, + {'grade_value': 4, 'grade_description': "Code mostly correct and clear. 
\nThe program categorizes grades correctly based on marks (Grade A for 90-100, B for 75-89, etc.). Code logic is clear, and output is correct.\nFormatting is jumbled but correct"}, + {'grade_value': 5, 'grade_description': "The program categorizes grades correctly, handles invalid input (e.g., outside 0-100), and outputs accurate grades. The logic is well-organized, and the code is clean."} + ], + 'Problem Solving': [ + {'grade_value': 1, 'grade_description': "Blank screenshot, unrelated screenshot, or no visible code/output.\nNo project-related content. The screenshot does not show the code or output window."}, + {'grade_value': 2, 'grade_description': "Code / Output window is visible but with limited details. The program runs but shows incorrect logic or output, e.g., grades are not correctly assigned or displayed."}, + {'grade_value': 3, 'grade_description': "Output or code shows partial understanding. \nThe program runs and shows the correct grades for valid input, but output may not be formatted correctly or program is not formatted correctly."}, + {'grade_value': 4, 'grade_description': "The program correctly assigns grades based on marks input (e.g., A for 90-100, B for 75-89). Handles invalid input correctly."}, + {'grade_value': 5, 'grade_description': "The program works flawlessly, correctly categorizing grades for all valid inputs, showing the correct grade, and handling invalid input (outside 0-100) with appropriate feedback."} + ] + } + }, + 'learning_objectives': [] +} + +assignment_context = {'assignment': {'name': 'VA_L1_CA1', 'description': '"“Close your eyes and imagine a creature that no one has ever seen before. Maybe it has dragon wings, zebra stripes, or fish scales…”\n“Today, your challenge is to invent it! Your task is to combine lines, shapes, and patterns to create a new, magical, and imaginative creature.”\n“Choose at least 3 shapes and 3 patterns for your creature. Then add unique features like wings, horns, multiple eyes, or any magical detail. Your creature should show your imagination!”\n“Step 1: Choose shapes – circles, triangles, rectangles, or ovals – and use them to form body parts. Step 2: Add patterns – stripes, spirals, zigzags, or dots – use at least three different patterns. Step 3: Add creative twists – mix and match special features like an elephant trunk with dragon wings, or a fish tail with monster horns. Following these steps will make your creature alive and interesting.”\n“You can add extra creativity to your creature – more patterns, colors, or unusual features. Let your imagination run free; the more unique and wild, the more fun it will be!”\n“Here’s how to make your creature successful:” • Use at least 3 shapes • Use at least 3 patterns • Creature should look creative and imaginative • Artwork should be neat and complete • Features should be unique and interesting\n“Once your drawing is complete, click a photo and share it with TAP Buddy. We can’t wait to see your creatures come alive on paper!”\n“Well done! You did amazing today. Thank you for creating with us—see you in the next activity!”"', 'type': 'Practical', 'subject': 'Arts', 'submission_guidelines': None, 'reference_images': [], 'max_score': '100', 'rubrics': {'Content Knowledge': [{'grade_value': 1, 'grade_description': '- Invalid or no submission.\n- The art work is digital and not hand drawn.\n - No shapes or patterns at all.'}, {'grade_value': 2, 'grade_description': '- Fewer than 3 shapes OR fewer than 3 patterns. \n- Shapes/patterns are very messy or hard to see. 
\n\n- Example: Only 1-2 circles drawn, no patterns or just 1 stripe.'}, {'grade_value': 3, 'grade_description': '- Uses 3 shapes AND 3 patterns, but some are unclear or blended together. \n- Not easy to count or spot all. \n\n- Example: Circles, triangles, rectangles with stripes, dots, zigzags but lines overlap and mix up.'}, {'grade_value': 4, 'grade_description': '- Clearly shows at least 3 different shapes AND 3 different patterns. \n- Easy to see and count each one. \n\n- Example: Circle body, triangle ears, rectangle legs with clear stripes on tail, dots on wings, zigzags on back.'}, {'grade_value': 5, 'grade_description': '- Uses more than 3 shapes AND more than 3 patterns, all clear and well-placed. \n- Extra shapes/patterns make it even better. \n\n- Example: Circle head, oval body, triangle wings, rectangle tail with stripes, dots, spirals, zigzags, plus checkerboard on legs.'}], 'Creativity': [{'grade_value': 1, 'grade_description': '- Invalid or no submission. \n- The art work is digital and not hand drawn.\n- No creature drawn.'}, {'grade_value': 2, 'grade_description': '- Creature has only 1-2 simple features. \n- Drawing is messy (colors outside lines) or not finished. \n- Looks boring, not imaginative. \n\n- Example: Plain circle with 1 basic wing; colors spill over edges.'}, {'grade_value': 3, 'grade_description': '- Creature has 3+ features with some imagination. \n- Drawing mostly neat but has small messy spots or unfinished parts. \n- Features are okay but not very unique. \n\n- Example: Triangle body, 2 wings, horn with patterns; a few smudges or blank areas.'}, {'grade_value': 4, 'grade_description': '- Creature looks very creative and imaginative. \n- Drawing is neat, complete, all parts colored nicely. \n- Many unique and interesting features mixed well. \n\n- Example: Oval body, spiral tail, 3 eyes, wings in bright colors; clean lines, no mess.'}, {'grade_value': 5, 'grade_description': '- Creature is super original, wild, and full of imagination. \n- Drawing is very neat, colorful, finished like a real artist. \n- Complex unique features that stand out. 
\n\n- Example: Mixed shapes with magical glowing wings, curly horns, 5 eyes, special tail; bright colors, perfect details.'}]}}, 'learning_objectives': []} + +import asyncio +from rag_service.core.feedback_service import FeedbackService +from rag_service.utils.submission_data import normalize_submission_payload + +async def main(): + feedback_service = FeedbackService() + request_id = 123 + print("\nGenerating feedback...") + # Generate feedback + feedback, model_used, template_used = await feedback_service.generate_feedback( + assignment_context=assignment_context, + submission_data=normalize_submission_payload(message_data), + submission_id=request_id, + plagiarism_data=message_data, + feedback_request_id=request_id + ) + + print("\nFeedback Generated:\n", feedback) + print("\nModel Used:", model_used) + print("\nTemplate Used:", template_used) + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/rag_service/scripts/vertexai_client.py b/rag_service/scripts/vertexai_client.py new file mode 100644 index 0000000..a64b143 --- /dev/null +++ b/rag_service/scripts/vertexai_client.py @@ -0,0 +1,18 @@ +# Docs for v1 can be found by changing the above selector ^ +from together import Together +import os + +client = Together( + api_key="key", +) +model_name = "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8" +models = client.models.list() + +for model in models: + if model.id == model_name: + input_cost = model.pricing.input + output_cost = model.pricing.output + print(f"Model: {model.id}") + print(f"Input Cost per Token: ${input_cost}") + print(f"Output Cost per Token: ${output_cost}") + break \ No newline at end of file diff --git a/rag_service/templates/__init__.py b/rag_service/templates/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/rag_service/templates/pages/__init__.py b/rag_service/templates/pages/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/rag_service/utils/commands.py b/rag_service/utils/commands.py deleted file mode 100644 index 25d9ce0..0000000 --- a/rag_service/utils/commands.py +++ /dev/null @@ -1,29 +0,0 @@ -# apps/rag_service/rag_service/utils/commands.py - -import frappe -from frappe.commands import pass_context -import click - -@click.command('rag-consumer-start') -@pass_context -def start_consumer(context): - """Start the RAG Service consumer""" - from rag_service.rag_service.utils.rabbitmq_consumer import RabbitMQConsumer - - site = context.sites[0] - frappe.init(site=site) - frappe.connect() - - try: - click.echo(f"Starting RAG consumer for site: {site}") - consumer = RabbitMQConsumer() - consumer.start_consuming() - except Exception as e: - click.echo(f"Error starting consumer: {str(e)}") - finally: - frappe.destroy() - -# And update hooks.py: -commands = [ - "rag_service.utils.commands.start_consumer" -] diff --git a/rag_service/utils/gcp_service_client.py b/rag_service/utils/gcp_service_client.py new file mode 100644 index 0000000..b67ecf1 --- /dev/null +++ b/rag_service/utils/gcp_service_client.py @@ -0,0 +1,85 @@ +import json +import mimetypes +import os +import tempfile +from pathlib import Path +from typing import Dict, Optional, Tuple +from urllib.parse import urlparse, unquote + +import frappe +from google.cloud import storage +from google.oauth2 import service_account + + +class GCPServiceClient: + """Shared GCP client factory/helpers using credentials stored in Frappe.""" + + def __init__(self): + settings = frappe.get_single("GCS Settings") + raw_key = (settings.get("credentials_json") or 
"").strip() + if not raw_key: + raise ValueError("GCS Settings.credentials_json is required") + + try: + self.key_data = json.loads(raw_key) + except json.JSONDecodeError as exc: + raise ValueError("GCS Settings.credentials_json must contain valid JSON") from exc + + self.project_id = settings.get("project_id") or self.key_data.get("project_id") + credentials = service_account.Credentials.from_service_account_info(self.key_data) + self.client = storage.Client(project=self.project_id, credentials=credentials) + + def download_media(self, media_url: str) -> Dict: + bucket_name, object_name = self._parse_gcs_url(media_url) + blob = self.client.bucket(bucket_name).blob(object_name) + + suffix = Path(object_name).suffix or "" + with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file: + temp_path = temp_file.name + + blob.download_to_filename(temp_path) + mime_type = blob.content_type or mimetypes.guess_type(object_name)[0] or "application/octet-stream" + with open(temp_path, "rb") as media_file: + content = media_file.read() + + return { + "bucket": bucket_name, + "object_name": object_name, + "local_path": temp_path, + "mime_type": mime_type, + "content": content, + "filename": os.path.basename(object_name), + } + + def cleanup(self, media_asset: Optional[Dict]) -> None: + if not media_asset: + return + + local_path = media_asset.get("local_path") + if local_path and os.path.exists(local_path): + os.remove(local_path) + + def _parse_gcs_url(self, media_url: str) -> Tuple[str, str]: + parsed = urlparse(media_url) + + if parsed.scheme == "gs": + bucket = parsed.netloc + object_name = parsed.path.lstrip("/") + return bucket, unquote(object_name) + + host = parsed.netloc.lower() + path = parsed.path.lstrip("/") + + if host == "storage.googleapis.com": + bucket, object_name = path.split("/", 1) + return bucket, unquote(object_name) + + if host.endswith(".storage.googleapis.com"): + bucket = host[: -len(".storage.googleapis.com")] + return bucket, unquote(path) + + if host == "storage.cloud.google.com": + bucket, object_name = path.split("/", 1) + return bucket, unquote(object_name) + + raise ValueError(f"Unsupported GCS media URL format: {media_url}") diff --git a/rag_service/utils/queue_manager.py b/rag_service/utils/queue_manager.py index cd8bc51..1ca4a9e 100644 --- a/rag_service/utils/queue_manager.py +++ b/rag_service/utils/queue_manager.py @@ -5,6 +5,7 @@ import json from typing import Dict from datetime import datetime +import traceback class QueueManager: def __init__(self): @@ -34,17 +35,15 @@ def connect(self) -> None: self.connection = pika.BlockingConnection(parameters) self.channel = self.connection.channel() + self.channel.confirm_delivery() - # Ensure queues exist - self.channel.queue_declare( - queue=self.settings.feedback_results_queue, - durable=True - ) + # Queue exists and topology is managed by another service (LMS) + print(f"Connected to RabbitMQ. 
Will publish to queue: {self.settings.feedback_results_queue}") print("\nConnected to RabbitMQ successfully") except Exception as e: - error_msg = f"RabbitMQ Connection Error: {str(e)}" + error_msg = f"RabbitMQ Connection Error for producer: {str(e)}\n{traceback.format_exc()}" print(f"\nError: {error_msg}") frappe.log_error(error_msg, "RabbitMQ Connection Error") raise @@ -74,15 +73,18 @@ def send_feedback_to_tap(self, feedback_data: Dict) -> None: } # Send to queue - self.channel.basic_publish( + confirmed = self.channel.basic_publish( exchange='', routing_key=self.settings.feedback_results_queue, - body=json.dumps(message), + body=json.dumps(message, ensure_ascii=False), properties=pika.BasicProperties( delivery_mode=2, # make message persistent content_type='application/json' - ) + ), + mandatory=True ) + if confirmed is False: + raise RuntimeError("RabbitMQ did not confirm feedback publish") print(f"\nFeedback sent successfully for submission: {feedback_data.get('submission_id')}") diff --git a/rag_service/utils/rabbitmq_consumer.py b/rag_service/utils/rabbitmq_consumer.py index fbe177d..5d5cf00 100644 --- a/rag_service/utils/rabbitmq_consumer.py +++ b/rag_service/utils/rabbitmq_consumer.py @@ -3,11 +3,10 @@ import frappe import pika import json -import time import asyncio from datetime import datetime -from typing import Dict, Optional -from ..handlers.feedback_handler import FeedbackHandler +from typing import Dict +from ..core.feedback_handler import FeedbackHandler from .queue_manager import QueueManager class RabbitMQConsumer: @@ -19,6 +18,7 @@ def __init__(self, debug=True): self.processed_count = 0 self.connection = None self.channel = None + self.dead_letter_queue = None def connect(self) -> None: """Establish RabbitMQ connection""" @@ -60,12 +60,18 @@ def start_consuming(self) -> None: self.connect() queue_name = self.settings.plagiarism_results_queue + self.dead_letter_queue = f"{queue_name}.dead_letter" # Declare queue to ensure it exists self.channel.queue_declare( queue=queue_name, durable=True ) + self.channel.queue_declare( + queue=self.dead_letter_queue, + durable=True + ) + self.channel.confirm_delivery() # Get queue information queue_info = self.channel.queue_declare( @@ -114,18 +120,39 @@ def process_message(self, ch, method, properties, body) -> None: print(f"Parsed JSON: {json.dumps(message, indent=2)}") except json.JSONDecodeError as e: print(f"JSON parsing error: {str(e)}") - ch.basic_nack(delivery_tag=method.delivery_tag, requeue=False) - print("Message rejected - Invalid JSON") + self._dead_letter_message( + ch, + method, + properties, + body, + "Invalid JSON", + str(e) + ) + print("Message moved to dead-letter queue - Invalid JSON") return # Validate required fields - required_fields = ['submission_id', 'student_id', 'assignment_id', 'img_url'] + required_fields = [ + 'submission_id', + 'student_id', + 'assignment_id', + 'submission_type', + 'submission_url', + 'submission_text', + ] missing_fields = [field for field in required_fields if field not in message] if missing_fields: print(f"Missing required fields: {missing_fields}") - ch.basic_nack(delivery_tag=method.delivery_tag, requeue=False) - print("Message rejected - Missing required fields") + self._dead_letter_message( + ch, + method, + properties, + body, + "Missing required fields", + ", ".join(missing_fields) + ) + print("Message moved to dead-letter queue - Missing required fields") return # Process message using feedback handler @@ -167,9 +194,46 @@ def process_message(self, ch, method, properties, 
body) -> None: message=f"Error processing message: {str(e)}\n\nRaw message: {body}" ) - # Reject message without requeue - ch.basic_nack(delivery_tag=method.delivery_tag, requeue=False) - print("Message rejected") + try: + self._dead_letter_message( + ch, + method, + properties, + body, + "Message processing error", + str(e) + ) + print("Message moved to dead-letter queue") + except Exception as dead_letter_error: + print(f"Could not dead-letter message: {str(dead_letter_error)}") + ch.basic_nack(delivery_tag=method.delivery_tag, requeue=True) + print("Message requeued because dead-lettering failed") + + def _dead_letter_message(self, ch, method, properties, body, reason: str, details: str) -> None: + """Persist an unrecoverable message before removing it from the source queue.""" + queue_name = self.settings.plagiarism_results_queue + dead_letter_queue = self.dead_letter_queue or f"{queue_name}.dead_letter" + + ch.queue_declare(queue=dead_letter_queue, durable=True) + payload = { + "original_queue": queue_name, + "reason": reason, + "details": details, + "failed_at": datetime.now().isoformat(), + "body": body.decode("utf-8", errors="replace") if isinstance(body, bytes) else body, + } + + ch.basic_publish( + exchange="", + routing_key=dead_letter_queue, + body=json.dumps(payload, ensure_ascii=False), + properties=pika.BasicProperties( + delivery_mode=2, + content_type="application/json", + ), + mandatory=True, + ) + ch.basic_ack(delivery_tag=method.delivery_tag) def test_connection(self) -> bool: """Test RabbitMQ connection""" diff --git a/rag_service/utils/rabbitmq_manager.py b/rag_service/utils/rabbitmq_manager.py deleted file mode 100644 index 9a43ba2..0000000 --- a/rag_service/utils/rabbitmq_manager.py +++ /dev/null @@ -1,258 +0,0 @@ -import frappe -import pika -import json -import time -from datetime import datetime -from typing import Dict, Optional, Union - -class RabbitMQManager: - def __init__(self, debug=True): - self.settings = frappe.get_single("RabbitMQ Settings") - self.debug = debug - self.connection = None - self.channel = None - self.processed_count = 0 - - def connect(self) -> None: - """Establish RabbitMQ connection""" - try: - if self.debug: - print(f"\nConnecting to RabbitMQ at {self.settings.host}...") - - credentials = pika.PlainCredentials( - self.settings.username, - self.settings.password - ) - - parameters = pika.ConnectionParameters( - host=self.settings.host, - port=int(self.settings.port), - virtual_host=self.settings.virtual_host, - credentials=credentials, - heartbeat=600, - blocked_connection_timeout=300 - ) - - self.connection = pika.BlockingConnection(parameters) - self.channel = self.connection.channel() - - if self.debug: - print("Connection established successfully!") - - except Exception as e: - error_msg = f"RabbitMQ Connection Error: {str(e)}" - print(f"\nError: {error_msg}") - frappe.log_error(error_msg, "RabbitMQ Connection Error") - raise - - def close(self) -> None: - """Close RabbitMQ connection""" - if self.connection and not self.connection.is_closed: - self.connection.close() - self.connection = None - self.channel = None - - def test_connection(self) -> bool: - """Test RabbitMQ connection""" - try: - self.connect() - print("Connection test successful!") - return True - except Exception as e: - print(f"Connection test failed: {str(e)}") - return False - finally: - self.close() - - def get_queue_info(self, queue_name: str = None) -> Dict: - """Get information about a specific queue or all queues""" - try: - self.connect() - - if queue_name is 
None: - queue_name = self.settings.plagiarism_results_queue - - queue_info = self.channel.queue_declare( - queue=queue_name, - durable=True, - passive=True - ) - - return { - 'queue': queue_name, - 'message_count': queue_info.method.message_count, - 'consumer_count': queue_info.method.consumer_count - } - - except Exception as e: - print(f"Error getting queue info: {str(e)}") - return {} - finally: - self.close() - - def peek_message(self, queue_name: str = None) -> Optional[dict]: - """Peek at the next message in the queue without consuming it""" - try: - self.connect() - - if queue_name is None: - queue_name = self.settings.plagiarism_results_queue - - # Get message without consuming - method_frame, header_frame, body = self.channel.basic_get( - queue=queue_name, - auto_ack=False - ) - - if not method_frame: - print("\nNo messages in queue") - return None - - try: - message = json.loads(body) - result = { - 'delivery_tag': method_frame.delivery_tag, - 'content': message, - 'raw_body': body.decode('utf-8') - } - except json.JSONDecodeError as e: - result = { - 'delivery_tag': method_frame.delivery_tag, - 'error': f"Invalid JSON: {str(e)}", - 'raw_body': body.decode('utf-8') - } - - # Return message to queue - self.channel.basic_nack( - delivery_tag=method_frame.delivery_tag, - requeue=True - ) - - return result - - except Exception as e: - print(f"Error peeking message: {str(e)}") - return None - finally: - self.close() - - def delete_message(self, delivery_tag: int, queue_name: str = None) -> bool: - """Delete a specific message using its delivery tag""" - try: - self.connect() - - if queue_name is None: - queue_name = self.settings.plagiarism_results_queue - - # Get and reject the message - method_frame, _, body = self.channel.basic_get( - queue=queue_name, - auto_ack=False - ) - - if not method_frame: - print("\nNo messages in queue") - return False - - if method_frame.delivery_tag == delivery_tag: - self.channel.basic_reject( - delivery_tag=delivery_tag, - requeue=False - ) - print(f"\nSuccessfully deleted message with delivery tag: {delivery_tag}") - return True - else: - print(f"\nMessage with delivery tag {delivery_tag} not found at queue head") - # Return message to queue - self.channel.basic_nack( - delivery_tag=method_frame.delivery_tag, - requeue=True - ) - return False - - except Exception as e: - error_msg = f"Error deleting message: {str(e)}" - print(f"\nError: {error_msg}") - frappe.log_error(error_msg, "Message Deletion Error") - return False - finally: - self.close() - - def peek_and_delete(self, queue_name: str = None) -> dict: - """Peek at the next message and optionally delete it""" - result = { - 'success': False, - 'message': None, - 'action_taken': None - } - - # First peek at the message - message_info = self.peek_message(queue_name) - - if not message_info: - result['message'] = "No messages in queue" - return result - - # Show the message content - print("\nNext message in queue:") - print(f"Delivery Tag: {message_info['delivery_tag']}") - print(f"Raw content: {message_info['raw_body']}") - - if 'error' in message_info: - print(f"Parse error: {message_info['error']}") - else: - print(f"Parsed content: {json.dumps(message_info['content'], indent=2)}") - - # Ask for confirmation - confirm = input("\nDo you want to delete this message? 
(y/n): ").lower() - - if confirm == 'y': - success = self.delete_message(message_info['delivery_tag'], queue_name) - result.update({ - 'success': success, - 'message': "Message deleted successfully" if success else "Failed to delete message", - 'action_taken': 'deleted' - }) - else: - result.update({ - 'success': True, - 'message': "Message left in queue", - 'action_taken': 'skipped' - }) - - return result - - def purge_queue(self, queue_name: str = None) -> bool: - """Purge all messages from a queue""" - try: - self.connect() - - if queue_name is None: - queue_name = self.settings.plagiarism_results_queue - - # Get message count before purging - queue_info = self.channel.queue_declare( - queue=queue_name, - durable=True, - passive=True - ) - message_count = queue_info.method.message_count - - # Confirm purge - confirm = input(f"\nAre you sure you want to purge {message_count} messages from queue '{queue_name}'? (y/n): ").lower() - - if confirm == 'y': - self.channel.queue_purge(queue=queue_name) - print(f"\nSuccessfully purged {message_count} messages from queue: {queue_name}") - return True - else: - print("\nPurge cancelled") - return False - - except Exception as e: - error_msg = f"Error purging queue: {str(e)}" - print(f"\nError: {error_msg}") - frappe.log_error(error_msg, "Queue Purge Error") - return False - finally: - self.close() diff --git a/rag_service/utils/setup_test_data.py b/rag_service/utils/setup_test_data.py deleted file mode 100644 index 06c1d52..0000000 --- a/rag_service/utils/setup_test_data.py +++ /dev/null @@ -1,89 +0,0 @@ -# File: ~/frappe-bench/apps/rag_service/rag_service/utils/setup_test_data.py - -import frappe -from frappe.utils import now_datetime -from ..core.embedding_utils import embedding_manager - -def create_test_data(): - """Create test data for RAG system""" - test_contents = [ - { - "content": "Python is a high-level programming language known for its simplicity and readability.", - "reference_id": "test_content_1", - "content_type": "reference" # Changed to lowercase - }, - { - "content": "Machine learning is a subset of artificial intelligence that enables systems to learn and improve from experience.", - "reference_id": "test_content_2", - "content_type": "reference" - }, - { - "content": "Natural Language Processing (NLP) helps computers understand and process human language.", - "reference_id": "test_content_3", - "content_type": "reference" - }, - { - "content": "This is a sample student submission about programming concepts.", - "reference_id": "submission_1", - "content_type": "submission" - }, - { - "content": "Great work on explaining the concepts. 
Consider adding more examples.", - "reference_id": "feedback_1", - "content_type": "feedback" - } - ] - - created_docs = [] - - try: - for content in test_contents: - # Check if already exists - existing = frappe.db.exists( - "Vector Store", - {"reference_id": content["reference_id"]} - ) - - if not existing: - # Create embedding and save - vector_store_name = embedding_manager.save_embedding( - reference_id=content["reference_id"], - content=content["content"], - content_type=content["content_type"] - ) - created_docs.append(vector_store_name) - print(f"Created: {vector_store_name} - {content['content_type']}") - - frappe.db.commit() - return f"Created {len(created_docs)} test documents: {', '.join(created_docs)}" - - except Exception as e: - frappe.log_error(frappe.get_traceback(), "Error creating test data") - return f"Error creating test data: {str(e)}" - -def verify_test_data(): - """Verify the created test data""" - try: - vector_stores = frappe.get_all( - "Vector Store", - fields=["name", "content_type", "reference_id", "content", "embedding_file"], - order_by="creation desc" - ) - - print(f"\nFound {len(vector_stores)} Vector Store entries:") - for vs in vector_stores: - print("\nVector Store:", vs.name) - print("Type:", vs.content_type) - print("Reference:", vs.reference_id) - print("Content:", vs.content) - print("Embedding File:", vs.embedding_file) - - return vector_stores - - except Exception as e: - print(f"Error verifying test data: {str(e)}") - return None - -# You can run this from bench console -if __name__ == "__main__": - create_test_data() diff --git a/rag_service/utils/submission_data.py b/rag_service/utils/submission_data.py new file mode 100644 index 0000000..43f6f54 --- /dev/null +++ b/rag_service/utils/submission_data.py @@ -0,0 +1,66 @@ +from typing import Dict, Optional + + +SUPPORTED_SUBMISSION_TYPES = {"image", "video", "audio", "text", "emoji"} +MEDIA_SUBMISSION_TYPES = {"image", "video", "audio"} +TEXT_SUBMISSION_TYPES = {"text", "emoji"} + + +def normalize_submission_payload(message_data: Dict) -> Dict: + submission_type = (message_data.get("submission_type") or "").strip().lower() + submission_url = _normalize_optional_str(message_data.get("submission_url")) + submission_text = _normalize_optional_text(message_data.get("submission_text")) + + if submission_type not in SUPPORTED_SUBMISSION_TYPES: + raise ValueError( + f"Unsupported submission_type '{submission_type}'. 
Supported values: {sorted(SUPPORTED_SUBMISSION_TYPES)}" + ) + + if submission_type in MEDIA_SUBMISSION_TYPES and not submission_url: + raise ValueError(f"submission_url is required for submission_type '{submission_type}'") + + if submission_type in TEXT_SUBMISSION_TYPES and not submission_text: + raise ValueError(f"submission_text is required for submission_type '{submission_type}'") + + return { + "submission_type": submission_type, + "submission_url": submission_url, + "submission_text": submission_text, + "expected_submission_type": message_data.get("expected_submission_type"), + "archetype": message_data.get("archetype"), + "current_week": message_data.get("current_week"), + "escalation_step_at_submit": message_data.get("escalation_step_at_submit"), + } + + +def build_submission_content(submission_data: Dict) -> str: + submission_type = submission_data.get("submission_type") + if submission_type in MEDIA_SUBMISSION_TYPES: + return submission_data.get("submission_url") or "" + return submission_data.get("submission_text") or "" + + +def format_submission_text_for_prompt(submission_data: Dict) -> str: + submission_type = submission_data.get("submission_type") + submission_text = submission_data.get("submission_text") or "" + if not submission_text: + return "" + + if submission_type == "emoji": + return f"Student submitted an emoji response: {submission_text}" + + return f"Student submitted the following text response:\n{submission_text}" + + +def _normalize_optional_str(value: Optional[str]) -> Optional[str]: + if value is None: + return None + normalized = str(value).strip() + return normalized or None + + +def _normalize_optional_text(value: Optional[str]) -> Optional[str]: + if value is None: + return None + text = str(value) + return text if text.strip() else None diff --git a/requirements.txt b/requirements.txt index 5dd4609..69d6bd1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,6 @@ pikka langchain_openai +aiohttp +requests +numpy +opencv-python
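
Quick sanity checks for the new helpers above (a minimal sketch, not a repo file: it assumes an initialized Frappe site, e.g. bench --site rag-dev.localhost console, with the GCS Settings single doctype populated; the object URL below is hypothetical):

    # Download a submission asset through the shared GCS client, then clean up.
    from rag_service.utils.gcp_service_client import GCPServiceClient

    client = GCPServiceClient()  # reads service-account JSON from the "GCS Settings" single doctype
    asset = None
    try:
        asset = client.download_media("gs://bucket_tap_1/uploads/example.png")  # hypothetical object path
        print(asset["filename"], asset["mime_type"], len(asset["content"]))
    finally:
        client.cleanup(asset)  # deletes the temporary file created by download_media

    # Normalize an emoji submission the way the consumer does before prompting.
    from rag_service.utils.submission_data import normalize_submission_payload, build_submission_content

    data = normalize_submission_payload({"submission_type": "emoji", "submission_text": "👍", "submission_url": None})
    print(build_submission_content(data))  # -> 👍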