Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@ FEEDBACK_QUEUE=plagiarism_feedback
# Dead Letter Queue (optional - leave empty to disable)
DEAD_LETTER_QUEUE=plagiarism_failed_submissions

# GCP CONFIGURATION
GCP_ENABLED=true
GCP_KEY_PATH=/app/credentials/gcp_service_account.json

# POSTGRESQL CONFIGURATION
POSTGRES_HOST=postgres
POSTGRES_PORT=5432
Expand Down
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# Credentials
app/credentials/*

scratch*

# Python
__pycache__/
*.py[cod]
Expand All @@ -14,6 +19,7 @@ build/
# Environment variables
.env
.env.local
.env.prod

# Logs
logs/
Expand Down
21 changes: 10 additions & 11 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ WORKDIR /app
# Install build dependencies in a single layer
RUN apt-get update && apt-get install -y --no-install-recommends \
build-essential \
vim \
gcc \
g++ \
git \
Expand All @@ -18,10 +19,11 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
# Copy only requirements first for better caching
COPY requirements.txt .

# Use pip cache and install in parallel
RUN --mount=type=cache,target=/root/.cache/pip \
python -m pip install --upgrade pip setuptools wheel && \
pip install -r requirements.txt --user --no-warn-script-location
# Install to explicit location
RUN python -m pip install --no-cache-dir --prefix=/install --upgrade pip setuptools wheel && \
pip install --no-cache-dir --no-deps --prefix=/install -r requirements.txt

RUN pip install --prefix=/install -r requirements.txt --no-cache-dir

# ============================================
# Final stage - minimal runtime image
Expand All @@ -37,18 +39,15 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
&& rm -rf /var/lib/apt/lists/*

# Copy installed packages from builder
COPY --from=builder /root/.local /root/.local

ENV PATH=/root/.local/bin:$PATH
COPY --from=builder /install /usr/local

# Create necessary directories
RUN mkdir -p /app/data /app/logs /root/.cache/clip

RUN ls

# Copy application code (do this last for better caching)
COPY . .

# Lightweight healthcheck
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
CMD python -c "import sys; sys.exit(0)" || exit 1

CMD ["python", "app.py"]
CMD ["python", "app.py"]
20 changes: 13 additions & 7 deletions Dockerfile.api
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM python:3.13-slim
FROM python:3.13-slim as builder

WORKDIR /app

Expand All @@ -7,9 +7,18 @@ RUN apt-get update && apt-get install -y --no-install-recommends curl && rm -rf

# Copy requirements and install dependencies
COPY api/requirements.txt /app/api/requirements.txt
RUN --mount=type=cache,target=/root/.cache/pip \
python -m pip install --upgrade pip && \
pip install -r /app/api/requirements.txt
# Upgrade pip, then install the API requirements. Both go under /install so
# the final stage can copy that single prefix into /usr/local.
# NOTE: `-r` must be followed directly by a requirements file; the previous
# `-r --upgrade pip` made pip try to open a file literally named "--upgrade".
RUN python -m pip install --prefix=/install --upgrade pip && \
    pip install --prefix=/install -r /app/api/requirements.txt --no-cache-dir

# ============================================
# Final stage - minimal runtime image
# ============================================
FROM python:3.13-slim

WORKDIR /app

# Copy installed packages from builder
COPY --from=builder /install /usr/local

# Copy application code
COPY api/ /app/api/
Expand All @@ -18,9 +27,6 @@ COPY utils/ /app/utils/
# Expose API port
EXPOSE 8000

# Healthcheck (check if uvicorn is responding)
HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \
CMD curl -f http://localhost:8000/ || exit 1

# Run the API
CMD ["uvicorn", "api.api:app", "--host", "0.0.0.0", "--port", "8000"]
113 changes: 80 additions & 33 deletions api/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,31 +8,49 @@
from fastapi import FastAPI, HTTPException, status
from fastapi.responses import JSONResponse
from pydantic import BaseModel, Field, validator
import aio_pika
import json
import uuid

from dotenv import load_dotenv
import os

sys.path.insert(0, str(Path(__file__).parent.parent))
from utils.security import safe_hash_student_id

# load_dotenv()

# Configure logging
logging.basicConfig(
level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger(__name__)

# RabbitMQ Configuration
RABBITMQ_HOST = os.getenv("RABBITMQ_HOST", "localhost")
import aio_pika
import json
import uuid
from dotenv import load_dotenv
import os

sys.path.insert(0, str(Path(__file__).parent.parent))
from utils.security import safe_hash_student_id

load_dotenv()

DEFAULT_LOG_FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"


def get_log_format() -> str:
    """Return the %-style format string to hand to ``logging.basicConfig``.

    The value is read from the ``LOG_FORMAT`` environment variable.

    Returns:
        The configured format when it contains at least one ``%(name)``
        record field, otherwise ``DEFAULT_LOG_FORMAT``. This covers an unset
        or empty variable as well as symbolic values such as ``json`` or
        ``text``, which name an output style rather than a format string.
    """
    log_format = os.getenv("LOG_FORMAT", DEFAULT_LOG_FORMAT)
    # logging.Formatter validates %-style formats and raises ValueError when
    # no %(name) field is present, so a bare keyword would abort startup.
    # Fall back to the default instead of letting basicConfig crash.
    if "%(" not in log_format:
        return DEFAULT_LOG_FORMAT
    return log_format


# Configure logging
logging.basicConfig(
level=getattr(logging, os.getenv("LOG_LEVEL", "INFO").upper(), logging.INFO),
format=get_log_format(),
)
logger = logging.getLogger(__name__)

# RabbitMQ Configuration
RABBITMQ_HOST = os.getenv("RABBITMQ_HOST", "localhost")
RABBITMQ_PORT = os.getenv("RABBITMQ_PORT", "5672")
RABBITMQ_VHOST = os.getenv("RABBITMQ_VHOST", "/")
RABBITMQ_USER = os.getenv("RABBITMQ_USER", "admin")
RABBITMQ_PASS = os.getenv("RABBITMQ_PASS", "admin123")

# Print the RabbitMQ config for debugging
# logger.info("###################")
# logger.info(f"RABBITMQ_HOST={RABBITMQ_HOST}")
# logger.info(f"RABBITMQ_PORT={RABBITMQ_PORT}")
# logger.info(f"RABBITMQ_VHOST={RABBITMQ_VHOST}")
# logger.info(f"RABBITMQ_USER={RABBITMQ_USER}")
# logger.info("###################")

SUBMISSION_QUEUE = os.getenv("SUBMISSION_QUEUE", "plagiarism_submissions")
FEEDBACK_QUEUE = os.getenv("FEEDBACK_QUEUE", "plagiarism_feedback")

Expand All @@ -53,15 +71,42 @@ class SubmissionRequest(BaseModel):
"""Request model for submission creation"""

student_id: str = Field(..., description="Student identifier", min_length=1)
image_url: str = Field(..., description="URL of the submitted image", min_length=1)
submission_type: str = Field(..., description="Type of submission: text, audio, video, or image")
submission_url: Optional[str] = Field(
None, description="URL of the submitted resource (required for image submissions)"
)
submission_text: Optional[str] = Field(
None, description="Text content of the submission (required for text submissions)"
)
submitted_at: Optional[str] = Field(
None,
description="Original submission timestamp in ISO format. If provided, this value is preserved in the processed result.",
)
assignment_id: Optional[str] = Field(None, description="Assignment identifier")

@validator("image_url")
def validate_url(cls, v):
"""Validate that image_url is not empty"""
if not v or not v.strip():
raise ValueError("image_url cannot be empty")
return v.strip()
@validator("submission_type")
def validate_submission_type(cls, v):
    """Reject any submission_type outside the supported set.

    Returns the value unchanged when it is one of text, audio, video or
    image; raises ValueError listing the supported values otherwise.
    """
    supported = frozenset(("text", "audio", "video", "image"))
    if v in supported:
        return v
    raise ValueError(f"submission_type must be one of {sorted(supported)}")

@validator("submission_url", always=True)
def validate_submission_url(cls, v, values):
    """Require a non-blank submission_url when the submission is an image.

    For any other submission_type the value is passed through untouched.
    For image submissions the URL is returned with surrounding whitespace
    removed, and ValueError is raised if it is missing or blank.
    """
    # Guard clause: only image submissions are checked here.
    if values.get("submission_type") != "image":
        return v
    trimmed = v.strip() if v else ""
    if not trimmed:
        raise ValueError("submission_url is required for image submissions")
    return trimmed

@validator("submission_text", always=True)
def validate_submission_text(cls, v, values):
    """Require non-blank submission_text when the submission is text.

    For any other submission_type the value is passed through untouched.
    For text submissions the content is returned with surrounding
    whitespace removed, and ValueError is raised if it is missing or blank.
    """
    # Guard clause: only text submissions are checked here.
    if values.get("submission_type") != "text":
        return v
    trimmed = v.strip() if v else ""
    if not trimmed:
        raise ValueError("submission_text is required for text submissions")
    return trimmed

@validator("student_id")
def validate_student_id(cls, v):
Expand Down Expand Up @@ -199,13 +244,13 @@ async def send_to_rabbitmq(message: dict, queue_name: str):
)
async def create_submission(request: SubmissionRequest):
"""
Submit an image for plagiarism detection
Submit a new user submission for plagiarism detection

Creates a new plagiarism check submission by sending the image URL
and metadata to the processing queue.
Creates a new plagiarism check submission by sending submission metadata
to the processing queue.

Args:
request: Submission request containing student_id, image_url, and optional assignment_id
request: Submission request containing student_id, submission_type, submission_url, and optional assignment_id

Returns:
SubmissionResponse with submission details and unique ID
Expand Down Expand Up @@ -237,9 +282,11 @@ async def create_submission(request: SubmissionRequest):
payload = {
"student_id": hashed_student_id,
"submission_id": submission_id,
"img_url": request.image_url,
"submission_type": request.submission_type,
"submission_url": request.submission_url,
"submission_text": request.submission_text,
"submitted_at": request.submitted_at,
"assign_id": assignment_id,
"submitted_at": datetime.datetime.utcnow().isoformat(),
}

# Send to RabbitMQ
Expand Down
46 changes: 31 additions & 15 deletions app.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,32 @@
import asyncio
import logging
import signal
import sys
import os
from mq.rmq_client import RabbitMQClient
from plag_checker.submissions_checker import SubmissionChecker
from dotenv import load_dotenv

__version__ = "1.0.0"

# Configure logging
logging.basicConfig(
level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger(__name__)
load_dotenv()
import signal
import sys
import os
from dotenv import load_dotenv
from mq.rmq_client import RabbitMQClient
from plag_checker.submissions_checker import SubmissionChecker

__version__ = "1.0.0"

load_dotenv()

DEFAULT_LOG_FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"


def get_log_format() -> str:
    """Return the %-style format string to hand to ``logging.basicConfig``.

    The value is read from the ``LOG_FORMAT`` environment variable.

    Returns:
        The configured format when it contains at least one ``%(name)``
        record field, otherwise ``DEFAULT_LOG_FORMAT``. This covers an unset
        or empty variable as well as symbolic values such as ``json`` or
        ``text``, which name an output style rather than a format string.
    """
    log_format = os.getenv("LOG_FORMAT", DEFAULT_LOG_FORMAT)
    # logging.Formatter validates %-style formats and raises ValueError when
    # no %(name) field is present, so a bare keyword would abort startup.
    # Fall back to the default instead of letting basicConfig crash.
    if "%(" not in log_format:
        return DEFAULT_LOG_FORMAT
    return log_format


# Configure logging
logging.basicConfig(
level=getattr(logging, os.getenv("LOG_LEVEL", "INFO").upper(), logging.INFO),
format=get_log_format(),
)
logger = logging.getLogger(__name__)


def validate_configuration():
Expand All @@ -30,6 +42,10 @@ def validate_configuration():
]

missing = [var for var in required_env_vars if not os.getenv(var)]
# Print the required env vars and their values for debugging
# for var in required_env_vars:
# logger.info("###################")
# logger.info(f"{var}={os.getenv(var)}")
if missing:
logger.error(f"Missing required environment variables: {missing}")
raise ValueError(f"Missing required environment variables: {missing}")
Expand Down
16 changes: 15 additions & 1 deletion config/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ class DetectionConfig(BaseSettings):

exact_dup_threshold: float = Field(default=0.95, env="EXACT_DUPLICATE_THRESHOLD")
near_dup_threshold: float = Field(default=0.90, env="NEAR_DUPLICATE_THRESHOLD")
semantic_threshold: float = Field(default=0.80, env="SEMANTIC_MATCH_THRESHOLD")
semantic_threshold: float = Field(default=0.70, env="SEMANTIC_MATCH_THRESHOLD")

# Hash matching thresholds (Hamming distance, 0-64 bits)
hash_threshold: int = Field(default=8, env="HASH_MATCH_THRESHOLD")
Expand Down Expand Up @@ -215,6 +215,19 @@ class Config:
case_sensitive = False


class GCPConfig(BaseSettings):
    """Google Cloud Platform settings for authenticated image downloads from Cloud Storage."""

    # Read from GCP_ENABLED; defaults to on.
    gcp_enabled: bool = Field(env="GCP_ENABLED", default=True)

    # Read from GCP_KEY_PATH; defaults to an empty string when not provided.
    gcp_key_path: str = Field(
        env="GCP_KEY_PATH",
        default="",
        description="Path to GCP service account JSON key file",
    )

    class Config:
        case_sensitive = False
        env_file = ".env"


class LoggingConfig(BaseSettings):
"""Logging configuration."""

Expand All @@ -239,6 +252,7 @@ class AppConfig(BaseSettings):
detection: DetectionConfig = DetectionConfig()
vector_search: VectorSearchConfig = VectorSearchConfig()
image_processing: ImageProcessingConfig = ImageProcessingConfig()
gcp: GCPConfig = GCPConfig()
logging: LoggingConfig = LoggingConfig()

app_name: str = Field(default="MentorMe Plagiarism Detection", env="APP_NAME")
Expand Down
Loading