diff --git a/backend/app/alembic/versions/0e7dd198b7c7_convert_status_to_enum_type.py b/backend/app/alembic/versions/0e7dd198b7c7_convert_status_to_enum_type.py
new file mode 100644
index 0000000000..a214181072
--- /dev/null
+++ b/backend/app/alembic/versions/0e7dd198b7c7_convert_status_to_enum_type.py
@@ -0,0 +1,103 @@
+"""convert_status_to_enum_type
+
+Convert ingestions.status column from VARCHAR to PostgreSQL ENUM type.
+
+This migration:
+1. Creates extractionstatus ENUM type with all status values
+2. Converts existing VARCHAR status column to use the ENUM type
+3. Maintains data integrity by mapping existing values to ENUM
+
+Revision ID: 0e7dd198b7c7
+Revises: 20038a3ab258
+Create Date: 2025-10-30 13:25:21.537208
+
+"""
+from alembic import op
+import sqlalchemy as sa
+import sqlmodel.sql.sqltypes
+
+
+# revision identifiers, used by Alembic; down_revision names the parent migration.
+revision = '0e7dd198b7c7'
+down_revision = '20038a3ab258'
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    """Convert ingestions.status from VARCHAR to the extractionstatus ENUM type."""
+    # Create the extractionstatus ENUM type that the column is cast to in step 2
+    op.execute("""
+        CREATE TYPE extractionstatus AS ENUM (
+            'UPLOADED',
+            'OCR_IN_PROGRESS',
+            'OCR_COMPLETE',
+            'OCR_FAILED',
+            'SEGMENTATION_PROCESSING',
+            'SEGMENTATION_COMPLETE',
+            'TAGGING_PROCESSING',
+            'DRAFT',
+            'IN_REVIEW',
+            'APPROVED',
+            'REJECTED',
+            'FAILED'
+        )
+    """)
+
+    # Map legacy 'OCR_PROCESSING' rows to 'OCR_IN_PROGRESS' (old label is not in the ENUM)
+    op.execute("""
+        UPDATE ingestions
+        SET status = 'OCR_IN_PROGRESS'
+        WHERE status = 'OCR_PROCESSING'
+    """)
+
+    # Step 1: Drop the existing default; PostgreSQL cannot auto-cast it to the new type
+    op.execute("""
+        ALTER TABLE ingestions
+        ALTER COLUMN status DROP DEFAULT
+    """)
+
+    # Step 2: Convert the column to the ENUM type, casting each stored value via text
+    op.execute("""
+        ALTER TABLE ingestions
+        ALTER COLUMN status TYPE extractionstatus
+        USING status::text::extractionstatus
+    """)
+
+    # Step 3: Re-add the default, now typed as extractionstatus
+    op.execute("""
+        ALTER TABLE ingestions
+        ALTER COLUMN status SET DEFAULT 'UPLOADED'::extractionstatus
+    """)
+
+
+def downgrade():
+    """Convert ingestions.status back to VARCHAR and drop the extractionstatus type."""
+    # Step 1: Drop the ENUM-typed default before changing the column type
+    op.execute("""
+        ALTER TABLE ingestions
+        ALTER COLUMN status DROP DEFAULT
+    """)
+
+    # Step 2: Convert status column back to VARCHAR, keeping each value's text label
+    op.execute("""
+        ALTER TABLE ingestions
+        ALTER COLUMN status TYPE VARCHAR
+        USING status::text
+    """)
+
+    # Step 3: Re-add the default as a plain string literal
+    op.execute("""
+        ALTER TABLE ingestions
+        ALTER COLUMN status SET DEFAULT 'UPLOADED'
+    """)
+
+    # Step 4: Drop the ENUM type now that the column no longer uses it
+    op.execute("DROP TYPE extractionstatus")
+
+    # Step 5: Restore the pre-migration 'OCR_PROCESSING' label on 'OCR_IN_PROGRESS' rows
+    op.execute("""
+        UPDATE ingestions
+        SET status = 'OCR_PROCESSING'
+        WHERE status = 'OCR_IN_PROGRESS'
+    """)
diff --git a/backend/app/models.py b/backend/app/models.py
index a1eb463e8f..5ebe527c01 100644
--- a/backend/app/models.py
+++ b/backend/app/models.py
@@ -86,8 +86,9 @@ class ExtractionStatus(str, Enum):
     """Extraction pipeline status enum."""
 
     UPLOADED = "UPLOADED"
-    OCR_PROCESSING = "OCR_PROCESSING"
+    OCR_IN_PROGRESS = "OCR_IN_PROGRESS"
     OCR_COMPLETE = "OCR_COMPLETE"
+    OCR_FAILED = "OCR_FAILED"
     SEGMENTATION_PROCESSING = "SEGMENTATION_PROCESSING"
     SEGMENTATION_COMPLETE = "SEGMENTATION_COMPLETE"
     TAGGING_PROCESSING = "TAGGING_PROCESSING"
diff --git a/backend/app/services/ocr.py b/backend/app/services/ocr.py
index 4246c1b300..97f36283aa 100644
--- a/backend/app/services/ocr.py
+++ b/backend/app/services/ocr.py
@@ -6,7 +6,7 @@
 
 import uuid
 from datetime import datetime
-from typing import Any
+from typing import Any, Literal
 
 import httpx
 from pydantic import BaseModel, Field
@@ -61,6 +61,20 @@ class BoundingBox(BaseModel):
     height: float = Field(..., description="Height of the bounding box")
 
 
+class TableStructure(BaseModel):
+    """Table layout extracted by Mistral OCR.
+
+    Dimensions are 0 when the OCR payload omits them; cells default to an empty list.
+    """
+
+    rows: int = Field(..., description="Number of rows in the table", ge=0)
+    columns: int = Field(..., description="Number of columns in the table", ge=0)
+    cells: list[dict[str, Any]] = Field(
+        default_factory=list,
+        description="Cell data with row, col, text, and bbox information",
+    )
+
+
 class ContentBlock(BaseModel):
     """A content block extracted from a PDF page.
 
@@ -68,15 +82,14 @@ class ContentBlock(BaseModel):
     """
 
     block_id: str = Field(..., description="Unique identifier for this content block")
-    block_type: str = Field(
-        ...,
-        description="Type of content: text, equation, table, image, header, paragraph, list",
-    )
+    block_type: Literal[
+        "text", "header", "paragraph", "list", "table", "equation", "image"
+    ] = Field(..., description="Type of content block")
     text: str = Field(..., description="Extracted text content")
     bbox: BoundingBox = Field(..., description="Bounding box coordinates")
     confidence: float = Field(..., ge=0.0, le=1.0, description="OCR confidence score")
     latex: str | None = Field(None, description="LaTeX representation for equations")
-    table_structure: dict[str, Any] | None = Field(
+    table_structure: TableStructure | None = Field(
         None, description="Table structure metadata (rows, columns, cells)"
     )
     image_description: str | None = Field(
@@ -153,7 +166,9 @@ def __init__(self, api_key: str, base_url: str = "https://api.mistral.ai/v1"):
             timeout=httpx.Timeout(60.0),
         )
 
-    def _map_block_type(self, mistral_type: str) -> str:
+    def _map_block_type(
+        self, mistral_type: str
+    ) -> Literal["text", "header", "paragraph", "list", "table", "equation", "image"]:
         """Map Mistral's block type to semantic types for segmentation.
 
         Args:
@@ -162,7 +177,12 @@ def _map_block_type(self, mistral_type: str) -> str:
         Returns:
             Semantic block type (e.g., "header", "paragraph")
         """
-        mapping = {
+        mapping: dict[
+            str,
+            Literal[
+                "text", "header", "paragraph", "list", "table", "equation", "image"
+            ],
+        ] = {
             "heading": "header",
             "text": "paragraph",
             "equation": "equation",
@@ -281,6 +301,15 @@ async def extract_text(self, pdf_bytes: bytes) -> OCRResult:
 
                     # If no type provided, default to "text" (fallback/unknown type)
                     # If type is provided, map to semantic type
+                    block_type: Literal[
+                        "text",
+                        "header",
+                        "paragraph",
+                        "list",
+                        "table",
+                        "equation",
+                        "image",
+                    ]
                     if mistral_type is None:
                         block_type = "text"  # Default fallback
                     else:
@@ -322,11 +351,11 @@ async def extract_text(self, pdf_bytes: bytes) -> OCRResult:
                         ),
                         confidence=0.95,
                         latex=None,
-                        table_structure={
-                            "rows": table_data.get("rows"),
-                            "columns": table_data.get("columns"),
-                            "cells": table_data.get("cells", []),
-                        },
+                        table_structure=TableStructure(
+                            rows=table_data.get("rows", 0),
+                            columns=table_data.get("columns", 0),
+                            cells=table_data.get("cells", []),
+                        ),
                         image_description=None,
                         markdown_content=None,
                         hierarchy_level=None,
diff --git a/backend/app/tasks/extraction.py b/backend/app/tasks/extraction.py
index d3f8566873..3f54550520 100644
--- a/backend/app/tasks/extraction.py
+++ b/backend/app/tasks/extraction.py
@@ -78,11 +78,11 @@ def process_ocr_task(self: Any, ingestion_id: str) -> dict[str, Any]:
                 logger.error(f"Ingestion {ingestion_id} not found in database")
                 raise ValueError(f"Ingestion {ingestion_id} not found")
 
-            # Update status to OCR_PROCESSING
-            ingestion.status = ExtractionStatus.OCR_PROCESSING
+            # Update status to OCR_IN_PROGRESS
+            ingestion.status = ExtractionStatus.OCR_IN_PROGRESS
             db.add(ingestion)
             db.commit()
-            logger.info(f"[{ingestion_id}] Status updated to OCR_PROCESSING")
+            logger.info(f"[{ingestion_id}] Status updated to OCR_IN_PROGRESS")
 
             # Download PDF from storage
             logger.info(
diff --git a/backend/tests/services/test_ocr.py b/backend/tests/services/test_ocr.py
index 964dd11521..a6ab90bf09 100644
--- a/backend/tests/services/test_ocr.py
+++ b/backend/tests/services/test_ocr.py
@@ -204,7 +204,7 @@ def mock_handler(request: httpx.Request) -> httpx.Response:
             )
             assert table_block is not None
             assert table_block.table_structure is not None
-            assert table_block.table_structure["rows"] == 2
+            assert table_block.table_structure.rows == 2
 
     @pytest.mark.asyncio
     async def test_extract_text_api_error_400(self):
@@ -625,17 +625,17 @@ def mock_handler(request: httpx.Request) -> httpx.Response:
             # Verify table structure with cell-level detail
             table_struct = table_block.table_structure
             assert table_struct is not None
-            assert table_struct["rows"] == 4
-            assert table_struct["columns"] == 2
-            assert len(table_struct["cells"]) == 4
+            assert table_struct.rows == 4
+            assert table_struct.columns == 2
+            assert len(table_struct.cells) == 4
 
             # Verify cell data with row/column positions
-            cell_a = table_struct["cells"][0]
+            cell_a = table_struct.cells[0]
             assert cell_a["row"] == 0
             assert cell_a["col"] == 0
             assert cell_a["text"] == "A."
 
-            cell_b = table_struct["cells"][2]
+            cell_b = table_struct.cells[2]
             assert cell_b["row"] == 1
             assert cell_b["col"] == 0
             assert cell_b["text"] == "B."
diff --git a/backend/tests/tasks/test_extraction.py b/backend/tests/tasks/test_extraction.py
index 988e73b3cc..82aaade44c 100644
--- a/backend/tests/tasks/test_extraction.py
+++ b/backend/tests/tasks/test_extraction.py
@@ -100,7 +100,7 @@ def test_process_ocr_task_success(
         from app.models import Ingestion
 
         mock_db.get.assert_called_once_with(Ingestion, mock_ingestion.id)
-        assert mock_db.commit.call_count == 2  # Status OCR_PROCESSING + OCR_COMPLETE
+        assert mock_db.commit.call_count == 2  # Status OCR_IN_PROGRESS + OCR_COMPLETE
 
         # Verify ingestion status was updated to OCR_COMPLETE
         assert mock_ingestion.status == ExtractionStatus.OCR_COMPLETE
@@ -234,7 +234,7 @@ def test_process_ocr_task_updates_status_to_processing(
         mock_ingestion,
         mock_ocr_result,
     ):
-        """Test task updates status to OCR_PROCESSING before starting OCR."""
+        """Test task updates status to OCR_IN_PROGRESS before starting OCR."""
         mock_settings.MISTRAL_API_KEY = "test-api-key"
 
         mock_db = MagicMock()
@@ -257,7 +257,7 @@ def track_status_change(*args, **kwargs):
 
         process_ocr_task(str(mock_ingestion.id))
 
-        # Verify status progression: OCR_PROCESSING -> OCR_COMPLETE
+        # Verify status progression: OCR_IN_PROGRESS -> OCR_COMPLETE
         assert len(status_changes) >= 2
-        assert ExtractionStatus.OCR_PROCESSING in status_changes
+        assert ExtractionStatus.OCR_IN_PROGRESS in status_changes
         assert status_changes[-1] == ExtractionStatus.OCR_COMPLETE
diff --git a/frontend/src/client/schemas.gen.ts b/frontend/src/client/schemas.gen.ts
index 8f2edf50ef..e917082ca1 100644
--- a/frontend/src/client/schemas.gen.ts
+++ b/frontend/src/client/schemas.gen.ts
@@ -71,7 +71,7 @@ export const Body_login_login_access_tokenSchema = {
 
 export const ExtractionStatusSchema = {
     type: 'string',
-    enum: ['UPLOADED', 'OCR_PROCESSING', 'OCR_COMPLETE', 'SEGMENTATION_PROCESSING', 'SEGMENTATION_COMPLETE', 'TAGGING_PROCESSING', 'DRAFT', 'IN_REVIEW', 'APPROVED', 'REJECTED', 'FAILED'],
+    enum: ['UPLOADED', 'OCR_IN_PROGRESS', 'OCR_COMPLETE', 'OCR_FAILED', 'SEGMENTATION_PROCESSING', 'SEGMENTATION_COMPLETE', 'TAGGING_PROCESSING', 'DRAFT', 'IN_REVIEW', 'APPROVED', 'REJECTED', 'FAILED'],
     title: 'ExtractionStatus',
     description: 'Extraction pipeline status enum.'
 } as const;
diff --git a/frontend/src/client/types.gen.ts b/frontend/src/client/types.gen.ts
index 8aa2f690a7..0492cd11a1 100644
--- a/frontend/src/client/types.gen.ts
+++ b/frontend/src/client/types.gen.ts
@@ -19,7 +19,7 @@ export type Body_login_login_access_token = {
 /**
  * Extraction pipeline status enum.
  */
-export type ExtractionStatus = 'UPLOADED' | 'OCR_PROCESSING' | 'OCR_COMPLETE' | 'SEGMENTATION_PROCESSING' | 'SEGMENTATION_COMPLETE' | 'TAGGING_PROCESSING' | 'DRAFT' | 'IN_REVIEW' | 'APPROVED' | 'REJECTED' | 'FAILED';
+export type ExtractionStatus = 'UPLOADED' | 'OCR_IN_PROGRESS' | 'OCR_COMPLETE' | 'OCR_FAILED' | 'SEGMENTATION_PROCESSING' | 'SEGMENTATION_COMPLETE' | 'TAGGING_PROCESSING' | 'DRAFT' | 'IN_REVIEW' | 'APPROVED' | 'REJECTED' | 'FAILED';
 
 export type HTTPValidationError = {
     detail?: Array<ValidationError>;