From 2a210814329553645ac1230d2e7a0ab9c5107db0 Mon Sep 17 00:00:00 2001
From: "manthapavankumar11@gmail.com" <manthapavankumar11@gmail.com>
Date: Wed, 29 Apr 2026 06:05:33 +0530
Subject: [PATCH 1/2] add quantization to the create collection feature

---
 README.md              |  77 +++++++++++++++++++++
 src/qql/ast_nodes.py   |  20 +++++-
 src/qql/cli.py         |   4 ++
 src/qql/executor.py    |  75 ++++++++++++++++++---
 src/qql/lexer.py       |  16 ++++-
 src/qql/parser.py      |  56 ++++++++++++++++
 tests/test_executor.py | 147 +++++++++++++++++++++++++++++++++++++++++
 tests/test_parser.py   | 108 ++++++++++++++++++++++++++++++
 8 files changed, 489 insertions(+), 14 deletions(-)
diff --git a/README.md b/README.md
index d6a3c09..b45358f 100644
--- a/README.md
+++ b/README.md
@@ -44,6 +44,7 @@ qql> SEARCH notes SIMILAR TO 'vector databases' LIMIT 5 USING HYBRID RERANK
   - [Cross-Encoder Reranking (RERANK)](#cross-encoder-reranking-rerank)
   - [SHOW COLLECTIONS — list collections](#show-collections--list-collections)
   - [CREATE COLLECTION — create a collection](#create-collection--create-a-collection)
+  - [Quantization — QUANTIZE clause](#quantization--quantize-clause)
   - [CREATE INDEX — create a payload index](#create-index--create-a-payload-index)
   - [DROP COLLECTION — delete a collection](#drop-collection--delete-a-collection)
   - [DELETE — remove points](#delete--remove-points)
@@ -903,6 +904,82 @@ If the collection already exists, the command succeeds with a message and does n
 
 ---
 
+### Quantization — QUANTIZE clause
+
+Quantization reduces the memory footprint of vector collections and speeds up search at the cost of a small, controllable accuracy loss. QQL supports all three Qdrant quantization strategies via an optional `QUANTIZE` clause appended to `CREATE COLLECTION`.
+
+**Three strategies:**
+
+| Type | Compression | Accuracy Loss | Best For |
+|---|---|---|---|
+| `SCALAR` | 4× (float32 → int8) | < 1% | Most collections — best balance |
+| `BINARY` | 32× (float32 → 1-bit) | Higher | High-dimensional vectors (768+), speed priority |
+| `PRODUCT` | 4× (fixed in this version) | Variable | Memory-constrained deployments |
+
+**Full syntax:**
+```
+CREATE COLLECTION <name> ... QUANTIZE SCALAR [QUANTILE <0.0–1.0>] [ALWAYS RAM]
+CREATE COLLECTION <name> ... QUANTIZE BINARY  [ALWAYS RAM]
+CREATE COLLECTION <name> ... QUANTIZE PRODUCT [ALWAYS RAM]
+```
+
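+- **`QUANTILE <float>`** — (scalar only) calibration quantile for the INT8 conversion. When omitted, Qdrant uses the whole range of values. Values below 1.0 exclude the most extreme components from the quantization bounds (e.g. `0.99` drops the outermost 1%), which helps when vectors contain outliers. QQL accepts 0.0–1.0 at parse time, but Qdrant documents the expected range as [0.5, 1.0].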
+- **`ALWAYS RAM`** — keep the **quantized** vectors in RAM regardless of how the original vectors are stored, so quantized search stays fast even when the originals live on disk. Supported by all three types.
+- **`QUANTIZE`** always appears **after** all other clauses (`HYBRID`, `USING MODEL`, etc.).
+- For `PRODUCT`, the compression ratio is fixed at **4×** in this version.
+- When used with `HYBRID` collections, quantization applies only to the **dense** vector (Qdrant's behavior).
+
+**Examples:**
+
+Scalar quantization (recommended default):
+```sql
+CREATE COLLECTION research_papers QUANTIZE SCALAR
+```
+
+Scalar with explicit calibration and quantized vectors kept in RAM:
+```sql
+CREATE COLLECTION research_papers QUANTIZE SCALAR QUANTILE 0.95 ALWAYS RAM
+```
+
+Binary quantization for large high-dimensional embeddings:
+```sql
+CREATE COLLECTION research_papers QUANTIZE BINARY
+```
+
+Product quantization with quantized vectors kept in RAM:
+```sql
+CREATE COLLECTION research_papers QUANTIZE PRODUCT ALWAYS RAM
+```
+
+Combined with hybrid collection:
+```sql
+CREATE COLLECTION research_papers HYBRID QUANTIZE SCALAR
+```
+
+Combined with a pinned model:
+```sql
+CREATE COLLECTION research_papers USING MODEL 'BAAI/bge-base-en-v1.5' QUANTIZE SCALAR QUANTILE 0.99
+```
+
+Combined with hybrid + dense model:
+```sql
+CREATE COLLECTION research_papers USING HYBRID DENSE MODEL 'BAAI/bge-base-en-v1.5' QUANTIZE BINARY
+```
+
+**Valid combinations:**
+
+| Base form | + QUANTIZE SCALAR | + QUANTIZE BINARY | + QUANTIZE PRODUCT |
+|---|---|---|---|
+| `CREATE COLLECTION name` | ✓ | ✓ | ✓ |
+| `... HYBRID` | ✓ | ✓ | ✓ |
+| `... USING MODEL 'x'` | ✓ | ✓ | ✓ |
+| `... USING HYBRID` | ✓ | ✓ | ✓ |
+| `... USING HYBRID DENSE MODEL 'x'` | ✓ | ✓ | ✓ |
+
+> **Note:** INSERT and SEARCH on quantized collections work exactly the same as on non-quantized ones — no changes to INSERT or SEARCH syntax are needed.
+
+---
+
 ### CREATE INDEX — create a payload index
 
 Creates a payload index on a collection field. Payload indexes speed up `WHERE` clause filtering by allowing Qdrant to efficiently match on indexed fields.
diff --git a/src/qql/ast_nodes.py b/src/qql/ast_nodes.py
index 1f282f9..5aa0562 100644
--- a/src/qql/ast_nodes.py
+++ b/src/qql/ast_nodes.py
@@ -1,9 +1,24 @@
 from __future__ import annotations
 
 from dataclasses import dataclass
+from enum import Enum
 from typing import Any, Union
 
 
+class QuantizationType(Enum):
+    SCALAR  = "scalar"
+    BINARY  = "binary"
+    PRODUCT = "product"
+
+
+@dataclass(frozen=True)
+class QuantizationConfig:
+    """Quantization settings parsed from a QUANTIZE clause."""
+    type: QuantizationType
+    quantile: float | None = None   # SCALAR only; None → Qdrant uses the whole value range
+    always_ram: bool = False        # all types; True keeps quantized vectors in RAM
+
+
 @dataclass(frozen=True)
 class SearchWith:
     """Query-time search params supported by Qdrant SearchParams."""
@@ -141,8 +156,9 @@ class InsertBulkStmt:
 @dataclass(frozen=True)
 class CreateCollectionStmt:
     collection: str
-    hybrid: bool = False    # if True, create with dense + sparse named vectors
-    model: str | None = None  # dense model; None → use config default
+    hybrid: bool = False                      # if True, create with dense + sparse named vectors
+    model: str | None = None                  # dense model; None → use config default
+    quantization: QuantizationConfig | None = None  # optional QUANTIZE clause
 
 
 @dataclass(frozen=True)
diff --git a/src/qql/cli.py b/src/qql/cli.py
index bfc169a..7e4acf7 100644
--- a/src/qql/cli.py
+++ b/src/qql/cli.py
@@ -38,6 +38,10 @@
       Create a new collection. Add HYBRID for dense+sparse BM25 vectors.
       Optional: [yellow]USING MODEL[/yellow] '<model>'
       Optional: [yellow]USING HYBRID[/yellow] [DENSE MODEL '<model>']
+      Optional: [yellow]QUANTIZE SCALAR[/yellow] [QUANTILE <0.0–1.0>] [ALWAYS RAM]
+      Optional: [yellow]QUANTIZE BINARY[/yellow] [ALWAYS RAM]
+      Optional: [yellow]QUANTIZE PRODUCT[/yellow] [ALWAYS RAM]   (4× compression)
+      QUANTIZE may be combined with any HYBRID or MODEL clause.
 
   [yellow]DROP COLLECTION[/yellow] <name>
       Delete a collection and all its points.
diff --git a/src/qql/executor.py b/src/qql/executor.py
index 1a8d8fa..f43b2d8 100644
--- a/src/qql/executor.py
+++ b/src/qql/executor.py
@@ -9,6 +9,9 @@
 from qdrant_client.http.exceptions import UnexpectedResponse
 from qdrant_client.models import (
     AcornSearchParams,
+    BinaryQuantization,
+    BinaryQuantizationConfig,
+    CompressionRatio,
     Distance,
     FieldCondition,
     Filter,
@@ -29,10 +32,15 @@
     PayloadSchemaType,
     PointStruct,
     Prefetch,
+    ProductQuantization,
+    ProductQuantizationConfig,
     Range,
     RecommendInput,
     RecommendQuery,
     RecommendStrategy,
+    ScalarQuantization,
+    ScalarQuantizationConfig,
+    ScalarType,
     SearchParams,
     SparseVector,
     SparseVectorParams,
@@ -62,6 +70,8 @@
     NotExpr,
     NotInExpr,
     OrExpr,
+    QuantizationConfig,
+    QuantizationType,
     RecommendStmt,
     SearchStmt,
     SearchWith,
@@ -292,37 +302,55 @@ def _execute_create(self, node: CreateCollectionStmt) -> ExecutionResult:
 
         dense_model_name = node.model or self._config.default_model
 
+        # Build optional quantization config (None when QUANTIZE clause absent)
+        quant_config = (
+            self._build_quantization_config(node.quantization)
+            if node.quantization is not None
+            else None
+        )
+        quant_label = (
+            f", {node.quantization.type.value} quantization"
+            if node.quantization is not None
+            else ""
+        )
+
         # ── Hybrid collection: named dense + sparse vectors ────────────────
         if node.hybrid:
             embedder = Embedder(dense_model_name)
             dims = embedder.dimensions
-            self._create_collection_and_wait(
-                collection_name=node.collection,
-                vectors_config={
+            create_kwargs: dict[str, Any] = {
+                "collection_name": node.collection,
+                "vectors_config": {
                     "dense": VectorParams(size=dims, distance=Distance.COSINE)
                 },
-                sparse_vectors_config={
+                "sparse_vectors_config": {
                     "sparse": SparseVectorParams(modifier=Modifier.IDF)
                 },
-            )
+            }
+            if quant_config is not None:
+                create_kwargs["quantization_config"] = quant_config
+            self._create_collection_and_wait(**create_kwargs)
             return ExecutionResult(
                 success=True,
                 message=(
                     f"Collection '{node.collection}' created "
-                    f"(hybrid: {dims}-dim dense + BM25 sparse, cosine distance)"
+                    f"(hybrid: {dims}-dim dense + BM25 sparse, cosine distance{quant_label})"
                 ),
             )
 
         # ── Standard dense-only collection ─────────────────────────────────
         embedder = Embedder(dense_model_name)
         dims = embedder.dimensions
-        self._create_collection_and_wait(
-            collection_name=node.collection,
-            vectors_config=VectorParams(size=dims, distance=Distance.COSINE),
-        )
+        create_kwargs = {
+            "collection_name": node.collection,
+            "vectors_config": VectorParams(size=dims, distance=Distance.COSINE),
+        }
+        if quant_config is not None:
+            create_kwargs["quantization_config"] = quant_config
+        self._create_collection_and_wait(**create_kwargs)
         return ExecutionResult(
             success=True,
-            message=f"Collection '{node.collection}' created ({dims}-dimensional vectors, cosine distance)",
+            message=f"Collection '{node.collection}' created ({dims}-dimensional vectors, cosine distance{quant_label})",
         )
 
     def _execute_create_index(self, node: CreateIndexStmt) -> ExecutionResult:
@@ -816,6 +844,31 @@ def _wrap_as_filter(self, qdrant_expr: Any) -> Filter:
 
     # ── Collection helpers ────────────────────────────────────────────────
 
+    def _build_quantization_config(
+        self, qc: QuantizationConfig
+    ) -> ScalarQuantization | BinaryQuantization | ProductQuantization:
+        """Convert a parsed QuantizationConfig to a Qdrant SDK quantization object."""
+        if qc.type == QuantizationType.SCALAR:
+            return ScalarQuantization(
+                scalar=ScalarQuantizationConfig(
+                    type=ScalarType.INT8,
+                    quantile=qc.quantile,      # None → Qdrant uses the whole value range
+                    always_ram=qc.always_ram,
+                )
+            )
+        if qc.type == QuantizationType.BINARY:
+            return BinaryQuantization(
+                binary=BinaryQuantizationConfig(always_ram=qc.always_ram)
+            )
+        if qc.type == QuantizationType.PRODUCT:
+            return ProductQuantization(
+                product=ProductQuantizationConfig(
+                    compression=CompressionRatio.X4,
+                    always_ram=qc.always_ram,
+                )
+            )
+        raise QQLRuntimeError(f"Unknown quantization type: {qc.type}")
+
     def _collection_is_hybrid(self, name: str) -> bool:
         """Return True if *name* exists and uses named vectors (hybrid collection)."""
         if not self._client.collection_exists(name):
diff --git a/src/qql/lexer.py b/src/qql/lexer.py
index 49f4683..56ed1c7 100644
--- a/src/qql/lexer.py
+++ b/src/qql/lexer.py
@@ -20,6 +20,13 @@ class TokenKind(Enum):
     EXACT = auto()
     WITH = auto()
     ACORN = auto()
+    QUANTIZE = auto()
+    SCALAR   = auto()
+    BINARY   = auto()
+    PRODUCT  = auto()
+    QUANTILE = auto()
+    ALWAYS   = auto()
+    RAM      = auto()
     CREATE = auto()
     INDEX = auto()
     ON = auto()
@@ -98,7 +105,14 @@ class TokenKind(Enum):
     "RERANK": TokenKind.RERANK,
     "EXACT": TokenKind.EXACT,
     "WITH": TokenKind.WITH,
-    "ACORN": TokenKind.ACORN,
+    "ACORN":    TokenKind.ACORN,
+    "QUANTIZE": TokenKind.QUANTIZE,
+    "SCALAR":   TokenKind.SCALAR,
+    "BINARY":   TokenKind.BINARY,
+    "PRODUCT":  TokenKind.PRODUCT,
+    "QUANTILE": TokenKind.QUANTILE,
+    "ALWAYS":   TokenKind.ALWAYS,
+    "RAM":      TokenKind.RAM,
     "CREATE": TokenKind.CREATE,
     "INDEX": TokenKind.INDEX,
     "ON": TokenKind.ON,
diff --git a/src/qql/parser.py b/src/qql/parser.py
index 97ec325..9331dc2 100644
--- a/src/qql/parser.py
+++ b/src/qql/parser.py
@@ -23,6 +23,8 @@
     NotExpr,
     NotInExpr,
     OrExpr,
+    QuantizationConfig,
+    QuantizationType,
     RecommendStmt,
     SearchStmt,
     SearchWith,
@@ -175,10 +177,17 @@ def _parse_create(self) -> CreateCollectionStmt:
                     self._expect(TokenKind.MODEL)
                     model = self._expect(TokenKind.STRING).value
 
+            # ── Optional QUANTIZE clause ──────────────────────────────────
+            quantization: QuantizationConfig | None = None
+            if self._peek().kind == TokenKind.QUANTIZE:
+                self._advance()  # consume QUANTIZE
+                quantization = self._parse_quantize_clause()
+
             return CreateCollectionStmt(
                 collection=collection,
                 hybrid=hybrid,
                 model=model,
+                quantization=quantization,
             )
 
         self._expect(TokenKind.INDEX)
@@ -191,6 +200,53 @@ def _parse_create(self) -> CreateCollectionStmt:
         schema = self._expect(TokenKind.IDENTIFIER).value.lower()
         return CreateIndexStmt(collection=collection, field_name=field_name, schema=schema)
 
+    def _parse_quantize_clause(self) -> QuantizationConfig:
+        """Parse: (SCALAR | BINARY | PRODUCT) [QUANTILE <float>] [ALWAYS RAM]
+
+        Called immediately after the QUANTIZE token has been consumed.
+        """
+        tok = self._peek()
+
+        if tok.kind == TokenKind.SCALAR:
+            self._advance()
+            quantile: float | None = None
+            always_ram: bool = False
+            if self._peek().kind == TokenKind.QUANTILE:
+                self._advance()
+                quantile = float(self._expect(TokenKind.FLOAT).value)
+            if self._peek().kind == TokenKind.ALWAYS:
+                self._advance()
+                self._expect(TokenKind.RAM)
+                always_ram = True
+            return QuantizationConfig(
+                type=QuantizationType.SCALAR,
+                quantile=quantile,
+                always_ram=always_ram,
+            )
+
+        if tok.kind == TokenKind.BINARY:
+            self._advance()
+            always_ram = False
+            if self._peek().kind == TokenKind.ALWAYS:
+                self._advance()
+                self._expect(TokenKind.RAM)
+                always_ram = True
+            return QuantizationConfig(type=QuantizationType.BINARY, always_ram=always_ram)
+
+        if tok.kind == TokenKind.PRODUCT:
+            self._advance()
+            always_ram = False
+            if self._peek().kind == TokenKind.ALWAYS:
+                self._advance()
+                self._expect(TokenKind.RAM)
+                always_ram = True
+            return QuantizationConfig(type=QuantizationType.PRODUCT, always_ram=always_ram)
+
+        raise QQLSyntaxError(
+            f"Expected SCALAR, BINARY, or PRODUCT after QUANTIZE, got '{tok.value}'",
+            tok.pos,
+        )
+
     def _parse_drop(self) -> DropCollectionStmt:
         self._expect(TokenKind.DROP)
         self._expect(TokenKind.COLLECTION)
diff --git a/tests/test_executor.py b/tests/test_executor.py
index 14aab9b..11100d2 100644
--- a/tests/test_executor.py
+++ b/tests/test_executor.py
@@ -7,6 +7,8 @@
     DropCollectionStmt,
     InsertBulkStmt,
     InsertStmt,
+    QuantizationConfig,
+    QuantizationType,
     RecommendStmt,
     SearchStmt,
     SearchWith,
@@ -1493,3 +1495,148 @@ def test_sparse_only_with_rerank_message(
         result = executor.execute(node)
         assert "sparse" in result.message
         assert "reranked" in result.message
+
+
+# ── TestQuantizeCreate ────────────────────────────────────────────────────────
+
+
+class TestQuantizeCreate:
+    # ── Scalar ────────────────────────────────────────────────────────────
+
+    def test_scalar_passes_scalar_quantization(self, executor, mock_client):
+        from qdrant_client.models import ScalarQuantization
+        node = CreateCollectionStmt(
+            collection="articles",
+            quantization=QuantizationConfig(type=QuantizationType.SCALAR),
+        )
+        executor.execute(node)
+        kw = mock_client.create_collection.call_args.kwargs
+        assert isinstance(kw.get("quantization_config"), ScalarQuantization)
+
+    def test_scalar_type_is_int8(self, executor, mock_client):
+        from qdrant_client.models import ScalarType
+        node = CreateCollectionStmt(
+            collection="articles",
+            quantization=QuantizationConfig(type=QuantizationType.SCALAR),
+        )
+        executor.execute(node)
+        kw = mock_client.create_collection.call_args.kwargs
+        assert kw["quantization_config"].scalar.type == ScalarType.INT8
+
+    def test_scalar_quantile_none_by_default(self, executor, mock_client):
+        node = CreateCollectionStmt(
+            collection="articles",
+            quantization=QuantizationConfig(type=QuantizationType.SCALAR),
+        )
+        executor.execute(node)
+        kw = mock_client.create_collection.call_args.kwargs
+        assert kw["quantization_config"].scalar.quantile is None
+
+    def test_scalar_explicit_quantile(self, executor, mock_client):
+        node = CreateCollectionStmt(
+            collection="articles",
+            quantization=QuantizationConfig(type=QuantizationType.SCALAR, quantile=0.95),
+        )
+        executor.execute(node)
+        kw = mock_client.create_collection.call_args.kwargs
+        assert kw["quantization_config"].scalar.quantile == pytest.approx(0.95)
+
+    def test_scalar_always_ram_true(self, executor, mock_client):
+        node = CreateCollectionStmt(
+            collection="articles",
+            quantization=QuantizationConfig(type=QuantizationType.SCALAR, always_ram=True),
+        )
+        executor.execute(node)
+        kw = mock_client.create_collection.call_args.kwargs
+        assert kw["quantization_config"].scalar.always_ram is True
+
+    # ── Binary ────────────────────────────────────────────────────────────
+
+    def test_binary_passes_binary_quantization(self, executor, mock_client):
+        from qdrant_client.models import BinaryQuantization
+        node = CreateCollectionStmt(
+            collection="articles",
+            quantization=QuantizationConfig(type=QuantizationType.BINARY),
+        )
+        executor.execute(node)
+        kw = mock_client.create_collection.call_args.kwargs
+        assert isinstance(kw.get("quantization_config"), BinaryQuantization)
+
+    def test_binary_always_ram(self, executor, mock_client):
+        node = CreateCollectionStmt(
+            collection="articles",
+            quantization=QuantizationConfig(type=QuantizationType.BINARY, always_ram=True),
+        )
+        executor.execute(node)
+        kw = mock_client.create_collection.call_args.kwargs
+        assert kw["quantization_config"].binary.always_ram is True
+
+    # ── Product ───────────────────────────────────────────────────────────
+
+    def test_product_passes_product_quantization(self, executor, mock_client):
+        from qdrant_client.models import ProductQuantization
+        node = CreateCollectionStmt(
+            collection="articles",
+            quantization=QuantizationConfig(type=QuantizationType.PRODUCT),
+        )
+        executor.execute(node)
+        kw = mock_client.create_collection.call_args.kwargs
+        assert isinstance(kw.get("quantization_config"), ProductQuantization)
+
+    def test_product_uses_x4_compression(self, executor, mock_client):
+        from qdrant_client.models import CompressionRatio
+        node = CreateCollectionStmt(
+            collection="articles",
+            quantization=QuantizationConfig(type=QuantizationType.PRODUCT),
+        )
+        executor.execute(node)
+        kw = mock_client.create_collection.call_args.kwargs
+        assert kw["quantization_config"].product.compression == CompressionRatio.X4
+
+    # ── Combined with hybrid ──────────────────────────────────────────────
+
+    def test_hybrid_with_quantization_has_both_configs(self, executor, mock_client):
+        from qdrant_client.models import ScalarQuantization
+        node = CreateCollectionStmt(
+            collection="articles",
+            hybrid=True,
+            quantization=QuantizationConfig(type=QuantizationType.SCALAR),
+        )
+        executor.execute(node)
+        kw = mock_client.create_collection.call_args.kwargs
+        assert isinstance(kw.get("quantization_config"), ScalarQuantization)
+        assert "sparse_vectors_config" in kw
+
+    def test_hybrid_with_quantization_vectors_config_is_named_dict(self, executor, mock_client):
+        node = CreateCollectionStmt(
+            collection="articles",
+            hybrid=True,
+            quantization=QuantizationConfig(type=QuantizationType.BINARY),
+        )
+        executor.execute(node)
+        kw = mock_client.create_collection.call_args.kwargs
+        assert isinstance(kw["vectors_config"], dict)
+        assert "dense" in kw["vectors_config"]
+
+    # ── No quantization — backward compatibility ──────────────────────────
+
+    def test_no_quantization_omits_kwarg(self, executor, mock_client):
+        node = CreateCollectionStmt(collection="articles")
+        executor.execute(node)
+        kw = mock_client.create_collection.call_args.kwargs
+        assert "quantization_config" not in kw
+
+    # ── Result message ────────────────────────────────────────────────────
+
+    def test_result_message_includes_quantization_type(self, executor, mock_client):
+        node = CreateCollectionStmt(
+            collection="articles",
+            quantization=QuantizationConfig(type=QuantizationType.SCALAR),
+        )
+        result = executor.execute(node)
+        assert "scalar" in result.message
+
+    def test_result_message_no_quantization_suffix_when_absent(self, executor, mock_client):
+        node = CreateCollectionStmt(collection="articles")
+        result = executor.execute(node)
+        assert "quantization" not in result.message
diff --git a/tests/test_parser.py b/tests/test_parser.py
index 6804561..5eb0673 100644
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -21,6 +21,8 @@
     NotExpr,
     NotInExpr,
     OrExpr,
+    QuantizationConfig,
+    QuantizationType,
     RecommendStmt,
     SearchStmt,
     SearchWith,
@@ -905,3 +907,109 @@ def test_sparse_only_false_for_hybrid(self):
         node = parse("SEARCH col SIMILAR TO 'q' LIMIT 5 USING HYBRID")
         assert node.sparse_only is False
         assert node.hybrid is True
+
+
+# ── TestQuantizeCreate ────────────────────────────────────────────────────────
+
+
+class TestQuantizeCreate:
+    # ── Scalar — no options ───────────────────────────────────────────────
+
+    def test_scalar_no_options(self):
+        node = parse("CREATE COLLECTION articles QUANTIZE SCALAR")
+        assert isinstance(node, CreateCollectionStmt)
+        assert node.quantization is not None
+        assert node.quantization.type == QuantizationType.SCALAR
+        assert node.quantization.quantile is None
+        assert node.quantization.always_ram is False
+
+    def test_scalar_with_quantile(self):
+        node = parse("CREATE COLLECTION articles QUANTIZE SCALAR QUANTILE 0.99")
+        assert node.quantization.type == QuantizationType.SCALAR
+        assert node.quantization.quantile == pytest.approx(0.99)
+
+    def test_scalar_with_always_ram(self):
+        node = parse("CREATE COLLECTION articles QUANTIZE SCALAR ALWAYS RAM")
+        assert node.quantization.always_ram is True
+        assert node.quantization.quantile is None
+
+    def test_scalar_quantile_and_always_ram(self):
+        node = parse("CREATE COLLECTION articles QUANTIZE SCALAR QUANTILE 0.95 ALWAYS RAM")
+        assert node.quantization.quantile == pytest.approx(0.95)
+        assert node.quantization.always_ram is True
+
+    # ── Binary ────────────────────────────────────────────────────────────
+
+    def test_binary_no_options(self):
+        node = parse("CREATE COLLECTION articles QUANTIZE BINARY")
+        assert isinstance(node, CreateCollectionStmt)
+        assert node.quantization.type == QuantizationType.BINARY
+        assert node.quantization.always_ram is False
+
+    def test_binary_with_always_ram(self):
+        node = parse("CREATE COLLECTION articles QUANTIZE BINARY ALWAYS RAM")
+        assert node.quantization.type == QuantizationType.BINARY
+        assert node.quantization.always_ram is True
+
+    # ── Product ───────────────────────────────────────────────────────────
+
+    def test_product_no_options(self):
+        node = parse("CREATE COLLECTION articles QUANTIZE PRODUCT")
+        assert isinstance(node, CreateCollectionStmt)
+        assert node.quantization.type == QuantizationType.PRODUCT
+        assert node.quantization.always_ram is False
+
+    def test_product_with_always_ram(self):
+        node = parse("CREATE COLLECTION articles QUANTIZE PRODUCT ALWAYS RAM")
+        assert node.quantization.type == QuantizationType.PRODUCT
+        assert node.quantization.always_ram is True
+
+    # ── Combined with HYBRID / MODEL ──────────────────────────────────────
+
+    def test_combined_with_hybrid_shorthand(self):
+        node = parse("CREATE COLLECTION articles HYBRID QUANTIZE SCALAR")
+        assert node.hybrid is True
+        assert node.quantization.type == QuantizationType.SCALAR
+
+    def test_combined_with_using_hybrid(self):
+        node = parse("CREATE COLLECTION articles USING HYBRID QUANTIZE BINARY")
+        assert node.hybrid is True
+        assert node.quantization.type == QuantizationType.BINARY
+
+    def test_combined_with_using_model(self):
+        node = parse(
+            "CREATE COLLECTION articles USING MODEL 'BAAI/bge-base-en-v1.5' QUANTIZE SCALAR"
+        )
+        assert node.model == "BAAI/bge-base-en-v1.5"
+        assert node.hybrid is False
+        assert node.quantization.type == QuantizationType.SCALAR
+
+    def test_combined_with_hybrid_dense_model(self):
+        node = parse(
+            "CREATE COLLECTION articles USING HYBRID DENSE MODEL 'BAAI/bge-base-en-v1.5'"
+            " QUANTIZE SCALAR"
+        )
+        assert node.hybrid is True
+        assert node.model == "BAAI/bge-base-en-v1.5"
+        assert node.quantization.type == QuantizationType.SCALAR
+
+    # ── Backward compatibility ────────────────────────────────────────────
+
+    def test_no_quantize_clause_is_none(self):
+        node = parse("CREATE COLLECTION articles")
+        assert node.quantization is None
+
+    def test_no_quantize_with_hybrid_is_none(self):
+        node = parse("CREATE COLLECTION articles HYBRID")
+        assert node.hybrid is True
+        assert node.quantization is None
+
+    # ── Error cases ───────────────────────────────────────────────────────
+
+    def test_quantize_missing_type_raises(self):
+        with pytest.raises(QQLSyntaxError):
+            parse("CREATE COLLECTION articles QUANTIZE")
+
+    def test_quantize_unknown_type_raises(self):
+        with pytest.raises(QQLSyntaxError):
+            parse("CREATE COLLECTION articles QUANTIZE FULL")

From ede35ccc5c4b8f9df95a06d1d61ab64bfcc5a5a3 Mon Sep 17 00:00:00 2001
From: Srimon Danguria <srimon12mckv@gmail.com>
Date: Wed, 29 Apr 2026 19:54:52 +0530
Subject: [PATCH 2/2] feat: update _parse_create method to support
 CreateIndexStmt and add quantile validation tests (#18)

---
 src/qql/parser.py    | 10 ++++++++--
 tests/test_parser.py | 18 ++++++++++++++++++
 2 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/src/qql/parser.py b/src/qql/parser.py
index 9331dc2..42f9fa2 100644
--- a/src/qql/parser.py
+++ b/src/qql/parser.py
@@ -151,7 +151,7 @@ def _parse_insert_bulk_body(self) -> InsertBulkStmt:
             model=model, hybrid=hybrid, sparse_model=sparse_model,
         )
 
-    def _parse_create(self) -> CreateCollectionStmt:
+    def _parse_create(self) -> CreateCollectionStmt | CreateIndexStmt:
         self._expect(TokenKind.CREATE)
         if self._peek().kind == TokenKind.COLLECTION:
             self._advance()
@@ -213,7 +213,13 @@ def _parse_quantize_clause(self) -> QuantizationConfig:
             always_ram: bool = False
             if self._peek().kind == TokenKind.QUANTILE:
                 self._advance()
-                quantile = float(self._expect(TokenKind.FLOAT).value)
+                quantile_tok = self._peek()
+                quantile = float(self._parse_number())
+                if not 0.0 <= quantile <= 1.0:
+                    raise QQLSyntaxError(
+                        f"QUANTILE must be between 0 and 1 inclusive, got {quantile}",
+                        quantile_tok.pos,
+                    )
             if self._peek().kind == TokenKind.ALWAYS:
                 self._advance()
                 self._expect(TokenKind.RAM)
diff --git a/tests/test_parser.py b/tests/test_parser.py
index 5eb0673..e0e07d5 100644
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -928,6 +928,16 @@ def test_scalar_with_quantile(self):
         assert node.quantization.type == QuantizationType.SCALAR
         assert node.quantization.quantile == pytest.approx(0.99)
 
+    def test_scalar_with_quantile_zero(self):
+        node = parse("CREATE COLLECTION articles QUANTIZE SCALAR QUANTILE 0")
+        assert node.quantization.type == QuantizationType.SCALAR
+        assert node.quantization.quantile == pytest.approx(0.0)
+
+    def test_scalar_with_quantile_one(self):
+        node = parse("CREATE COLLECTION articles QUANTIZE SCALAR QUANTILE 1")
+        assert node.quantization.type == QuantizationType.SCALAR
+        assert node.quantization.quantile == pytest.approx(1.0)
+
     def test_scalar_with_always_ram(self):
         node = parse("CREATE COLLECTION articles QUANTIZE SCALAR ALWAYS RAM")
         assert node.quantization.always_ram is True
@@ -1013,3 +1023,11 @@ def test_quantize_missing_type_raises(self):
     def test_quantize_unknown_type_raises(self):
         with pytest.raises(QQLSyntaxError):
             parse("CREATE COLLECTION articles QUANTIZE FULL")
+
+    def test_scalar_quantile_above_one_raises(self):
+        with pytest.raises(QQLSyntaxError):
+            parse("CREATE COLLECTION articles QUANTIZE SCALAR QUANTILE 1.5")
+
+    def test_scalar_quantile_integer_above_one_raises(self):
+        with pytest.raises(QQLSyntaxError):
+            parse("CREATE COLLECTION articles QUANTIZE SCALAR QUANTILE 2")