From 2a210814329553645ac1230d2e7a0ab9c5107db0 Mon Sep 17 00:00:00 2001 From: "manthapavankumar11@gmail.com" Date: Wed, 29 Apr 2026 06:05:33 +0530 Subject: [PATCH 1/2] add quantization to the create collection feature --- README.md | 77 +++++++++++++++++++++ src/qql/ast_nodes.py | 20 +++++- src/qql/cli.py | 4 ++ src/qql/executor.py | 75 ++++++++++++++++++--- src/qql/lexer.py | 16 ++++- src/qql/parser.py | 56 ++++++++++++++++ tests/test_executor.py | 147 +++++++++++++++++++++++++++++++++++++++++ tests/test_parser.py | 108 ++++++++++++++++++++++++++++++ 8 files changed, 489 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index d6a3c09..b45358f 100644 --- a/README.md +++ b/README.md @@ -44,6 +44,7 @@ qql> SEARCH notes SIMILAR TO 'vector databases' LIMIT 5 USING HYBRID RERANK - [Cross-Encoder Reranking (RERANK)](#cross-encoder-reranking-rerank) - [SHOW COLLECTIONS — list collections](#show-collections--list-collections) - [CREATE COLLECTION — create a collection](#create-collection--create-a-collection) + - [Quantization — QUANTIZE clause](#quantization--quantize-clause) - [CREATE INDEX — create a payload index](#create-index--create-a-payload-index) - [DROP COLLECTION — delete a collection](#drop-collection--delete-a-collection) - [DELETE — remove points](#delete--remove-points) @@ -903,6 +904,82 @@ If the collection already exists, the command succeeds with a message and does n --- +### Quantization — QUANTIZE clause + +Quantization reduces the memory footprint of vector collections and speeds up search at the cost of a small, controllable accuracy loss. QQL supports all three Qdrant quantization strategies via an optional `QUANTIZE` clause appended to `CREATE COLLECTION`. 
+ +**Three strategies:** + +| Type | Compression | Accuracy Loss | Best For | +|---|---|---|---| +| `SCALAR` | 4× (float32 → int8) | < 1% | Most collections — best balance | +| `BINARY` | 32× (float32 → 1-bit) | Higher | High-dimensional vectors (768+), speed priority | +| `PRODUCT` | 4× (fixed in this version) | Variable | Memory-constrained deployments | + +**Full syntax:** +``` +CREATE COLLECTION ... QUANTIZE SCALAR [QUANTILE <0.0–1.0>] [ALWAYS RAM] +CREATE COLLECTION ... QUANTIZE BINARY [ALWAYS RAM] +CREATE COLLECTION ... QUANTIZE PRODUCT [ALWAYS RAM] +``` + +- **`QUANTILE <0.0–1.0>`** — (scalar only) calibration quantile for the INT8 conversion; when omitted, no quantile is sent and Qdrant quantizes over the whole range of values (Qdrant documents the expected range as 0.5–1.0). Lower values exclude more extreme values from the quantization bounds, giving the remaining range finer resolution at the cost of clipping outliers. +- **`ALWAYS RAM`** — keep the **quantized** vectors in RAM at all times (Qdrant's `always_ram` option) so that searches over them never wait on disk; without it, quantized vectors are loaded the same way as the original vectors. Supported by all three types. +- **`QUANTIZE`** always appears **after** all other clauses (`HYBRID`, `USING MODEL`, etc.). +- For `PRODUCT`, the compression ratio is fixed at **4×** in this version. +- When used with `HYBRID` collections, quantization applies only to the **dense** vector (Qdrant's behavior). 
+ +**Examples:** + +Scalar quantization (recommended default): +```sql +CREATE COLLECTION research_papers QUANTIZE SCALAR +``` + +Scalar with explicit calibration and quantized vectors kept in RAM: +```sql +CREATE COLLECTION research_papers QUANTIZE SCALAR QUANTILE 0.95 ALWAYS RAM +``` + +Binary quantization for large high-dimensional embeddings: +```sql +CREATE COLLECTION research_papers QUANTIZE BINARY +``` + +Product quantization (4× compression) with quantized vectors kept in RAM: +```sql +CREATE COLLECTION research_papers QUANTIZE PRODUCT ALWAYS RAM +``` + +Combined with hybrid collection: +```sql +CREATE COLLECTION research_papers HYBRID QUANTIZE SCALAR +``` + +Combined with a pinned model: +```sql +CREATE COLLECTION research_papers USING MODEL 'BAAI/bge-base-en-v1.5' QUANTIZE SCALAR QUANTILE 0.99 +``` + +Combined with hybrid + dense model: +```sql +CREATE COLLECTION research_papers USING HYBRID DENSE MODEL 'BAAI/bge-base-en-v1.5' QUANTIZE BINARY +``` + +**Valid combinations:** + +| Base form | + QUANTIZE SCALAR | + QUANTIZE BINARY | + QUANTIZE PRODUCT | +|---|---|---|---| +| `CREATE COLLECTION name` | ✓ | ✓ | ✓ | +| `... HYBRID` | ✓ | ✓ | ✓ | +| `... USING MODEL 'x'` | ✓ | ✓ | ✓ | +| `... USING HYBRID` | ✓ | ✓ | ✓ | +| `... USING HYBRID DENSE MODEL 'x'` | ✓ | ✓ | ✓ | + +> **Note:** INSERT and SEARCH on quantized collections work exactly the same as on non-quantized ones — no changes to INSERT or SEARCH syntax are needed. + +--- + ### CREATE INDEX — create a payload index Creates a payload index on a collection field. Payload indexes speed up `WHERE` clause filtering by allowing Qdrant to efficiently match on indexed fields. 
diff --git a/src/qql/ast_nodes.py b/src/qql/ast_nodes.py index 1f282f9..5aa0562 100644 --- a/src/qql/ast_nodes.py +++ b/src/qql/ast_nodes.py @@ -1,9 +1,24 @@ from __future__ import annotations from dataclasses import dataclass +from enum import Enum from typing import Any, Union +class QuantizationType(Enum): + SCALAR = "scalar" + BINARY = "binary" + PRODUCT = "product" + + +@dataclass(frozen=True) +class QuantizationConfig: + """Quantization settings parsed from a QUANTIZE clause.""" + type: QuantizationType + quantile: float | None = None # SCALAR only; None → Qdrant default (0.99) + always_ram: bool = False # all types; default False + + @dataclass(frozen=True) class SearchWith: """Query-time search params supported by Qdrant SearchParams.""" @@ -141,8 +156,9 @@ class InsertBulkStmt: @dataclass(frozen=True) class CreateCollectionStmt: collection: str - hybrid: bool = False # if True, create with dense + sparse named vectors - model: str | None = None # dense model; None → use config default + hybrid: bool = False # if True, create with dense + sparse named vectors + model: str | None = None # dense model; None → use config default + quantization: QuantizationConfig | None = None # optional QUANTIZE clause @dataclass(frozen=True) diff --git a/src/qql/cli.py b/src/qql/cli.py index bfc169a..7e4acf7 100644 --- a/src/qql/cli.py +++ b/src/qql/cli.py @@ -38,6 +38,10 @@ Create a new collection. Add HYBRID for dense+sparse BM25 vectors. Optional: [yellow]USING MODEL[/yellow] '' Optional: [yellow]USING HYBRID[/yellow] [DENSE MODEL ''] + Optional: [yellow]QUANTIZE SCALAR[/yellow] [QUANTILE <0.0–1.0>] [ALWAYS RAM] + Optional: [yellow]QUANTIZE BINARY[/yellow] [ALWAYS RAM] + Optional: [yellow]QUANTIZE PRODUCT[/yellow] [ALWAYS RAM] (4× compression) + QUANTIZE may be combined with any HYBRID or MODEL clause. [yellow]DROP COLLECTION[/yellow] Delete a collection and all its points. 
diff --git a/src/qql/executor.py b/src/qql/executor.py index 1a8d8fa..f43b2d8 100644 --- a/src/qql/executor.py +++ b/src/qql/executor.py @@ -9,6 +9,9 @@ from qdrant_client.http.exceptions import UnexpectedResponse from qdrant_client.models import ( AcornSearchParams, + BinaryQuantization, + BinaryQuantizationConfig, + CompressionRatio, Distance, FieldCondition, Filter, @@ -29,10 +32,15 @@ PayloadSchemaType, PointStruct, Prefetch, + ProductQuantization, + ProductQuantizationConfig, Range, RecommendInput, RecommendQuery, RecommendStrategy, + ScalarQuantization, + ScalarQuantizationConfig, + ScalarType, SearchParams, SparseVector, SparseVectorParams, @@ -62,6 +70,8 @@ NotExpr, NotInExpr, OrExpr, + QuantizationConfig, + QuantizationType, RecommendStmt, SearchStmt, SearchWith, @@ -292,37 +302,55 @@ def _execute_create(self, node: CreateCollectionStmt) -> ExecutionResult: dense_model_name = node.model or self._config.default_model + # Build optional quantization config (None when QUANTIZE clause absent) + quant_config = ( + self._build_quantization_config(node.quantization) + if node.quantization is not None + else None + ) + quant_label = ( + f", {node.quantization.type.value} quantization" + if node.quantization is not None + else "" + ) + # ── Hybrid collection: named dense + sparse vectors ──────────────── if node.hybrid: embedder = Embedder(dense_model_name) dims = embedder.dimensions - self._create_collection_and_wait( - collection_name=node.collection, - vectors_config={ + create_kwargs: dict[str, Any] = { + "collection_name": node.collection, + "vectors_config": { "dense": VectorParams(size=dims, distance=Distance.COSINE) }, - sparse_vectors_config={ + "sparse_vectors_config": { "sparse": SparseVectorParams(modifier=Modifier.IDF) }, - ) + } + if quant_config is not None: + create_kwargs["quantization_config"] = quant_config + self._create_collection_and_wait(**create_kwargs) return ExecutionResult( success=True, message=( f"Collection '{node.collection}' created 
" - f"(hybrid: {dims}-dim dense + BM25 sparse, cosine distance)" + f"(hybrid: {dims}-dim dense + BM25 sparse, cosine distance{quant_label})" ), ) # ── Standard dense-only collection ───────────────────────────────── embedder = Embedder(dense_model_name) dims = embedder.dimensions - self._create_collection_and_wait( - collection_name=node.collection, - vectors_config=VectorParams(size=dims, distance=Distance.COSINE), - ) + create_kwargs = { + "collection_name": node.collection, + "vectors_config": VectorParams(size=dims, distance=Distance.COSINE), + } + if quant_config is not None: + create_kwargs["quantization_config"] = quant_config + self._create_collection_and_wait(**create_kwargs) return ExecutionResult( success=True, - message=f"Collection '{node.collection}' created ({dims}-dimensional vectors, cosine distance)", + message=f"Collection '{node.collection}' created ({dims}-dimensional vectors, cosine distance{quant_label})", ) def _execute_create_index(self, node: CreateIndexStmt) -> ExecutionResult: @@ -816,6 +844,31 @@ def _wrap_as_filter(self, qdrant_expr: Any) -> Filter: # ── Collection helpers ──────────────────────────────────────────────── + def _build_quantization_config( + self, qc: QuantizationConfig + ) -> ScalarQuantization | BinaryQuantization | ProductQuantization: + """Convert a parsed QuantizationConfig to a Qdrant SDK quantization object.""" + if qc.type == QuantizationType.SCALAR: + return ScalarQuantization( + scalar=ScalarQuantizationConfig( + type=ScalarType.INT8, + quantile=qc.quantile, # None → SDK uses its own default (0.99) + always_ram=qc.always_ram, + ) + ) + if qc.type == QuantizationType.BINARY: + return BinaryQuantization( + binary=BinaryQuantizationConfig(always_ram=qc.always_ram) + ) + if qc.type == QuantizationType.PRODUCT: + return ProductQuantization( + product=ProductQuantizationConfig( + compression=CompressionRatio.X4, + always_ram=qc.always_ram, + ) + ) + raise QQLRuntimeError(f"Unknown quantization type: {qc.type}") + def 
_collection_is_hybrid(self, name: str) -> bool: """Return True if *name* exists and uses named vectors (hybrid collection).""" if not self._client.collection_exists(name): diff --git a/src/qql/lexer.py b/src/qql/lexer.py index 49f4683..56ed1c7 100644 --- a/src/qql/lexer.py +++ b/src/qql/lexer.py @@ -20,6 +20,13 @@ class TokenKind(Enum): EXACT = auto() WITH = auto() ACORN = auto() + QUANTIZE = auto() + SCALAR = auto() + BINARY = auto() + PRODUCT = auto() + QUANTILE = auto() + ALWAYS = auto() + RAM = auto() CREATE = auto() INDEX = auto() ON = auto() @@ -98,7 +105,14 @@ class TokenKind(Enum): "RERANK": TokenKind.RERANK, "EXACT": TokenKind.EXACT, "WITH": TokenKind.WITH, - "ACORN": TokenKind.ACORN, + "ACORN": TokenKind.ACORN, + "QUANTIZE": TokenKind.QUANTIZE, + "SCALAR": TokenKind.SCALAR, + "BINARY": TokenKind.BINARY, + "PRODUCT": TokenKind.PRODUCT, + "QUANTILE": TokenKind.QUANTILE, + "ALWAYS": TokenKind.ALWAYS, + "RAM": TokenKind.RAM, "CREATE": TokenKind.CREATE, "INDEX": TokenKind.INDEX, "ON": TokenKind.ON, diff --git a/src/qql/parser.py b/src/qql/parser.py index 97ec325..9331dc2 100644 --- a/src/qql/parser.py +++ b/src/qql/parser.py @@ -23,6 +23,8 @@ NotExpr, NotInExpr, OrExpr, + QuantizationConfig, + QuantizationType, RecommendStmt, SearchStmt, SearchWith, @@ -175,10 +177,17 @@ def _parse_create(self) -> CreateCollectionStmt: self._expect(TokenKind.MODEL) model = self._expect(TokenKind.STRING).value + # ── Optional QUANTIZE clause ────────────────────────────────── + quantization: QuantizationConfig | None = None + if self._peek().kind == TokenKind.QUANTIZE: + self._advance() # consume QUANTIZE + quantization = self._parse_quantize_clause() + return CreateCollectionStmt( collection=collection, hybrid=hybrid, model=model, + quantization=quantization, ) self._expect(TokenKind.INDEX) @@ -191,6 +200,53 @@ def _parse_create(self) -> CreateCollectionStmt: schema = self._expect(TokenKind.IDENTIFIER).value.lower() return CreateIndexStmt(collection=collection, 
field_name=field_name, schema=schema) + def _parse_quantize_clause(self) -> QuantizationConfig: + """Parse: (SCALAR | BINARY | PRODUCT) [QUANTILE ] [ALWAYS RAM] + + Called immediately after the QUANTIZE token has been consumed. + """ + tok = self._peek() + + if tok.kind == TokenKind.SCALAR: + self._advance() + quantile: float | None = None + always_ram: bool = False + if self._peek().kind == TokenKind.QUANTILE: + self._advance() + quantile = float(self._expect(TokenKind.FLOAT).value) + if self._peek().kind == TokenKind.ALWAYS: + self._advance() + self._expect(TokenKind.RAM) + always_ram = True + return QuantizationConfig( + type=QuantizationType.SCALAR, + quantile=quantile, + always_ram=always_ram, + ) + + if tok.kind == TokenKind.BINARY: + self._advance() + always_ram = False + if self._peek().kind == TokenKind.ALWAYS: + self._advance() + self._expect(TokenKind.RAM) + always_ram = True + return QuantizationConfig(type=QuantizationType.BINARY, always_ram=always_ram) + + if tok.kind == TokenKind.PRODUCT: + self._advance() + always_ram = False + if self._peek().kind == TokenKind.ALWAYS: + self._advance() + self._expect(TokenKind.RAM) + always_ram = True + return QuantizationConfig(type=QuantizationType.PRODUCT, always_ram=always_ram) + + raise QQLSyntaxError( + f"Expected SCALAR, BINARY, or PRODUCT after QUANTIZE, got '{tok.value}'", + tok.pos, + ) + def _parse_drop(self) -> DropCollectionStmt: self._expect(TokenKind.DROP) self._expect(TokenKind.COLLECTION) diff --git a/tests/test_executor.py b/tests/test_executor.py index 14aab9b..11100d2 100644 --- a/tests/test_executor.py +++ b/tests/test_executor.py @@ -7,6 +7,8 @@ DropCollectionStmt, InsertBulkStmt, InsertStmt, + QuantizationConfig, + QuantizationType, RecommendStmt, SearchStmt, SearchWith, @@ -1493,3 +1495,148 @@ def test_sparse_only_with_rerank_message( result = executor.execute(node) assert "sparse" in result.message assert "reranked" in result.message + + +# ── TestQuantizeCreate 
──────────────────────────────────────────────────────── + + +class TestQuantizeCreate: + # ── Scalar ──────────────────────────────────────────────────────────── + + def test_scalar_passes_scalar_quantization(self, executor, mock_client): + from qdrant_client.models import ScalarQuantization + node = CreateCollectionStmt( + collection="articles", + quantization=QuantizationConfig(type=QuantizationType.SCALAR), + ) + executor.execute(node) + kw = mock_client.create_collection.call_args.kwargs + assert isinstance(kw.get("quantization_config"), ScalarQuantization) + + def test_scalar_type_is_int8(self, executor, mock_client): + from qdrant_client.models import ScalarType + node = CreateCollectionStmt( + collection="articles", + quantization=QuantizationConfig(type=QuantizationType.SCALAR), + ) + executor.execute(node) + kw = mock_client.create_collection.call_args.kwargs + assert kw["quantization_config"].scalar.type == ScalarType.INT8 + + def test_scalar_quantile_none_by_default(self, executor, mock_client): + node = CreateCollectionStmt( + collection="articles", + quantization=QuantizationConfig(type=QuantizationType.SCALAR), + ) + executor.execute(node) + kw = mock_client.create_collection.call_args.kwargs + assert kw["quantization_config"].scalar.quantile is None + + def test_scalar_explicit_quantile(self, executor, mock_client): + node = CreateCollectionStmt( + collection="articles", + quantization=QuantizationConfig(type=QuantizationType.SCALAR, quantile=0.95), + ) + executor.execute(node) + kw = mock_client.create_collection.call_args.kwargs + assert kw["quantization_config"].scalar.quantile == pytest.approx(0.95) + + def test_scalar_always_ram_true(self, executor, mock_client): + node = CreateCollectionStmt( + collection="articles", + quantization=QuantizationConfig(type=QuantizationType.SCALAR, always_ram=True), + ) + executor.execute(node) + kw = mock_client.create_collection.call_args.kwargs + assert kw["quantization_config"].scalar.always_ram is True + + 
# ── Binary ──────────────────────────────────────────────────────────── + + def test_binary_passes_binary_quantization(self, executor, mock_client): + from qdrant_client.models import BinaryQuantization + node = CreateCollectionStmt( + collection="articles", + quantization=QuantizationConfig(type=QuantizationType.BINARY), + ) + executor.execute(node) + kw = mock_client.create_collection.call_args.kwargs + assert isinstance(kw.get("quantization_config"), BinaryQuantization) + + def test_binary_always_ram(self, executor, mock_client): + node = CreateCollectionStmt( + collection="articles", + quantization=QuantizationConfig(type=QuantizationType.BINARY, always_ram=True), + ) + executor.execute(node) + kw = mock_client.create_collection.call_args.kwargs + assert kw["quantization_config"].binary.always_ram is True + + # ── Product ─────────────────────────────────────────────────────────── + + def test_product_passes_product_quantization(self, executor, mock_client): + from qdrant_client.models import ProductQuantization + node = CreateCollectionStmt( + collection="articles", + quantization=QuantizationConfig(type=QuantizationType.PRODUCT), + ) + executor.execute(node) + kw = mock_client.create_collection.call_args.kwargs + assert isinstance(kw.get("quantization_config"), ProductQuantization) + + def test_product_uses_x4_compression(self, executor, mock_client): + from qdrant_client.models import CompressionRatio + node = CreateCollectionStmt( + collection="articles", + quantization=QuantizationConfig(type=QuantizationType.PRODUCT), + ) + executor.execute(node) + kw = mock_client.create_collection.call_args.kwargs + assert kw["quantization_config"].product.compression == CompressionRatio.X4 + + # ── Combined with hybrid ────────────────────────────────────────────── + + def test_hybrid_with_quantization_has_both_configs(self, executor, mock_client): + from qdrant_client.models import ScalarQuantization + node = CreateCollectionStmt( + collection="articles", + 
hybrid=True, + quantization=QuantizationConfig(type=QuantizationType.SCALAR), + ) + executor.execute(node) + kw = mock_client.create_collection.call_args.kwargs + assert isinstance(kw.get("quantization_config"), ScalarQuantization) + assert "sparse_vectors_config" in kw + + def test_hybrid_with_quantization_vectors_config_is_named_dict(self, executor, mock_client): + node = CreateCollectionStmt( + collection="articles", + hybrid=True, + quantization=QuantizationConfig(type=QuantizationType.BINARY), + ) + executor.execute(node) + kw = mock_client.create_collection.call_args.kwargs + assert isinstance(kw["vectors_config"], dict) + assert "dense" in kw["vectors_config"] + + # ── No quantization — backward compatibility ────────────────────────── + + def test_no_quantization_omits_kwarg(self, executor, mock_client): + node = CreateCollectionStmt(collection="articles") + executor.execute(node) + kw = mock_client.create_collection.call_args.kwargs + assert "quantization_config" not in kw + + # ── Result message ──────────────────────────────────────────────────── + + def test_result_message_includes_quantization_type(self, executor, mock_client): + node = CreateCollectionStmt( + collection="articles", + quantization=QuantizationConfig(type=QuantizationType.SCALAR), + ) + result = executor.execute(node) + assert "scalar" in result.message + + def test_result_message_no_quantization_suffix_when_absent(self, executor, mock_client): + node = CreateCollectionStmt(collection="articles") + result = executor.execute(node) + assert "quantization" not in result.message diff --git a/tests/test_parser.py b/tests/test_parser.py index 6804561..5eb0673 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -21,6 +21,8 @@ NotExpr, NotInExpr, OrExpr, + QuantizationConfig, + QuantizationType, RecommendStmt, SearchStmt, SearchWith, @@ -905,3 +907,109 @@ def test_sparse_only_false_for_hybrid(self): node = parse("SEARCH col SIMILAR TO 'q' LIMIT 5 USING HYBRID") assert 
node.sparse_only is False assert node.hybrid is True + + +# ── TestQuantizeCreate ──────────────────────────────────────────────────────── + + +class TestQuantizeCreate: + # ── Scalar — no options ─────────────────────────────────────────────── + + def test_scalar_no_options(self): + node = parse("CREATE COLLECTION articles QUANTIZE SCALAR") + assert isinstance(node, CreateCollectionStmt) + assert node.quantization is not None + assert node.quantization.type == QuantizationType.SCALAR + assert node.quantization.quantile is None + assert node.quantization.always_ram is False + + def test_scalar_with_quantile(self): + node = parse("CREATE COLLECTION articles QUANTIZE SCALAR QUANTILE 0.99") + assert node.quantization.type == QuantizationType.SCALAR + assert node.quantization.quantile == pytest.approx(0.99) + + def test_scalar_with_always_ram(self): + node = parse("CREATE COLLECTION articles QUANTIZE SCALAR ALWAYS RAM") + assert node.quantization.always_ram is True + assert node.quantization.quantile is None + + def test_scalar_quantile_and_always_ram(self): + node = parse("CREATE COLLECTION articles QUANTIZE SCALAR QUANTILE 0.95 ALWAYS RAM") + assert node.quantization.quantile == pytest.approx(0.95) + assert node.quantization.always_ram is True + + # ── Binary ──────────────────────────────────────────────────────────── + + def test_binary_no_options(self): + node = parse("CREATE COLLECTION articles QUANTIZE BINARY") + assert isinstance(node, CreateCollectionStmt) + assert node.quantization.type == QuantizationType.BINARY + assert node.quantization.always_ram is False + + def test_binary_with_always_ram(self): + node = parse("CREATE COLLECTION articles QUANTIZE BINARY ALWAYS RAM") + assert node.quantization.type == QuantizationType.BINARY + assert node.quantization.always_ram is True + + # ── Product ─────────────────────────────────────────────────────────── + + def test_product_no_options(self): + node = parse("CREATE COLLECTION articles QUANTIZE PRODUCT") + assert 
isinstance(node, CreateCollectionStmt) + assert node.quantization.type == QuantizationType.PRODUCT + assert node.quantization.always_ram is False + + def test_product_with_always_ram(self): + node = parse("CREATE COLLECTION articles QUANTIZE PRODUCT ALWAYS RAM") + assert node.quantization.type == QuantizationType.PRODUCT + assert node.quantization.always_ram is True + + # ── Combined with HYBRID / MODEL ────────────────────────────────────── + + def test_combined_with_hybrid_shorthand(self): + node = parse("CREATE COLLECTION articles HYBRID QUANTIZE SCALAR") + assert node.hybrid is True + assert node.quantization.type == QuantizationType.SCALAR + + def test_combined_with_using_hybrid(self): + node = parse("CREATE COLLECTION articles USING HYBRID QUANTIZE BINARY") + assert node.hybrid is True + assert node.quantization.type == QuantizationType.BINARY + + def test_combined_with_using_model(self): + node = parse( + "CREATE COLLECTION articles USING MODEL 'BAAI/bge-base-en-v1.5' QUANTIZE SCALAR" + ) + assert node.model == "BAAI/bge-base-en-v1.5" + assert node.hybrid is False + assert node.quantization.type == QuantizationType.SCALAR + + def test_combined_with_hybrid_dense_model(self): + node = parse( + "CREATE COLLECTION articles USING HYBRID DENSE MODEL 'BAAI/bge-base-en-v1.5'" + " QUANTIZE SCALAR" + ) + assert node.hybrid is True + assert node.model == "BAAI/bge-base-en-v1.5" + assert node.quantization.type == QuantizationType.SCALAR + + # ── Backward compatibility ──────────────────────────────────────────── + + def test_no_quantize_clause_is_none(self): + node = parse("CREATE COLLECTION articles") + assert node.quantization is None + + def test_no_quantize_with_hybrid_is_none(self): + node = parse("CREATE COLLECTION articles HYBRID") + assert node.hybrid is True + assert node.quantization is None + + # ── Error cases ─────────────────────────────────────────────────────── + + def test_quantize_missing_type_raises(self): + with pytest.raises(QQLSyntaxError): + 
parse("CREATE COLLECTION articles QUANTIZE") + + def test_quantize_unknown_type_raises(self): + with pytest.raises(QQLSyntaxError): + parse("CREATE COLLECTION articles QUANTIZE FULL") From ede35ccc5c4b8f9df95a06d1d61ab64bfcc5a5a3 Mon Sep 17 00:00:00 2001 From: Srimon Danguria Date: Wed, 29 Apr 2026 19:54:52 +0530 Subject: [PATCH 2/2] feat: update _parse_create method to support CreateIndexStmt and add quantile validation tests (#18) --- src/qql/parser.py | 10 ++++++++-- tests/test_parser.py | 18 ++++++++++++++++++ 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/src/qql/parser.py b/src/qql/parser.py index 9331dc2..42f9fa2 100644 --- a/src/qql/parser.py +++ b/src/qql/parser.py @@ -151,7 +151,7 @@ def _parse_insert_bulk_body(self) -> InsertBulkStmt: model=model, hybrid=hybrid, sparse_model=sparse_model, ) - def _parse_create(self) -> CreateCollectionStmt: + def _parse_create(self) -> CreateCollectionStmt | CreateIndexStmt: self._expect(TokenKind.CREATE) if self._peek().kind == TokenKind.COLLECTION: self._advance() @@ -213,7 +213,13 @@ def _parse_quantize_clause(self) -> QuantizationConfig: always_ram: bool = False if self._peek().kind == TokenKind.QUANTILE: self._advance() - quantile = float(self._expect(TokenKind.FLOAT).value) + quantile_tok = self._peek() + quantile = float(self._parse_number()) + if not 0.0 <= quantile <= 1.0: + raise QQLSyntaxError( + f"QUANTILE must be between 0 and 1 inclusive, got {quantile}", + quantile_tok.pos, + ) if self._peek().kind == TokenKind.ALWAYS: self._advance() self._expect(TokenKind.RAM) diff --git a/tests/test_parser.py b/tests/test_parser.py index 5eb0673..e0e07d5 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -928,6 +928,16 @@ def test_scalar_with_quantile(self): assert node.quantization.type == QuantizationType.SCALAR assert node.quantization.quantile == pytest.approx(0.99) + def test_scalar_with_quantile_zero(self): + node = parse("CREATE COLLECTION articles QUANTIZE SCALAR QUANTILE 0") + 
assert node.quantization.type == QuantizationType.SCALAR + assert node.quantization.quantile == pytest.approx(0.0) + + def test_scalar_with_quantile_one(self): + node = parse("CREATE COLLECTION articles QUANTIZE SCALAR QUANTILE 1") + assert node.quantization.type == QuantizationType.SCALAR + assert node.quantization.quantile == pytest.approx(1.0) + def test_scalar_with_always_ram(self): node = parse("CREATE COLLECTION articles QUANTIZE SCALAR ALWAYS RAM") assert node.quantization.always_ram is True @@ -1013,3 +1023,11 @@ def test_quantize_missing_type_raises(self): def test_quantize_unknown_type_raises(self): with pytest.raises(QQLSyntaxError): parse("CREATE COLLECTION articles QUANTIZE FULL") + + def test_scalar_quantile_above_one_raises(self): + with pytest.raises(QQLSyntaxError): + parse("CREATE COLLECTION articles QUANTIZE SCALAR QUANTILE 1.5") + + def test_scalar_quantile_integer_above_one_raises(self): + with pytest.raises(QQLSyntaxError): + parse("CREATE COLLECTION articles QUANTIZE SCALAR QUANTILE 2")