pavanjava · pavanjava · Apr 18, 2026 · Apr 18, 2026
diff --git a/README.md b/README.md
@@ -252,6 +252,7 @@ SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n> USING MODEL '<model
 SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n> [USING MODEL '<model>'] WHERE <filter>
 SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n> USING HYBRID
 SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n> USING HYBRID [DENSE MODEL '<model>'] [SPARSE MODEL '<model>'] [WHERE <filter>]
+SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n> USING SPARSE [MODEL '<sparse_model>']
 SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n> EXACT
 SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n> [USING ...] [WHERE <filter>] [RERANK] WITH { hnsw_ef: <n>, exact: true|false, acorn: true|false }
 SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n> [USING ...] [WHERE <filter>] RERANK [MODEL '<reranker_model>']
@@ -284,6 +285,16 @@ Hybrid search with a WHERE filter:
 SEARCH articles SIMILAR TO 'transformers' LIMIT 10 USING HYBRID WHERE year >= 2020
 ```
 
+Sparse-only search (queries only the `sparse` named vector, so the collection must have been created as a hybrid collection — useful for pure keyword retrieval):
+```sql
+SEARCH medical_knowledge SIMILAR TO 'beta blocker contraindications' LIMIT 5 USING SPARSE
+```
+
+Sparse-only with a custom SPLADE model:
+```sql
+SEARCH medical_knowledge SIMILAR TO 'beta blocker contraindications' LIMIT 5 USING SPARSE MODEL 'prithivida/Splade_PP_en_v1'
+```
+
 Exact search for recall debugging:
 ```sql
 SEARCH articles SIMILAR TO 'attention mechanism' LIMIT 10 EXACT
@@ -498,7 +509,12 @@ Hybrid search combines **dense semantic vectors** and **sparse BM25 keyword vect
 A hybrid collection stores both a named dense vector (`"dense"`) and a named sparse vector (`"sparse"`):
 
 ```sql
+-- Shorthand (backward compatible)
 CREATE COLLECTION articles HYBRID
+
+-- USING form — allows specifying a dense model
+CREATE COLLECTION articles USING HYBRID
+CREATE COLLECTION articles USING HYBRID DENSE MODEL 'BAAI/bge-base-en-v1.5'
 ```
 
 This is equivalent to calling Qdrant with:
@@ -695,21 +711,34 @@ Explicitly creates a new empty collection. Collections are also created automati
 ```
 CREATE COLLECTION <collection_name>
 CREATE COLLECTION <collection_name> HYBRID
+CREATE COLLECTION <collection_name> USING MODEL '<model_name>'
+CREATE COLLECTION <collection_name> USING HYBRID
+CREATE COLLECTION <collection_name> USING HYBRID DENSE MODEL '<model>'
 ```
 
 **Examples:**
 
-Dense-only collection (standard):
+Dense-only collection (standard, uses default model dimensions):
 ```sql
 CREATE COLLECTION research_papers
 ```
 
-Hybrid collection (dense + sparse BM25):
+Dense-only collection pinned to a specific model (768-dimensional):
+```sql
+CREATE COLLECTION research_papers USING MODEL 'BAAI/bge-base-en-v1.5'
+```
+
+Hybrid collection (dense + sparse BM25, default models):
 ```sql
 CREATE COLLECTION research_papers HYBRID
 ```
 
-The collection is created using the **default embedding model's dimensions** (384 for `all-MiniLM-L6-v2`) with **cosine distance**.
+Hybrid collection with a custom dense model:
+```sql
+CREATE COLLECTION research_papers USING HYBRID DENSE MODEL 'BAAI/bge-base-en-v1.5'
+```
+
+When no model is specified (`USING MODEL '<model>'`, or `USING HYBRID DENSE MODEL '<model>'` for hybrid collections), the collection is created using the **default embedding model's dimensions** (384 for `all-MiniLM-L6-v2`) with **cosine distance**. Specify a model to pin the collection to that model's output size — this must match the dense model you use in INSERT and SEARCH.
 
 If the collection already exists, the command succeeds with a message and does nothing.
 

diff --git a/src/qql/ast_nodes.py b/src/qql/ast_nodes.py
@@ -133,6 +133,7 @@ class InsertStmt:
 class CreateCollectionStmt:
     collection: str
     hybrid: bool = False    # if True, create with dense + sparse named vectors
+    model: str | None = None  # dense model; None → use config default
 
 
 @dataclass(frozen=True)
@@ -152,7 +153,8 @@ class SearchStmt:
     limit: int
     model: str | None               # dense model; None → use config default
     hybrid: bool = False            # if True, use prefetch+RRF hybrid search
-    sparse_model: str | None = None # sparse model for hybrid; None → SparseEmbedder.DEFAULT_MODEL
+    sparse_only: bool = False       # if True, query only the sparse vector (no dense)
+    sparse_model: str | None = None # sparse model for hybrid/sparse-only; None → SparseEmbedder.DEFAULT_MODEL
     query_filter: FilterExpr | None = None  # optional WHERE clause; default keeps existing tests valid
     rerank: bool = False                    # if True, apply cross-encoder reranking post-Qdrant
     rerank_model: str | None = None         # cross-encoder model; None → CrossEncoderEmbedder.DEFAULT_MODEL

diff --git a/src/qql/cli.py b/src/qql/cli.py
@@ -30,6 +30,8 @@
 
   [yellow]CREATE COLLECTION[/yellow] <name> [[yellow]HYBRID[/yellow]]
       Create a new collection. Add HYBRID for dense+sparse BM25 vectors.
+      Optional: [yellow]USING MODEL[/yellow] '<model>'
+      Optional: [yellow]USING HYBRID[/yellow] [DENSE MODEL '<model>']
 
   [yellow]DROP COLLECTION[/yellow] <name>
       Delete a collection and all its points.
@@ -41,6 +43,7 @@
       Semantic search by vector similarity.
       Optional: [yellow]USING MODEL[/yellow] '<model>'
       Optional: [yellow]USING HYBRID[/yellow] [DENSE MODEL '<model>'] [SPARSE MODEL '<model>']
+      Optional: [yellow]USING SPARSE[/yellow] [MODEL '<model>']   sparse-vector-only search
       Optional: [yellow]WHERE[/yellow] <filter>   (e.g. WHERE year > 2020 AND status = 'ok')
       Optional: [yellow]RERANK[/yellow] [MODEL '<model>']   rerank results with a cross-encoder
       Optional: [yellow]EXACT[/yellow]   bypass HNSW and perform exact search

diff --git a/src/qql/executor.py b/src/qql/executor.py
@@ -177,9 +177,11 @@ def _execute_create(self, node: CreateCollectionStmt) -> ExecutionResult:
                 message=f"Collection '{node.collection}' already exists",
             )
 
+        dense_model_name = node.model or self._config.default_model
+
         # ── Hybrid collection: named dense + sparse vectors ────────────────
         if node.hybrid:
-            embedder = Embedder(self._config.default_model)
+            embedder = Embedder(dense_model_name)
             dims = embedder.dimensions
             self._client.create_collection(
                 collection_name=node.collection,
@@ -199,7 +201,7 @@ def _execute_create(self, node: CreateCollectionStmt) -> ExecutionResult:
             )
 
         # ── Standard dense-only collection ─────────────────────────────────
-        embedder = Embedder(self._config.default_model)
+        embedder = Embedder(dense_model_name)
         dims = embedder.dimensions
         self._client.create_collection(
             collection_name=node.collection,
@@ -302,6 +304,46 @@ def _execute_search(self, node: SearchStmt) -> ExecutionResult:
                 data=results,
             )
 
+        # ── Sparse-only SEARCH: query the "sparse" named vector directly ─────
+        if node.sparse_only:
+            sparse_model_name = node.sparse_model or SparseEmbedder.DEFAULT_MODEL
+            sparse_embedder = SparseEmbedder(sparse_model_name)
+            sparse_obj = sparse_embedder.query_embed(node.query_text)
+            sparse_vector = SparseVector(
+                indices=sparse_obj["indices"],
+                values=sparse_obj["values"],
+            )
+
+            try:
+                response = self._client.query_points(
+                    collection_name=node.collection,
+                    query=sparse_vector,
+                    using="sparse",
+                    limit=fetch_limit,
+                    query_filter=qdrant_filter,
+                )
+            except UnexpectedResponse as e:
+                raise QQLRuntimeError(f"Qdrant error during SEARCH: {e}") from e
+
+            results = [
+                {"id": str(h.id), "score": round(h.score, 4), "payload": h.payload}
+                for h in response.points
+            ]
+
+            if node.rerank:
+                results = self._apply_reranking(node.query_text, results, node.limit, node.rerank_model)
+                return ExecutionResult(
+                    success=True,
+                    message=f"Found {len(results)} result(s) (sparse, reranked)",
+                    data=results,
+                )
+
+            return ExecutionResult(
+                success=True,
+                message=f"Found {len(results)} result(s) (sparse)",
+                data=results,
+            )
+
         # ── Standard dense-only SEARCH ─────────────────────────────────────
         model_name = node.model or self._config.default_model
         embedder = Embedder(model_name)

diff --git a/src/qql/parser.py b/src/qql/parser.py
@@ -107,10 +107,31 @@ def _parse_create(self) -> CreateCollectionStmt:
         self._expect(TokenKind.COLLECTION)
         collection = self._parse_identifier()
         hybrid: bool = False
+        model: str | None = None
+
         if self._peek().kind == TokenKind.HYBRID:
+            # Bare HYBRID shorthand — backward compat
             self._advance()
             hybrid = True
-        return CreateCollectionStmt(collection=collection, hybrid=hybrid)
+        elif self._peek().kind == TokenKind.USING:
+            self._advance()  # consume USING
+            if self._peek().kind == TokenKind.HYBRID:
+                self._advance()  # consume HYBRID
+                hybrid = True
+                # Optional DENSE MODEL sub-clause
+                if self._peek().kind == TokenKind.DENSE:
+                    self._advance()  # consume DENSE
+                    self._expect(TokenKind.MODEL)
+                    model = self._expect(TokenKind.STRING).value
+            else:
+                self._expect(TokenKind.MODEL)
+                model = self._expect(TokenKind.STRING).value
+
+        return CreateCollectionStmt(
+            collection=collection,
+            hybrid=hybrid,
+            model=model,
+        )
 
     def _parse_drop(self) -> DropCollectionStmt:
         self._expect(TokenKind.DROP)
@@ -139,6 +160,7 @@ def _parse_search(self) -> SearchStmt:
 
         model: str | None = None
         hybrid: bool = False
+        sparse_only: bool = False
         sparse_model: str | None = None
         if self._peek().kind == TokenKind.USING:
             self._advance()  # consume USING
@@ -154,6 +176,12 @@ def _parse_search(self) -> SearchStmt:
                         model = m
                     else:
                         sparse_model = m
+            elif self._peek().kind == TokenKind.SPARSE:
+                self._advance()  # consume SPARSE
+                sparse_only = True
+                if self._peek().kind == TokenKind.MODEL:
+                    self._advance()  # consume MODEL
+                    sparse_model = self._expect(TokenKind.STRING).value
             else:
                 self._expect(TokenKind.MODEL)
                 model = self._expect(TokenKind.STRING).value
@@ -196,6 +224,7 @@ def _parse_search(self) -> SearchStmt:
             limit=limit,
             model=model,
             hybrid=hybrid,
+            sparse_only=sparse_only,
             sparse_model=sparse_model,
             query_filter=query_filter,
             rerank=rerank,