pavanjava · pavanjava · May 20, 2026 · May 19, 2026 · coderabbitai · May 19, 2026
diff --git a/docs/programmatic.md b/docs/programmatic.md
@@ -27,7 +27,7 @@ print(result.data)      # {"id": "<uuid>", "collection": "notes"}
 
 # Search
 result = conn.run_query(
-    "SEARCH notes SIMILAR TO 'hello' LIMIT 5 WHERE year >= 2023"
+    "SEARCH notes SIMILAR TO 'hello' LIMIT 5 SCORE THRESHOLD 0.8 WHERE year >= 2023"
 )
 for hit in result.data:
     print(hit["score"], hit["payload"])
@@ -124,7 +124,7 @@ with Connection("http://localhost:6333") as conn:
 
     # Recommend similar points
     result = conn.run_query(
-        "RECOMMEND FROM notes POSITIVE IDS (1, 2) NEGATIVE IDS (3) LIMIT 5"
+        "RECOMMEND FROM notes POSITIVE IDS (1, 2) NEGATIVE IDS (3) LIMIT 5 SCORE THRESHOLD 0.6"
     )
     for hit in result.data:
         print(hit["score"], hit["payload"])

diff --git a/docs/reference.md b/docs/reference.md
@@ -33,6 +33,12 @@ Qdrant/bm25
 INSERT INTO docs VALUES {'text': 'hello'} USING MODEL 'BAAI/bge-small-en-v1.5'
 SEARCH docs SIMILAR TO 'hello' LIMIT 5 USING MODEL 'BAAI/bge-small-en-v1.5'
 
+-- Pagination and score filtering
+SEARCH docs SIMILAR TO 'hello' LIMIT 5 OFFSET 10 SCORE THRESHOLD 0.8
+
+-- Cross-collection retrieval
+SEARCH docs SIMILAR TO 'hello' LIMIT 5 LOOKUP FROM user_profiles VECTOR 'preferences'
+
 -- Explicit vector names
 INSERT INTO docs VALUES {'text': 'hello'} USING VECTOR 'body'
 SEARCH docs SIMILAR TO 'hello' LIMIT 5 USING VECTOR 'body'
@@ -172,7 +178,7 @@ Tests do not require a running Qdrant instance — the Qdrant client is mocked.
 pytest tests/ -v
 ```
 
-Expected output: **549 tests passing**.
+Expected output: **604 tests passing**.
 
 ---
 

diff --git a/docs/search.md b/docs/search.md
@@ -10,7 +10,7 @@ An optional `WHERE` clause filters the candidate set **before** similarity ranki
 
 **Syntax:**
 ```
-SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n>
+SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n> [OFFSET <n>] [SCORE THRESHOLD <f>] [LOOKUP FROM <collection> [VECTOR '<name>']]
 SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n> USING MODEL '<model_name>'
 SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n> USING VECTOR '<dense_vector_name>'
 SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n> [USING MODEL '<model>'] WHERE <filter>
@@ -29,6 +29,21 @@ Basic search, return top 5 results:
 SEARCH articles SIMILAR TO 'machine learning algorithms' LIMIT 5
 ```
 
+Pagination with OFFSET:
+```sql
+SEARCH articles SIMILAR TO 'machine learning' LIMIT 10 OFFSET 20
+```
+
+Filter low-quality matches with SCORE THRESHOLD:
+```sql
+SEARCH articles SIMILAR TO 'deep learning' LIMIT 10 SCORE THRESHOLD 0.8
+```
+
+Cross-collection vector lookup:
+```sql
+SEARCH articles SIMILAR TO 'deep learning' LIMIT 5 LOOKUP FROM user_profiles VECTOR 'preferences'
+```
+
 Search only papers published after 2020:
 ```sql
 SEARCH articles SIMILAR TO 'deep learning' LIMIT 10 WHERE year > 2020
@@ -71,6 +86,10 @@ Search with native MMR diversification:
 SEARCH articles SIMILAR TO 'attention mechanism' LIMIT 10 WITH { mmr_diversity: 0.5, mmr_candidates: 50 }
 ```
 
+**Clause Order:**
+Every clause after `LIMIT` is optional, but the clauses that are present in a `SEARCH` statement must appear in this order:
+`LIMIT` → `OFFSET` → `SCORE THRESHOLD` → `LOOKUP FROM` → `USING ...` → `WHERE` → `RERANK` → `WITH` → `GROUP BY`
+
 **Output:**
 
 Results are displayed as a table with three columns:
@@ -394,6 +413,7 @@ SEARCH <collection> SIMILAR TO '<query>' LIMIT <n> USING HYBRID GROUP BY <field>
 - **`GROUP_SIZE <m>`** — maximum number of points per group (default: **3**).
 - **`GROUP BY <field>`** — the payload field whose values define the groups. **Must be a string (keyword) or number (integer) field** — this is enforced by Qdrant. Dot-notation is supported (e.g. `meta.author`). Array-valued fields are allowed: a point with multiple values for the field can appear in multiple groups. The field should be indexed as `keyword` or `integer` for best performance (see [CREATE INDEX](collections.md)).
 - `WHERE` filters, `USING HYBRID`, and `USING MODEL` are all compatible with GROUP BY.
+- ⚠️ **Incompatibility:** `GROUP BY` cannot be combined with an `OFFSET` greater than 0 — this raises a syntax error (`OFFSET 0` is accepted and treated as no offset). Paginating grouped results is not currently supported in QQL.
 - **`GROUP BY` and `RERANK` cannot be combined** in the same statement — this raises a syntax error.
 
 **Examples:**

diff --git a/src/qql/ast_nodes.py b/src/qql/ast_nodes.py
@@ -295,6 +295,9 @@ class SearchStmt:
     group_size: int = 3                     # max points per group (ignored when group_by is None)
     dense_vector: str | None = None
     sparse_vector: str | None = None
+    offset: int = 0                         # skip first N results
+    score_threshold: float | None = None    # drop results below this score
+    lookup_from: tuple[str, str | None] | None = None # cross-collection retrieval: (collection_name, vector_name)
 
 
 @dataclass(frozen=True)
@@ -305,10 +308,10 @@ class RecommendStmt:
     limit: int = 10
     strategy: str | None = None
     query_filter: FilterExpr | None = None
-    offset: int = 0
-    score_threshold: float | None = None
+    offset: int = 0                         # skip first N results
+    score_threshold: float | None = None    # drop results below this score
     with_clause: SearchWith | None = None
-    lookup_from: tuple[str, str | None] | None = None
+    lookup_from: tuple[str, str | None] | None = None # cross-collection retrieval: (collection_name, vector_name)
     using: str | None = None
 
 

diff --git a/src/qql/cli.py b/src/qql/cli.py
@@ -88,6 +88,9 @@
 
   [yellow]SEARCH[/yellow] <name> [yellow]SIMILAR TO[/yellow] '<text>' [yellow]LIMIT[/yellow] <n>
       Semantic search by vector similarity.
+      Optional: [yellow]OFFSET[/yellow] <n>
+      Optional: [yellow]SCORE THRESHOLD[/yellow] <float|int>
+      Optional: [yellow]LOOKUP FROM[/yellow] <collection> [[yellow]VECTOR[/yellow] '<vector_name>']
       Optional: [yellow]USING MODEL[/yellow] '<model>'
       Optional: [yellow]USING VECTOR[/yellow] '<dense_vector>'
       Optional: [yellow]USING HYBRID[/yellow] [FUSION 'rrf|dbsf'] [DENSE MODEL '<model>'] [DENSE VECTOR '<name>'] [SPARSE MODEL '<model>'] [SPARSE VECTOR '<name>']
@@ -99,13 +102,19 @@
       Optional: [yellow]GROUP BY[/yellow] <field> [[yellow]GROUP_SIZE[/yellow] <n>]
                   Group results by a payload field value (default GROUP_SIZE: 3).
                   Field must be keyword or integer type. RERANK and GROUP BY cannot be combined.
+                  OFFSET > 0 is not supported with GROUP BY (OFFSET 0 is treated as no offset).
 
   [yellow]RECOMMEND FROM[/yellow] <name> [yellow]POSITIVE IDS[/yellow] (<id>, ...)
       Find points similar to known examples.
       Optional: [yellow]NEGATIVE IDS[/yellow] (<id>, ...)
       Optional: [yellow]STRATEGY[/yellow] 'average_vector|best_score|sum_scores'
-      Optional: [yellow]WHERE[/yellow] <filter>
+      Optional: [yellow]LOOKUP FROM[/yellow] <collection> [[yellow]VECTOR[/yellow] '<vector_name>']
+      Optional: [yellow]USING[/yellow] '<vector_name>'
       Requires: [yellow]LIMIT[/yellow] <n>
+      Optional: [yellow]OFFSET[/yellow] <n>
+      Optional: [yellow]SCORE THRESHOLD[/yellow] <float|int>
+      Optional: [yellow]WHERE[/yellow] <filter>
+      Optional: [yellow]WITH[/yellow] { hnsw_ef: <int>, exact: <bool>, acorn: <bool>, indexed_only: <bool>, quantization: { ignore: <bool>, rescore: <bool>, oversampling: <n> } }
 
   [yellow]DELETE FROM[/yellow] <name> [yellow]WHERE id =[/yellow] '<id>'
       Delete a point by its ID.

diff --git a/src/qql/executor.py b/src/qql/executor.py
@@ -846,6 +846,13 @@ def _execute_search(self, node: SearchStmt) -> ExecutionResult:
         # enough material to reorder; only `node.limit` results are returned.
         fetch_limit = node.limit * _RERANK_FETCH_MULTIPLIER if node.rerank else node.limit
 
+        lookup_from: LookupLocation | None = None
+        if node.lookup_from is not None:
+            lookup_from = LookupLocation(
+                collection=node.lookup_from[0],
+                vector=node.lookup_from[1],
+            )
+
         # ── GROUP BY SEARCH: delegate to query_points_groups() ─────────────
         if node.group_by is not None:
             return self._execute_search_groups(
@@ -879,7 +886,10 @@ def _execute_search(self, node: SearchStmt) -> ExecutionResult:
                     ],
                     query=FusionQuery(fusion=self._resolve_hybrid_fusion(node.fusion)),
                     limit=fetch_limit,
+                    offset=node.offset or None,
                     query_filter=qdrant_filter,
+                    score_threshold=node.score_threshold,
+                    lookup_from=lookup_from,
                 )
             except UnexpectedResponse as e:
                 raise QQLRuntimeError(f"Qdrant error during SEARCH: {e}") from e
@@ -919,8 +929,11 @@ def _execute_search(self, node: SearchStmt) -> ExecutionResult:
                     query=sparse_vector,
                     using=topology.sparse_using(node.sparse_vector),
                     limit=fetch_limit,
+                    offset=node.offset or None,
                     query_filter=qdrant_filter,
                     search_params=search_params,
+                    score_threshold=node.score_threshold,
+                    lookup_from=lookup_from,
                 )
             except UnexpectedResponse as e:
                 raise QQLRuntimeError(f"Qdrant error during SEARCH: {e}") from e
@@ -956,8 +969,11 @@ def _execute_search(self, node: SearchStmt) -> ExecutionResult:
                 query=self._build_dense_query(vector, node.with_clause),
                 using=query_using,
                 limit=fetch_limit,
+                offset=node.offset or None,
                 query_filter=qdrant_filter,
                 search_params=search_params,
+                score_threshold=node.score_threshold,
+                lookup_from=lookup_from,
             )
         except UnexpectedResponse as e:
             raise QQLRuntimeError(f"Qdrant error during SEARCH: {e}") from e
@@ -1599,6 +1615,14 @@ def _execute_search_groups(
         topology: CollectionTopology,
     ) -> ExecutionResult:
         """Execute SEARCH ... GROUP BY using query_points_groups()."""
+
+        lookup_from: LookupLocation | None = None
+        if node.lookup_from is not None:
+            lookup_from = LookupLocation(
+                collection=node.lookup_from[0],
+                vector=node.lookup_from[1],
+            )
+
         try:
             if node.hybrid:
                 dense_model = node.model or self._config.default_model
@@ -1627,6 +1651,8 @@ def _execute_search_groups(
                     limit=node.limit,
                     group_size=node.group_size,
                     query_filter=qdrant_filter,
+                    score_threshold=node.score_threshold,
+                    lookup_from=lookup_from,
                 )
                 label = "hybrid, grouped"
             elif node.sparse_only:
@@ -1645,6 +1671,8 @@ def _execute_search_groups(
                     group_size=node.group_size,
                     query_filter=qdrant_filter,
                     search_params=search_params,
+                    score_threshold=node.score_threshold,
+                    lookup_from=lookup_from,
                 )
                 label = "sparse, grouped"
             else:
@@ -1660,6 +1688,8 @@ def _execute_search_groups(
                     group_size=node.group_size,
                     query_filter=qdrant_filter,
                     search_params=search_params,
+                    score_threshold=node.score_threshold,
+                    lookup_from=lookup_from,
                 )
                 label = "grouped"
         except UnexpectedResponse as e:

diff --git a/src/qql/parser.py b/src/qql/parser.py
@@ -682,6 +682,31 @@ def _parse_search(self) -> SearchStmt:
         self._expect(TokenKind.LIMIT)
         limit = int(self._expect(TokenKind.INTEGER).value)
 
+        offset: int = 0
+        if self._peek().kind == TokenKind.OFFSET:
+            self._advance()
+            offset_tok = self._peek()
+            offset = int(self._expect(TokenKind.INTEGER).value)
+            if offset < 0:
+                raise QQLSyntaxError("OFFSET must be a non-negative integer", offset_tok.pos)
+
+        score_threshold: float | None = None
+        if self._peek().kind == TokenKind.SCORE:
+            self._advance()
+            self._expect(TokenKind.THRESHOLD)
+            score_threshold = float(self._parse_number())
+
+        lookup_from: tuple[str, str | None] | None = None
+        if self._peek().kind == TokenKind.LOOKUP:
+            self._advance()
+            self._expect(TokenKind.FROM)
+            lookup_collection = self._parse_identifier()
+            lookup_vector: str | None = None
+            if self._peek().kind == TokenKind.VECTOR:
+                self._advance()
+                lookup_vector = self._expect(TokenKind.STRING).value
+            lookup_from = (lookup_collection, lookup_vector)
+
         with_clause: SearchWith | None = None
         if self._peek().kind == TokenKind.EXACT:
             self._advance()
@@ -757,6 +782,7 @@ def _parse_search(self) -> SearchStmt:
             if self._peek().kind == TokenKind.MODEL:
                 self._advance()  # consume MODEL
                 rerank_model = self._expect(TokenKind.STRING).value
+
         if self._peek().kind == TokenKind.EXACT:
             self._advance()
             if with_clause is None:
@@ -771,6 +797,7 @@ def _parse_search(self) -> SearchStmt:
                     mmr_diversity=with_clause.mmr_diversity,
                     mmr_candidates=with_clause.mmr_candidates,
                 )
+
         if self._peek().kind == TokenKind.WITH:
             self._advance()  # consume WITH
             parsed_with = self._parse_with_clause()
@@ -793,6 +820,8 @@ def _parse_search(self) -> SearchStmt:
         group_by: str | None = None
         group_size: int = 3
         if self._peek().kind == TokenKind.GROUP:
+            if offset > 0:
+                raise QQLSyntaxError("OFFSET cannot be used with GROUP BY", self._peek().pos)
             self._advance()  # consume GROUP
             self._expect(TokenKind.BY)
             group_by = self._parse_field_path()
@@ -827,6 +856,9 @@ def _parse_search(self) -> SearchStmt:
             group_size=group_size,
             dense_vector=dense_vector,
             sparse_vector=sparse_vector,
+            offset=offset,
+            score_threshold=score_threshold,
+            lookup_from=lookup_from,
         )
 
     def _parse_recommend(self) -> RecommendStmt:
@@ -870,13 +902,16 @@ def _parse_recommend(self) -> RecommendStmt:
         offset: int = 0
         if self._peek().kind == TokenKind.OFFSET:
             self._advance()
+            offset_tok = self._peek()
             offset = int(self._expect(TokenKind.INTEGER).value)
+            if offset < 0:
+                raise QQLSyntaxError("OFFSET must be a non-negative integer", offset_tok.pos)
 
         score_threshold: float | None = None
         if self._peek().kind == TokenKind.SCORE:
             self._advance()
             self._expect(TokenKind.THRESHOLD)
-            score_threshold = float(self._expect(TokenKind.FLOAT).value)
+            score_threshold = float(self._parse_number())
 
         query_filter: FilterExpr | None = None
         if self._peek().kind == TokenKind.WHERE: