From 28dc287b8d0e86540fc48d96efe7122c9a8a2e2d Mon Sep 17 00:00:00 2001
From: Srimon <srimon12mckv@gmail.com>
Date: Wed, 20 May 2026 01:25:26 +0530
Subject: [PATCH] feat: enhance search and recommend functionality with offset,
 score threshold, and cross-collection lookup support

---
 docs/programmatic.md   |   4 +-
 docs/reference.md      |   8 ++-
 docs/search.md         |  22 ++++++-
 src/qql/ast_nodes.py   |   9 ++-
 src/qql/cli.py         |  11 +++-
 src/qql/executor.py    |  30 +++++++++
 src/qql/parser.py      |  37 ++++++++++-
 tests/test_executor.py | 142 +++++++++++++++++++++++++++++++++++++++++
 tests/test_parser.py   |  38 +++++++++++
 9 files changed, 292 insertions(+), 9 deletions(-)
diff --git a/docs/programmatic.md b/docs/programmatic.md
index 6e86021..9771f33 100644
--- a/docs/programmatic.md
+++ b/docs/programmatic.md
@@ -27,7 +27,7 @@ print(result.data)      # {"id": "<uuid>", "collection": "notes"}
 
 # Search
 result = conn.run_query(
-    "SEARCH notes SIMILAR TO 'hello' LIMIT 5 WHERE year >= 2023"
+    "SEARCH notes SIMILAR TO 'hello' LIMIT 5 SCORE THRESHOLD 0.8 WHERE year >= 2023"
 )
 for hit in result.data:
     print(hit["score"], hit["payload"])
@@ -124,7 +124,7 @@ with Connection("http://localhost:6333") as conn:
 
     # Recommend similar points
     result = conn.run_query(
-        "RECOMMEND FROM notes POSITIVE IDS (1, 2) NEGATIVE IDS (3) LIMIT 5"
+        "RECOMMEND FROM notes POSITIVE IDS (1, 2) NEGATIVE IDS (3) LIMIT 5 SCORE THRESHOLD 0.6"
     )
     for hit in result.data:
         print(hit["score"], hit["payload"])
diff --git a/docs/reference.md b/docs/reference.md
index f006077..55d61d9 100644
--- a/docs/reference.md
+++ b/docs/reference.md
@@ -33,6 +33,12 @@ Qdrant/bm25
 INSERT INTO docs VALUES {'text': 'hello'} USING MODEL 'BAAI/bge-small-en-v1.5'
 SEARCH docs SIMILAR TO 'hello' LIMIT 5 USING MODEL 'BAAI/bge-small-en-v1.5'
 
+-- Pagination and score filtering
+SEARCH docs SIMILAR TO 'hello' LIMIT 5 OFFSET 10 SCORE THRESHOLD 0.8
+
+-- Cross-collection retrieval
+SEARCH docs SIMILAR TO 'hello' LIMIT 5 LOOKUP FROM user_profiles VECTOR 'preferences'
+
 -- Explicit vector names
 INSERT INTO docs VALUES {'text': 'hello'} USING VECTOR 'body'
 SEARCH docs SIMILAR TO 'hello' LIMIT 5 USING VECTOR 'body'
@@ -172,7 +178,7 @@ Tests do not require a running Qdrant instance — the Qdrant client is mocked.
 pytest tests/ -v
 ```
 
-Expected output: **549 tests passing**.
+Expected output: **604 tests passing**.
 
 ---
 
diff --git a/docs/search.md b/docs/search.md
index 3083409..8daa2d4 100644
--- a/docs/search.md
+++ b/docs/search.md
@@ -10,7 +10,7 @@ An optional `WHERE` clause filters the candidate set **before** similarity ranki
 
 **Syntax:**
 ```
-SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n>
+SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n> [OFFSET <skip>] [SCORE THRESHOLD <min_score>] [LOOKUP FROM <collection> [VECTOR '<name>']]
 SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n> USING MODEL '<model_name>'
 SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n> USING VECTOR '<dense_vector_name>'
 SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n> [USING MODEL '<model>'] WHERE <filter>
@@ -29,6 +29,21 @@ Basic search, return top 5 results:
 SEARCH articles SIMILAR TO 'machine learning algorithms' LIMIT 5
 ```
 
+Pagination with OFFSET:
+```sql
+SEARCH articles SIMILAR TO 'machine learning' LIMIT 10 OFFSET 20
+```
+
+Filter low-quality matches with SCORE THRESHOLD:
+```sql
+SEARCH articles SIMILAR TO 'deep learning' LIMIT 10 SCORE THRESHOLD 0.8
+```
+
+Cross-collection vector lookup:
+```sql
+SEARCH articles SIMILAR TO 'deep learning' LIMIT 5 LOOKUP FROM user_profiles VECTOR 'preferences'
+```
+
 Search only papers published after 2020:
 ```sql
 SEARCH articles SIMILAR TO 'deep learning' LIMIT 10 WHERE year > 2020
@@ -71,6 +86,10 @@ Search with native MMR diversification:
 SEARCH articles SIMILAR TO 'attention mechanism' LIMIT 10 WITH { mmr_diversity: 0.5, mmr_candidates: 50 }
 ```
 
+**Clause Order:**
+`SEARCH` clauses must appear in the following order (`LIMIT` is required; all other clauses are optional):
+`LIMIT` → `OFFSET` → `SCORE THRESHOLD` → `LOOKUP FROM` → `USING ...` → `WHERE` → `RERANK` → `WITH` → `GROUP BY`
+
 **Output:**
 
 Results are displayed as a table with three columns:
@@ -394,6 +413,7 @@ SEARCH <collection> SIMILAR TO '<query>' LIMIT <n> USING HYBRID GROUP BY <field>
 - **`GROUP_SIZE <m>`** — maximum number of points per group (default: **3**).
 - **`GROUP BY <field>`** — the payload field whose values define the groups. **Must be a string (keyword) or number (integer) field** — this is enforced by Qdrant. Dot-notation is supported (e.g. `meta.author`). Array-valued fields are allowed: a point with multiple values for the field can appear in multiple groups. The field should be indexed as `keyword` or `integer` for best performance (see [CREATE INDEX](collections.md)).
 - `WHERE` filters, `USING HYBRID`, and `USING MODEL` are all compatible with GROUP BY.
+- ⚠️ **Incompatibility:** `GROUP BY` cannot be combined with `OFFSET` or `RERANK`; either combination raises a syntax error. Paginating grouped results in Qdrant requires cursors, which QQL does not currently support.
 - **`GROUP BY` and `RERANK` cannot be combined** in the same statement — this raises a syntax error.
 
 **Examples:**
diff --git a/src/qql/ast_nodes.py b/src/qql/ast_nodes.py
index 2e4985f..5f0b2f2 100644
--- a/src/qql/ast_nodes.py
+++ b/src/qql/ast_nodes.py
@@ -295,6 +295,9 @@ class SearchStmt:
     group_size: int = 3                     # max points per group (ignored when group_by is None)
     dense_vector: str | None = None
     sparse_vector: str | None = None
+    offset: int = 0                         # skip first N results
+    score_threshold: float | None = None    # drop results below this score
+    lookup_from: tuple[str, str | None] | None = None # cross-collection retrieval: (collection_name, vector_name)
 
 
 @dataclass(frozen=True)
@@ -305,10 +308,10 @@ class RecommendStmt:
     limit: int = 10
     strategy: str | None = None
     query_filter: FilterExpr | None = None
-    offset: int = 0
-    score_threshold: float | None = None
+    offset: int = 0                         # skip first N results
+    score_threshold: float | None = None    # drop results below this score
     with_clause: SearchWith | None = None
-    lookup_from: tuple[str, str | None] | None = None
+    lookup_from: tuple[str, str | None] | None = None # cross-collection retrieval: (collection_name, vector_name)
     using: str | None = None
 
 
diff --git a/src/qql/cli.py b/src/qql/cli.py
index 2b4ab16..c573f14 100644
--- a/src/qql/cli.py
+++ b/src/qql/cli.py
@@ -88,6 +88,9 @@
 
   [yellow]SEARCH[/yellow] <name> [yellow]SIMILAR TO[/yellow] '<text>' [yellow]LIMIT[/yellow] <n>
       Semantic search by vector similarity.
+      Optional: [yellow]OFFSET[/yellow] <n>
+      Optional: [yellow]SCORE THRESHOLD[/yellow] <float|int>
+      Optional: [yellow]LOOKUP FROM[/yellow] <collection> [[yellow]VECTOR[/yellow] '<vector_name>']
       Optional: [yellow]USING MODEL[/yellow] '<model>'
       Optional: [yellow]USING VECTOR[/yellow] '<dense_vector>'
       Optional: [yellow]USING HYBRID[/yellow] [FUSION 'rrf|dbsf'] [DENSE MODEL '<model>'] [DENSE VECTOR '<name>'] [SPARSE MODEL '<model>'] [SPARSE VECTOR '<name>']
@@ -99,13 +102,19 @@
       Optional: [yellow]GROUP BY[/yellow] <field> [[yellow]GROUP_SIZE[/yellow] <n>]
                   Group results by a payload field value (default GROUP_SIZE: 3).
                   Field must be keyword or integer type. RERANK and GROUP BY cannot be combined.
+                  OFFSET is not supported with GROUP BY.
 
   [yellow]RECOMMEND FROM[/yellow] <name> [yellow]POSITIVE IDS[/yellow] (<id>, ...)
       Find points similar to known examples.
       Optional: [yellow]NEGATIVE IDS[/yellow] (<id>, ...)
       Optional: [yellow]STRATEGY[/yellow] 'average_vector|best_score|sum_scores'
-      Optional: [yellow]WHERE[/yellow] <filter>
+      Optional: [yellow]LOOKUP FROM[/yellow] <collection> [[yellow]VECTOR[/yellow] '<vector_name>']
+      Optional: [yellow]USING[/yellow] '<vector_name>'
       Requires: [yellow]LIMIT[/yellow] <n>
+      Optional: [yellow]OFFSET[/yellow] <n>
+      Optional: [yellow]SCORE THRESHOLD[/yellow] <float|int>
+      Optional: [yellow]WHERE[/yellow] <filter>
+      Optional: [yellow]WITH[/yellow] { hnsw_ef: <int>, exact: <bool>, acorn: <bool>, indexed_only: <bool>, quantization: { ignore: <bool>, rescore: <bool>, oversampling: <n> } }
 
   [yellow]DELETE FROM[/yellow] <name> [yellow]WHERE id =[/yellow] '<id>'
       Delete a point by its ID.
diff --git a/src/qql/executor.py b/src/qql/executor.py
index 46343c3..29c2e63 100644
--- a/src/qql/executor.py
+++ b/src/qql/executor.py
@@ -846,6 +846,13 @@ def _execute_search(self, node: SearchStmt) -> ExecutionResult:
         # enough material to reorder; only `node.limit` results are returned.
         fetch_limit = node.limit * _RERANK_FETCH_MULTIPLIER if node.rerank else node.limit
 
+        lookup_from: LookupLocation | None = None
+        if node.lookup_from is not None:
+            lookup_from = LookupLocation(
+                collection=node.lookup_from[0],
+                vector=node.lookup_from[1],
+            )
+
         # ── GROUP BY SEARCH: delegate to query_points_groups() ─────────────
         if node.group_by is not None:
             return self._execute_search_groups(
@@ -879,7 +886,10 @@ def _execute_search(self, node: SearchStmt) -> ExecutionResult:
                     ],
                     query=FusionQuery(fusion=self._resolve_hybrid_fusion(node.fusion)),
                     limit=fetch_limit,
+                    offset=node.offset or None,
                     query_filter=qdrant_filter,
+                    score_threshold=node.score_threshold,
+                    lookup_from=lookup_from,
                 )
             except UnexpectedResponse as e:
                 raise QQLRuntimeError(f"Qdrant error during SEARCH: {e}") from e
@@ -919,8 +929,11 @@ def _execute_search(self, node: SearchStmt) -> ExecutionResult:
                     query=sparse_vector,
                     using=topology.sparse_using(node.sparse_vector),
                     limit=fetch_limit,
+                    offset=node.offset or None,
                     query_filter=qdrant_filter,
                     search_params=search_params,
+                    score_threshold=node.score_threshold,
+                    lookup_from=lookup_from,
                 )
             except UnexpectedResponse as e:
                 raise QQLRuntimeError(f"Qdrant error during SEARCH: {e}") from e
@@ -956,8 +969,11 @@ def _execute_search(self, node: SearchStmt) -> ExecutionResult:
                 query=self._build_dense_query(vector, node.with_clause),
                 using=query_using,
                 limit=fetch_limit,
+                offset=node.offset or None,
                 query_filter=qdrant_filter,
                 search_params=search_params,
+                score_threshold=node.score_threshold,
+                lookup_from=lookup_from,
             )
         except UnexpectedResponse as e:
             raise QQLRuntimeError(f"Qdrant error during SEARCH: {e}") from e
@@ -1599,6 +1615,14 @@ def _execute_search_groups(
         topology: CollectionTopology,
     ) -> ExecutionResult:
         """Execute SEARCH ... GROUP BY using query_points_groups()."""
+        
+        lookup_from: LookupLocation | None = None
+        if node.lookup_from is not None:
+            lookup_from = LookupLocation(
+                collection=node.lookup_from[0],
+                vector=node.lookup_from[1],
+            )
+            
         try:
             if node.hybrid:
                 dense_model = node.model or self._config.default_model
@@ -1627,6 +1651,8 @@ def _execute_search_groups(
                     limit=node.limit,
                     group_size=node.group_size,
                     query_filter=qdrant_filter,
+                    score_threshold=node.score_threshold,
+                    lookup_from=lookup_from,
                 )
                 label = "hybrid, grouped"
             elif node.sparse_only:
@@ -1645,6 +1671,8 @@ def _execute_search_groups(
                     group_size=node.group_size,
                     query_filter=qdrant_filter,
                     search_params=search_params,
+                    score_threshold=node.score_threshold,
+                    lookup_from=lookup_from,
                 )
                 label = "sparse, grouped"
             else:
@@ -1660,6 +1688,8 @@ def _execute_search_groups(
                     group_size=node.group_size,
                     query_filter=qdrant_filter,
                     search_params=search_params,
+                    score_threshold=node.score_threshold,
+                    lookup_from=lookup_from,
                 )
                 label = "grouped"
         except UnexpectedResponse as e:
diff --git a/src/qql/parser.py b/src/qql/parser.py
index 17fdafa..b62b9b0 100644
--- a/src/qql/parser.py
+++ b/src/qql/parser.py
@@ -682,6 +682,31 @@ def _parse_search(self) -> SearchStmt:
         self._expect(TokenKind.LIMIT)
         limit = int(self._expect(TokenKind.INTEGER).value)
 
+        offset: int = 0
+        if self._peek().kind == TokenKind.OFFSET:
+            self._advance()
+            offset_tok = self._peek()
+            offset = int(self._expect(TokenKind.INTEGER).value)
+            if offset < 0:
+                raise QQLSyntaxError("OFFSET must be a non-negative integer", offset_tok.pos)
+
+        score_threshold: float | None = None
+        if self._peek().kind == TokenKind.SCORE:
+            self._advance()
+            self._expect(TokenKind.THRESHOLD)
+            score_threshold = float(self._parse_number())
+
+        lookup_from: tuple[str, str | None] | None = None
+        if self._peek().kind == TokenKind.LOOKUP:
+            self._advance()
+            self._expect(TokenKind.FROM)
+            lookup_collection = self._parse_identifier()
+            lookup_vector: str | None = None
+            if self._peek().kind == TokenKind.VECTOR:
+                self._advance()
+                lookup_vector = self._expect(TokenKind.STRING).value
+            lookup_from = (lookup_collection, lookup_vector)
+
         with_clause: SearchWith | None = None
         if self._peek().kind == TokenKind.EXACT:
             self._advance()
@@ -757,6 +782,7 @@ def _parse_search(self) -> SearchStmt:
             if self._peek().kind == TokenKind.MODEL:
                 self._advance()  # consume MODEL
                 rerank_model = self._expect(TokenKind.STRING).value
+        
         if self._peek().kind == TokenKind.EXACT:
             self._advance()
             if with_clause is None:
@@ -771,6 +797,7 @@ def _parse_search(self) -> SearchStmt:
                     mmr_diversity=with_clause.mmr_diversity,
                     mmr_candidates=with_clause.mmr_candidates,
                 )
+            
         if self._peek().kind == TokenKind.WITH:
             self._advance()  # consume WITH
             parsed_with = self._parse_with_clause()
@@ -793,6 +820,8 @@ def _parse_search(self) -> SearchStmt:
         group_by: str | None = None
         group_size: int = 3
         if self._peek().kind == TokenKind.GROUP:
+            if offset > 0:
+                raise QQLSyntaxError("OFFSET cannot be used with GROUP BY", self._peek().pos)
             self._advance()  # consume GROUP
             self._expect(TokenKind.BY)
             group_by = self._parse_field_path()
@@ -827,6 +856,9 @@ def _parse_search(self) -> SearchStmt:
             group_size=group_size,
             dense_vector=dense_vector,
             sparse_vector=sparse_vector,
+            offset=offset,
+            score_threshold=score_threshold,
+            lookup_from=lookup_from,
         )
 
     def _parse_recommend(self) -> RecommendStmt:
@@ -870,13 +902,16 @@ def _parse_recommend(self) -> RecommendStmt:
         offset: int = 0
         if self._peek().kind == TokenKind.OFFSET:
             self._advance()
+            offset_tok = self._peek()
             offset = int(self._expect(TokenKind.INTEGER).value)
+            if offset < 0:
+                raise QQLSyntaxError("OFFSET must be a non-negative integer", offset_tok.pos)
 
         score_threshold: float | None = None
         if self._peek().kind == TokenKind.SCORE:
             self._advance()
             self._expect(TokenKind.THRESHOLD)
-            score_threshold = float(self._expect(TokenKind.FLOAT).value)
+            score_threshold = float(self._parse_number())
 
         query_filter: FilterExpr | None = None
         if self._peek().kind == TokenKind.WHERE:
diff --git a/tests/test_executor.py b/tests/test_executor.py
index 3f1a366..4ed4864 100644
--- a/tests/test_executor.py
+++ b/tests/test_executor.py
@@ -974,6 +974,136 @@ def test_search_calls_qdrant_query_points(self, executor, mock_client, mocker):
         mock_client.query_points.assert_called_once()
         assert result.success is True
 
+    def test_search_forwards_offset_score_lookup(self, executor, mock_client, mocker):
+        mock_client.collection_exists.return_value = True
+        mock_response = mocker.MagicMock()
+        mock_response.points = []
+        mock_client.query_points.return_value = mock_response
+
+        node = SearchStmt(
+            collection="notes",
+            query_text="hello",
+            limit=5,
+            model=None,
+            offset=10,
+            score_threshold=0.8,
+            lookup_from=("other_coll", "vec_name")
+        )
+        executor.execute(node)
+
+        kwargs = mock_client.query_points.call_args.kwargs
+        assert kwargs["offset"] == 10
+        assert kwargs["score_threshold"] == 0.8
+        assert kwargs["lookup_from"].collection == "other_coll"
+        assert kwargs["lookup_from"].vector == "vec_name"
+
+    def test_search_forwards_offset_score_lookup_hybrid(self, executor, mock_client, mocker):
+        mocker.patch("qql.executor.Embedder", return_value=mocker.MagicMock())
+        mocker.patch("qql.executor.SparseEmbedder", return_value=mocker.MagicMock())
+        mock_client.collection_exists.return_value = True
+        
+        collection_info = mocker.MagicMock()
+        collection_info.config.params.vectors = {"dense": {}}
+        collection_info.config.params.sparse_vectors = {"sparse": {}}
+        mock_client.get_collection.return_value = collection_info
+        
+        mock_response = mocker.MagicMock()
+        mock_response.points = []
+        mock_client.query_points.return_value = mock_response
+
+        node = SearchStmt(
+            collection="notes",
+            query_text="hello",
+            limit=5,
+            model=None,
+            hybrid=True,
+            offset=10,
+            score_threshold=0.8,
+            lookup_from=("other_coll", None)
+        )
+        executor.execute(node)
+
+        kwargs = mock_client.query_points.call_args.kwargs
+        assert kwargs["offset"] == 10
+        assert kwargs["score_threshold"] == 0.8
+        assert kwargs["lookup_from"].collection == "other_coll"
+        assert kwargs["lookup_from"].vector is None
+
+    def test_search_forwards_offset_score_lookup_sparse(self, executor, mock_client, mocker):
+        mocker.patch("qql.executor.SparseEmbedder", return_value=mocker.MagicMock())
+        mock_client.collection_exists.return_value = True
+        
+        collection_info = mocker.MagicMock()
+        collection_info.config.params.vectors = None
+        collection_info.config.params.sparse_vectors = {"sparse": {}}
+        mock_client.get_collection.return_value = collection_info
+        
+        mock_response = mocker.MagicMock()
+        mock_response.points = []
+        mock_client.query_points.return_value = mock_response
+
+        node = SearchStmt(
+            collection="notes",
+            query_text="hello",
+            limit=5,
+            model=None,
+            sparse_only=True,
+            offset=5,
+            score_threshold=0.5,
+            lookup_from=("other", "vec")
+        )
+        executor.execute(node)
+
+        kwargs = mock_client.query_points.call_args.kwargs
+        assert kwargs["offset"] == 5
+        assert kwargs["score_threshold"] == 0.5
+        assert kwargs["lookup_from"].collection == "other"
+        assert kwargs["lookup_from"].vector == "vec"
+
+    def test_search_groups_forwards_score_lookup(self, executor, mock_client, mocker):
+        mocker.patch("qql.executor.Embedder", return_value=mocker.MagicMock())
+        mock_client.collection_exists.return_value = True
+        mock_response = mocker.MagicMock()
+        mock_response.groups = []
+        mock_client.query_points_groups.return_value = mock_response
+
+        node = SearchStmt(
+            collection="notes",
+            query_text="hello",
+            limit=5,
+            model=None,
+            group_by="author",
+            group_size=2,
+            offset=0,
+            score_threshold=0.7,
+            lookup_from=("other_coll", "vec_name")
+        )
+        executor.execute(node)
+
+        kwargs = mock_client.query_points_groups.call_args.kwargs
+        assert "offset" not in kwargs
+        assert kwargs["score_threshold"] == 0.7
+        assert kwargs["lookup_from"].collection == "other_coll"
+        assert kwargs["lookup_from"].vector == "vec_name"
+
+    def test_search_forwards_offset_0_as_none(self, executor, mock_client, mocker):
+        mock_client.collection_exists.return_value = True
+        mock_response = mocker.MagicMock()
+        mock_response.points = []
+        mock_client.query_points.return_value = mock_response
+
+        node = SearchStmt(
+            collection="notes",
+            query_text="hello",
+            limit=5,
+            model=None,
+            offset=0
+        )
+        executor.execute(node)
+
+        kwargs = mock_client.query_points.call_args.kwargs
+        assert kwargs["offset"] is None
+
     def test_search_nonexistent_collection_raises(self, executor, mock_client):
         mock_client.collection_exists.return_value = False
         node = SearchStmt(collection="ghost", query_text="hi", limit=3, model=None)
@@ -1251,6 +1381,18 @@ def test_recommend_forwards_offset(self, executor, mock_client, mocker):
         executor.execute(node)
         assert mock_client.query_points.call_args.kwargs["offset"] == 10
 
+    def test_recommend_forwards_offset_0_as_none(self, executor, mock_client, mocker):
+        mock_client.collection_exists.return_value = True
+        mock_response = mocker.MagicMock()
+        mock_response.points = []
+        mock_client.query_points.return_value = mock_response
+
+        node = RecommendStmt(
+            collection="notes", positive_ids=("a",), limit=5, offset=0
+        )
+        executor.execute(node)
+        assert mock_client.query_points.call_args.kwargs["offset"] is None
+
     def test_recommend_forwards_score_threshold(self, executor, mock_client, mocker):
         mock_client.collection_exists.return_value = True
         mock_response = mocker.MagicMock()
diff --git a/tests/test_parser.py b/tests/test_parser.py
index 3f95f89..9c1bcca 100644
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -265,6 +265,34 @@ def test_search_with_model(self):
         node = parse("SEARCH notes SIMILAR TO 'hi' LIMIT 3 USING MODEL 'my-model'")
         assert node.model == "my-model"
 
+    def test_search_with_offset_score_lookup(self):
+        node = parse(
+            "SEARCH notes SIMILAR TO 'hi' LIMIT 5 OFFSET 10 SCORE THRESHOLD 0.8 LOOKUP FROM other_coll"
+        )
+        assert node.offset == 10
+        assert node.score_threshold == 0.8
+        assert node.lookup_from == ("other_coll", None)
+
+    def test_search_with_integer_score_threshold(self):
+        node = parse(
+            "SEARCH notes SIMILAR TO 'hi' LIMIT 5 SCORE THRESHOLD 1"
+        )
+        assert node.score_threshold == pytest.approx(1.0)
+
+    def test_search_with_negative_offset_raises(self):
+        with pytest.raises(QQLSyntaxError, match="must be a non-negative integer"):
+            parse("SEARCH notes SIMILAR TO 'hi' LIMIT 5 OFFSET -1")
+
+    def test_search_group_by_with_offset_raises(self):
+        with pytest.raises(QQLSyntaxError, match="OFFSET cannot be used with GROUP BY"):
+            parse("SEARCH notes SIMILAR TO 'hi' LIMIT 5 OFFSET 10 GROUP BY author")
+
+    def test_search_with_lookup_vector(self):
+        node = parse(
+            "SEARCH notes SIMILAR TO 'hi' LIMIT 5 LOOKUP FROM other_coll VECTOR 'my_vec'"
+        )
+        assert node.lookup_from == ("other_coll", "my_vec")
+
 
 class TestDelete:
     def test_delete_by_string_id(self):
@@ -323,12 +351,22 @@ def test_recommend_with_offset(self):
         node = parse("RECOMMEND FROM notes POSITIVE IDS ('a') LIMIT 10 OFFSET 5")
         assert node.offset == 5
 
+    def test_recommend_with_negative_offset_raises(self):
+        with pytest.raises(QQLSyntaxError, match="must be a non-negative integer"):
+            parse("RECOMMEND FROM notes POSITIVE IDS ('a') LIMIT 10 OFFSET -1")
+
     def test_recommend_with_score_threshold(self):
         node = parse(
             "RECOMMEND FROM notes POSITIVE IDS ('a') LIMIT 10 SCORE THRESHOLD 0.5"
         )
         assert node.score_threshold == pytest.approx(0.5)
 
+    def test_recommend_with_integer_score_threshold(self):
+        node = parse(
+            "RECOMMEND FROM notes POSITIVE IDS ('a') LIMIT 10 SCORE THRESHOLD 1"
+        )
+        assert node.score_threshold == pytest.approx(1.0)
+
     def test_recommend_with_clause(self):
         node = parse(
             "RECOMMEND FROM notes POSITIVE IDS ('a') LIMIT 10 WITH { exact: true }"