Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions docs/programmatic.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ print(result.data) # {"id": "<uuid>", "collection": "notes"}

# Search
result = conn.run_query(
"SEARCH notes SIMILAR TO 'hello' LIMIT 5 WHERE year >= 2023"
"SEARCH notes SIMILAR TO 'hello' LIMIT 5 SCORE THRESHOLD 0.8 WHERE year >= 2023"
)
for hit in result.data:
print(hit["score"], hit["payload"])
Expand Down Expand Up @@ -124,7 +124,7 @@ with Connection("http://localhost:6333") as conn:

# Recommend similar points
result = conn.run_query(
"RECOMMEND FROM notes POSITIVE IDS (1, 2) NEGATIVE IDS (3) LIMIT 5"
"RECOMMEND FROM notes POSITIVE IDS (1, 2) NEGATIVE IDS (3) LIMIT 5 SCORE THRESHOLD 0.6"
)
for hit in result.data:
print(hit["score"], hit["payload"])
Expand Down
8 changes: 7 additions & 1 deletion docs/reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,12 @@ Qdrant/bm25
INSERT INTO docs VALUES {'text': 'hello'} USING MODEL 'BAAI/bge-small-en-v1.5'
SEARCH docs SIMILAR TO 'hello' LIMIT 5 USING MODEL 'BAAI/bge-small-en-v1.5'

-- Pagination and score filtering
SEARCH docs SIMILAR TO 'hello' LIMIT 5 OFFSET 10 SCORE THRESHOLD 0.8

-- Cross-collection retrieval
SEARCH docs SIMILAR TO 'hello' LIMIT 5 LOOKUP FROM user_profiles VECTOR 'preferences'

-- Explicit vector names
INSERT INTO docs VALUES {'text': 'hello'} USING VECTOR 'body'
SEARCH docs SIMILAR TO 'hello' LIMIT 5 USING VECTOR 'body'
Expand Down Expand Up @@ -172,7 +178,7 @@ Tests do not require a running Qdrant instance — the Qdrant client is mocked.
pytest tests/ -v
```

Expected output: **549 tests passing**.
Expected output: **604 tests passing**.

---

Expand Down
22 changes: 21 additions & 1 deletion docs/search.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ An optional `WHERE` clause filters the candidate set **before** similarity ranki

**Syntax:**
```
SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n>
SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n> [OFFSET <n>] [SCORE THRESHOLD <f>] [LOOKUP FROM <collection> [VECTOR '<name>']]
SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n> USING MODEL '<model_name>'
SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n> USING VECTOR '<dense_vector_name>'
SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n> [USING MODEL '<model>'] WHERE <filter>
Expand All @@ -29,6 +29,21 @@ Basic search, return top 5 results:
SEARCH articles SIMILAR TO 'machine learning algorithms' LIMIT 5
```

Pagination with OFFSET:
```sql
SEARCH articles SIMILAR TO 'machine learning' LIMIT 10 OFFSET 20
```

Filter low-quality matches with SCORE THRESHOLD:
```sql
SEARCH articles SIMILAR TO 'deep learning' LIMIT 10 SCORE THRESHOLD 0.8
```

Cross-collection vector lookup:
```sql
SEARCH articles SIMILAR TO 'deep learning' LIMIT 5 LOOKUP FROM user_profiles VECTOR 'preferences'
```

Search only papers published after 2020:
```sql
SEARCH articles SIMILAR TO 'deep learning' LIMIT 10 WHERE year > 2020
Expand Down Expand Up @@ -71,6 +86,10 @@ Search with native MMR diversification:
SEARCH articles SIMILAR TO 'attention mechanism' LIMIT 10 WITH { mmr_diversity: 0.5, mmr_candidates: 50 }
```

**Clause Order:**
Optional `SEARCH` clauses, when present, must appear in exactly this order:
`LIMIT` → `OFFSET` → `SCORE THRESHOLD` → `LOOKUP FROM` → `USING ...` → `WHERE` → `RERANK` → `WITH` → `GROUP BY`

**Output:**

Results are displayed as a table with three columns:
Expand Down Expand Up @@ -394,6 +413,7 @@ SEARCH <collection> SIMILAR TO '<query>' LIMIT <n> USING HYBRID GROUP BY <field>
- **`GROUP_SIZE <m>`** — maximum number of points per group (default: **3**).
- **`GROUP BY <field>`** — the payload field whose values define the groups. **Must be a string (keyword) or number (integer) field** — this is enforced by Qdrant. Dot-notation is supported (e.g. `meta.author`). Array-valued fields are allowed: a point with multiple values for the field can appear in multiple groups. The field should be indexed as `keyword` or `integer` for best performance (see [CREATE INDEX](collections.md)).
- `WHERE` filters, `USING HYBRID`, and `USING MODEL` are all compatible with GROUP BY.
- ⚠️ **Incompatibility:** `GROUP BY` cannot be combined with `RERANK` or with a non-zero `OFFSET` (`OFFSET 0` is accepted and treated as no offset). To paginate grouped results, Qdrant relies on cursors, which QQL does not currently support.
- **`GROUP BY` and `RERANK` cannot be combined** in the same statement — this raises a syntax error.

**Examples:**
Expand Down
9 changes: 6 additions & 3 deletions src/qql/ast_nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,9 @@ class SearchStmt:
group_size: int = 3 # max points per group (ignored when group_by is None)
dense_vector: str | None = None
sparse_vector: str | None = None
offset: int = 0 # skip first N results
score_threshold: float | None = None # drop results below this score
lookup_from: tuple[str, str | None] | None = None # cross-collection retrieval: (collection_name, vector_name)


@dataclass(frozen=True)
Expand All @@ -305,10 +308,10 @@ class RecommendStmt:
limit: int = 10
strategy: str | None = None
query_filter: FilterExpr | None = None
offset: int = 0
score_threshold: float | None = None
offset: int = 0 # skip first N results
score_threshold: float | None = None # drop results below this score
with_clause: SearchWith | None = None
lookup_from: tuple[str, str | None] | None = None
lookup_from: tuple[str, str | None] | None = None # cross-collection retrieval: (collection_name, vector_name)
using: str | None = None


Expand Down
11 changes: 10 additions & 1 deletion src/qql/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,9 @@

[yellow]SEARCH[/yellow] <name> [yellow]SIMILAR TO[/yellow] '<text>' [yellow]LIMIT[/yellow] <n>
Semantic search by vector similarity.
Optional: [yellow]OFFSET[/yellow] <n>
Optional: [yellow]SCORE THRESHOLD[/yellow] <float|int>
Optional: [yellow]LOOKUP FROM[/yellow] <collection> [[yellow]VECTOR[/yellow] '<vector_name>']
Optional: [yellow]USING MODEL[/yellow] '<model>'
Optional: [yellow]USING VECTOR[/yellow] '<dense_vector>'
Optional: [yellow]USING HYBRID[/yellow] [FUSION 'rrf|dbsf'] [DENSE MODEL '<model>'] [DENSE VECTOR '<name>'] [SPARSE MODEL '<model>'] [SPARSE VECTOR '<name>']
Expand All @@ -99,13 +102,19 @@
Optional: [yellow]GROUP BY[/yellow] <field> [[yellow]GROUP_SIZE[/yellow] <n>]
Group results by a payload field value (default GROUP_SIZE: 3).
Field must be keyword or integer type. RERANK and GROUP BY cannot be combined.
OFFSET is not supported with GROUP BY.
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor | ⚡ Quick win

Align GROUP BY/OFFSET help text with actual parser behavior.

Line 105 currently states OFFSET is unsupported with GROUP BY, but parser only rejects OFFSET > 0 (so OFFSET 0 is accepted). Please make docs and behavior consistent.

Proposed doc fix
-                  OFFSET is not supported with GROUP BY.
+                  OFFSET > 0 is not supported with GROUP BY (OFFSET 0 is treated as no offset).
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
OFFSET is not supported with GROUP BY.
OFFSET > 0 is not supported with GROUP BY (OFFSET 0 is treated as no offset).
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@src/qql/cli.py` at line 105, update the CLI help text so it matches parser
behavior: replace the current message "OFFSET is not supported with GROUP BY."
with a precise statement that only OFFSET > 0 is disallowed with GROUP BY (e.g.,
"OFFSET > 0 is not supported with GROUP BY (OFFSET 0 is treated as no offset)").
The line to edit sits in the GROUP BY notes of the SEARCH section of the
`[yellow]`-marked-up help string in src/qql/cli.py (it is not an argparse
`add_argument` help text). After editing, run the CLI tests or a smoke command
to confirm the wording now aligns with the parser behavior.


[yellow]RECOMMEND FROM[/yellow] <name> [yellow]POSITIVE IDS[/yellow] (<id>, ...)
Find points similar to known examples.
Optional: [yellow]NEGATIVE IDS[/yellow] (<id>, ...)
Optional: [yellow]STRATEGY[/yellow] 'average_vector|best_score|sum_scores'
Optional: [yellow]WHERE[/yellow] <filter>
Optional: [yellow]LOOKUP FROM[/yellow] <collection> [[yellow]VECTOR[/yellow] '<vector_name>']
Optional: [yellow]USING[/yellow] '<vector_name>'
Requires: [yellow]LIMIT[/yellow] <n>
Optional: [yellow]OFFSET[/yellow] <n>
Optional: [yellow]SCORE THRESHOLD[/yellow] <float|int>
Optional: [yellow]WHERE[/yellow] <filter>
Optional: [yellow]WITH[/yellow] { hnsw_ef: <int>, exact: <bool>, acorn: <bool>, indexed_only: <bool>, quantization: { ignore: <bool>, rescore: <bool>, oversampling: <n> } }

[yellow]DELETE FROM[/yellow] <name> [yellow]WHERE id =[/yellow] '<id>'
Delete a point by its ID.
Expand Down
30 changes: 30 additions & 0 deletions src/qql/executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -846,6 +846,13 @@ def _execute_search(self, node: SearchStmt) -> ExecutionResult:
# enough material to reorder; only `node.limit` results are returned.
fetch_limit = node.limit * _RERANK_FETCH_MULTIPLIER if node.rerank else node.limit

lookup_from: LookupLocation | None = None
if node.lookup_from is not None:
lookup_from = LookupLocation(
collection=node.lookup_from[0],
vector=node.lookup_from[1],
)

# ── GROUP BY SEARCH: delegate to query_points_groups() ─────────────
if node.group_by is not None:
return self._execute_search_groups(
Expand Down Expand Up @@ -879,7 +886,10 @@ def _execute_search(self, node: SearchStmt) -> ExecutionResult:
],
query=FusionQuery(fusion=self._resolve_hybrid_fusion(node.fusion)),
limit=fetch_limit,
offset=node.offset or None,
query_filter=qdrant_filter,
score_threshold=node.score_threshold,
lookup_from=lookup_from,
)
except UnexpectedResponse as e:
raise QQLRuntimeError(f"Qdrant error during SEARCH: {e}") from e
Expand Down Expand Up @@ -919,8 +929,11 @@ def _execute_search(self, node: SearchStmt) -> ExecutionResult:
query=sparse_vector,
using=topology.sparse_using(node.sparse_vector),
limit=fetch_limit,
offset=node.offset or None,
query_filter=qdrant_filter,
search_params=search_params,
score_threshold=node.score_threshold,
lookup_from=lookup_from,
)
except UnexpectedResponse as e:
raise QQLRuntimeError(f"Qdrant error during SEARCH: {e}") from e
Expand Down Expand Up @@ -956,8 +969,11 @@ def _execute_search(self, node: SearchStmt) -> ExecutionResult:
query=self._build_dense_query(vector, node.with_clause),
using=query_using,
limit=fetch_limit,
offset=node.offset or None,
query_filter=qdrant_filter,
search_params=search_params,
score_threshold=node.score_threshold,
lookup_from=lookup_from,
)
except UnexpectedResponse as e:
raise QQLRuntimeError(f"Qdrant error during SEARCH: {e}") from e
Expand Down Expand Up @@ -1599,6 +1615,14 @@ def _execute_search_groups(
topology: CollectionTopology,
) -> ExecutionResult:
"""Execute SEARCH ... GROUP BY using query_points_groups()."""

lookup_from: LookupLocation | None = None
if node.lookup_from is not None:
lookup_from = LookupLocation(
collection=node.lookup_from[0],
vector=node.lookup_from[1],
)

try:
if node.hybrid:
dense_model = node.model or self._config.default_model
Expand Down Expand Up @@ -1627,6 +1651,8 @@ def _execute_search_groups(
limit=node.limit,
group_size=node.group_size,
query_filter=qdrant_filter,
score_threshold=node.score_threshold,
lookup_from=lookup_from,
)
label = "hybrid, grouped"
elif node.sparse_only:
Expand All @@ -1645,6 +1671,8 @@ def _execute_search_groups(
group_size=node.group_size,
query_filter=qdrant_filter,
search_params=search_params,
score_threshold=node.score_threshold,
lookup_from=lookup_from,
)
label = "sparse, grouped"
else:
Expand All @@ -1660,6 +1688,8 @@ def _execute_search_groups(
group_size=node.group_size,
query_filter=qdrant_filter,
search_params=search_params,
score_threshold=node.score_threshold,
lookup_from=lookup_from,
)
label = "grouped"
except UnexpectedResponse as e:
Expand Down
37 changes: 36 additions & 1 deletion src/qql/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -682,6 +682,31 @@ def _parse_search(self) -> SearchStmt:
self._expect(TokenKind.LIMIT)
limit = int(self._expect(TokenKind.INTEGER).value)

offset: int = 0
if self._peek().kind == TokenKind.OFFSET:
self._advance()
offset_tok = self._peek()
offset = int(self._expect(TokenKind.INTEGER).value)
if offset < 0:
raise QQLSyntaxError("OFFSET must be a non-negative integer", offset_tok.pos)

score_threshold: float | None = None
if self._peek().kind == TokenKind.SCORE:
self._advance()
self._expect(TokenKind.THRESHOLD)
score_threshold = float(self._parse_number())

lookup_from: tuple[str, str | None] | None = None
if self._peek().kind == TokenKind.LOOKUP:
self._advance()
self._expect(TokenKind.FROM)
lookup_collection = self._parse_identifier()
lookup_vector: str | None = None
if self._peek().kind == TokenKind.VECTOR:
self._advance()
lookup_vector = self._expect(TokenKind.STRING).value
lookup_from = (lookup_collection, lookup_vector)

with_clause: SearchWith | None = None
if self._peek().kind == TokenKind.EXACT:
self._advance()
Expand Down Expand Up @@ -757,6 +782,7 @@ def _parse_search(self) -> SearchStmt:
if self._peek().kind == TokenKind.MODEL:
self._advance() # consume MODEL
rerank_model = self._expect(TokenKind.STRING).value

if self._peek().kind == TokenKind.EXACT:
self._advance()
if with_clause is None:
Expand All @@ -771,6 +797,7 @@ def _parse_search(self) -> SearchStmt:
mmr_diversity=with_clause.mmr_diversity,
mmr_candidates=with_clause.mmr_candidates,
)

if self._peek().kind == TokenKind.WITH:
self._advance() # consume WITH
parsed_with = self._parse_with_clause()
Expand All @@ -793,6 +820,8 @@ def _parse_search(self) -> SearchStmt:
group_by: str | None = None
group_size: int = 3
if self._peek().kind == TokenKind.GROUP:
if offset > 0:
raise QQLSyntaxError("OFFSET cannot be used with GROUP BY", self._peek().pos)
self._advance() # consume GROUP
self._expect(TokenKind.BY)
group_by = self._parse_field_path()
Expand Down Expand Up @@ -827,6 +856,9 @@ def _parse_search(self) -> SearchStmt:
group_size=group_size,
dense_vector=dense_vector,
sparse_vector=sparse_vector,
offset=offset,
score_threshold=score_threshold,
lookup_from=lookup_from,
)

def _parse_recommend(self) -> RecommendStmt:
Expand Down Expand Up @@ -870,13 +902,16 @@ def _parse_recommend(self) -> RecommendStmt:
offset: int = 0
if self._peek().kind == TokenKind.OFFSET:
self._advance()
offset_tok = self._peek()
offset = int(self._expect(TokenKind.INTEGER).value)
if offset < 0:
raise QQLSyntaxError("OFFSET must be a non-negative integer", offset_tok.pos)

score_threshold: float | None = None
if self._peek().kind == TokenKind.SCORE:
self._advance()
self._expect(TokenKind.THRESHOLD)
score_threshold = float(self._expect(TokenKind.FLOAT).value)
score_threshold = float(self._parse_number())

query_filter: FilterExpr | None = None
if self._peek().kind == TokenKind.WHERE:
Expand Down
Loading
Loading