diff --git a/README.md b/README.md index d530f7c..9d11cb9 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ [](https://pypi.org/project/qql-cli/) [](https://pypi.org/project/qql-cli/) [](LICENSE) -[](tests/) +[](tests/) Write `INSERT`, `SEARCH`, `RECOMMEND`, `DELETE`, and `CREATE COLLECTION` statements instead of Python SDK calls. Supports hybrid dense+sparse vector search, cross-encoder reranking, quantization (scalar, turbo, binary, product), SQL-style `WHERE` filters, script execution, and collection dump/restore. @@ -48,7 +48,7 @@ Your query string Qdrant instance ``` -When you run `INSERT`, the `text` field is automatically converted into a dense vector using [Fastembed](https://github.com/qdrant/fastembed). In **hybrid mode** (`USING HYBRID`), a sparse BM25 vector is also generated alongside the dense vector, and searches use Qdrant's Reciprocal Rank Fusion (RRF) to merge the results of both retrieval methods. +When you run `INSERT`, the `text` field is automatically converted into a dense vector using [Fastembed](https://github.com/qdrant/fastembed). In **hybrid mode** (`USING HYBRID`), a sparse BM25 vector is also generated alongside the dense vector, and searches use Qdrant's Reciprocal Rank Fusion (RRF) by default to merge the results of both retrieval methods. You can switch hybrid search to Distribution-Based Score Fusion (DBSF) with `FUSION 'dbsf'`. --- @@ -102,6 +102,7 @@ INSERT BULK INTO COLLECTION articles VALUES [{'text': '...'}, {'text': '...'}] SEARCH articles SIMILAR TO 'query' LIMIT 10 SEARCH articles SIMILAR TO 'query' LIMIT 10 WHERE year >= 2020 SEARCH articles SIMILAR TO 'query' LIMIT 10 USING HYBRID +SEARCH articles SIMILAR TO 'query' LIMIT 10 USING HYBRID FUSION 'dbsf' SEARCH articles SIMILAR TO 'query' LIMIT 10 USING HYBRID RERANK -- Recommend @@ -137,7 +138,7 @@ Tests do not require a running Qdrant instance — the Qdrant client is mocked. pytest tests/ -v ``` -Expected: **375 tests passing**. +Expected: **405 tests passing**. 
--- diff --git a/docs/getting-started.md b/docs/getting-started.md index f44c1fd..e009a59 100644 --- a/docs/getting-started.md +++ b/docs/getting-started.md @@ -24,7 +24,7 @@ Your query string Qdrant instance ``` -When you run `INSERT`, the `text` field is automatically converted into a dense vector using [Fastembed](https://github.com/qdrant/fastembed). In **hybrid mode** (`USING HYBRID`), a sparse BM25 vector is also generated alongside the dense vector, and searches use Qdrant's Reciprocal Rank Fusion (RRF) to merge the results of both retrieval methods. +When you run `INSERT`, the `text` field is automatically converted into a dense vector using [Fastembed](https://github.com/qdrant/fastembed). In **hybrid mode** (`USING HYBRID`), a sparse BM25 vector is also generated alongside the dense vector, and searches use Qdrant's Reciprocal Rank Fusion (RRF) by default to merge the results of both retrieval methods. You can override that with `FUSION 'dbsf'` (Distribution-Based Score Fusion) on hybrid searches. --- diff --git a/docs/index.html b/docs/index.html index 78cf475..ecc8b10 100644 --- a/docs/index.html +++ b/docs/index.html @@ -114,7 +114,7 @@
# Install diff --git a/docs/reference.md b/docs/reference.md index cf054d5..7fde77c 100644 --- a/docs/reference.md +++ b/docs/reference.md @@ -36,6 +36,9 @@ SEARCH docs SIMILAR TO 'hello' LIMIT 5 USING MODEL 'BAAI/bge-small-en-v1.5' -- Hybrid with custom dense model SEARCH docs SIMILAR TO 'hello' LIMIT 5 USING HYBRID DENSE MODEL 'BAAI/bge-base-en-v1.5' +-- Hybrid with explicit fusion strategy +SEARCH docs SIMILAR TO 'hello' LIMIT 5 USING HYBRID FUSION 'dbsf' + -- Hybrid with both custom SEARCH docs SIMILAR TO 'hello' LIMIT 5 USING HYBRID DENSE MODEL 'BAAI/bge-base-en-v1.5' SPARSE MODEL 'prithivida/Splade_PP_en_v1' @@ -159,7 +162,7 @@ Tests do not require a running Qdrant instance — the Qdrant client is mocked. pytest tests/ -v ``` -Expected output: **375 tests passing**. +Expected output: **405 tests passing**. --- diff --git a/docs/search.md b/docs/search.md index 55d4e10..a88c1bc 100644 --- a/docs/search.md +++ b/docs/search.md @@ -14,7 +14,7 @@ SEARCHSIMILAR TO ' ' LIMIT SEARCH SIMILAR TO ' ' LIMIT USING MODEL ' ' SEARCH SIMILAR TO ' ' LIMIT [USING MODEL ' '] WHERE SEARCH SIMILAR TO ' ' LIMIT USING HYBRID -SEARCH SIMILAR TO ' ' LIMIT USING HYBRID [DENSE MODEL ' '] [SPARSE MODEL ' '] [WHERE ] +SEARCH SIMILAR TO ' ' LIMIT USING HYBRID [FUSION 'rrf|dbsf'] [DENSE MODEL ' '] [SPARSE MODEL ' '] [WHERE ] SEARCH SIMILAR TO ' ' LIMIT USING SPARSE [MODEL ' '] SEARCH SIMILAR TO ' ' LIMIT EXACT SEARCH SIMILAR TO ' ' LIMIT [USING ...] 
[WHERE ] [RERANK] WITH { hnsw_ef: , exact: true|false, acorn: true|false } @@ -33,7 +33,7 @@ Search only papers published after 2020: SEARCH articles SIMILAR TO 'deep learning' LIMIT 10 WHERE year > 2020 ``` -Hybrid search (combines dense semantic + sparse BM25 keyword retrieval via RRF): +Hybrid search (combines dense semantic + sparse BM25 keyword retrieval via RRF by default): ```sql SEARCH articles SIMILAR TO 'attention mechanism' LIMIT 10 USING HYBRID ``` @@ -100,13 +100,13 @@ SEARCH articles SIMILAR TO 'RAG' LIMIT 10 WHERE tag = 'li' WITH { acorn: true } ## Hybrid Search (USING HYBRID) -Hybrid search combines **dense semantic vectors** and **sparse BM25 keyword vectors** in a single query and merges the results with Qdrant's **Reciprocal Rank Fusion (RRF)** algorithm. This typically outperforms either method alone. +Hybrid search combines **dense semantic vectors** and **sparse BM25 keyword vectors** in a single query. By default QQL merges the two result sets with Qdrant's **Reciprocal Rank Fusion (RRF)** algorithm, and you can optionally switch to **Distribution-Based Score Fusion (DBSF)** with a `FUSION 'dbsf'` clause. ### How it works internally 1. Both a dense vector (`TextEmbedding`) and a sparse BM25 vector (`SparseTextEmbedding`) are generated from your query text. 2. Qdrant fetches the top candidates from each index independently (`prefetch limit = LIMIT × 4`). -3. The two result lists are merged using RRF — a rank-based fusion that does not require score normalization. +3. The two result lists are merged using the selected fusion strategy (`RRF` by default, or `DBSF` when requested). RRF is rank-based and needs no score normalization; DBSF normalizes each list's scores before summing them. 4. The final top-N results are returned. 
### Step 1: Create a hybrid collection @@ -139,6 +139,9 @@ SEARCH articles SIMILAR TO 'transformer architecture' LIMIT 10 USING HYBRID -- Hybrid search with a WHERE filter SEARCH articles SIMILAR TO 'attention' LIMIT 10 USING HYBRID WHERE year >= 2017 +-- Hybrid with DBSF fusion +SEARCH articles SIMILAR TO 'hybrid retrieval' LIMIT 10 USING HYBRID FUSION 'dbsf' + -- Hybrid with custom dense model SEARCH articles SIMILAR TO 'embeddings' LIMIT 5 USING HYBRID DENSE MODEL 'BAAI/bge-base-en-v1.5' @@ -154,6 +157,7 @@ SEARCH articles SIMILAR TO 'sparse retrieval' LIMIT 5 |---|---| | Dense model | configured default (`sentence-transformers/all-MiniLM-L6-v2`) | | Sparse model | `Qdrant/bm25` | +| Fusion | `rrf` | ### Dense vs. hybrid — when to use which diff --git a/src/qql/ast_nodes.py b/src/qql/ast_nodes.py index b9f8b50..6a723e8 100644 --- a/src/qql/ast_nodes.py +++ b/src/qql/ast_nodes.py @@ -187,6 +187,7 @@ class SearchStmt: limit: int model: str | None # dense model; None → use config default hybrid: bool = False # if True, use prefetch+RRF hybrid search + fusion: str | None = None # hybrid fusion strategy; None → default rrf sparse_only: bool = False # if True, query only the sparse vector (no dense) sparse_model: str | None = None # sparse model for hybrid/sparse-only; None → SparseEmbedder.DEFAULT_MODEL query_filter: FilterExpr | None = None # optional WHERE clause; default keeps existing tests valid diff --git a/src/qql/cli.py b/src/qql/cli.py index 25f18a3..7a485ee 100644 --- a/src/qql/cli.py +++ b/src/qql/cli.py @@ -52,7 +52,7 @@ [yellow]SEARCH[/yellow] [yellow]SIMILAR TO[/yellow] ' ' [yellow]LIMIT[/yellow] Semantic search by vector similarity. 
Optional: [yellow]USING MODEL[/yellow] ' ' - Optional: [yellow]USING HYBRID[/yellow] [DENSE MODEL ' '] [SPARSE MODEL ' '] + Optional: [yellow]USING HYBRID[/yellow] [FUSION 'rrf|dbsf'] [DENSE MODEL ' '] [SPARSE MODEL ' '] Optional: [yellow]USING SPARSE[/yellow] [MODEL ' '] sparse-vector-only search Optional: [yellow]WHERE[/yellow] (e.g. WHERE year > 2020 AND status = 'ok') Optional: [yellow]RERANK[/yellow] [MODEL ' '] rerank results with a cross-encoder diff --git a/src/qql/executor.py b/src/qql/executor.py index 9203a19..0338b8a 100644 --- a/src/qql/executor.py +++ b/src/qql/executor.py @@ -429,7 +429,7 @@ def _execute_search(self, node: SearchStmt) -> ExecutionResult: # enough material to reorder; only `node.limit` results are returned. fetch_limit = node.limit * _RERANK_FETCH_MULTIPLIER if node.rerank else node.limit - # ── Hybrid SEARCH: prefetch dense+sparse, fuse with RRF ─────────── + # ── Hybrid SEARCH: prefetch dense+sparse, fuse with the requested strategy ── if node.hybrid: dense_model = node.model or self._config.default_model sparse_model_name = node.sparse_model or SparseEmbedder.DEFAULT_MODEL @@ -460,7 +460,7 @@ def _execute_search(self, node: SearchStmt) -> ExecutionResult: params=search_params, ), ], - query=FusionQuery(fusion=Fusion.RRF), + query=FusionQuery(fusion=self._resolve_hybrid_fusion(node.fusion)), limit=fetch_limit, query_filter=qdrant_filter, ) @@ -563,6 +563,15 @@ def _execute_search(self, node: SearchStmt) -> ExecutionResult: data=results, ) + def _resolve_hybrid_fusion(self, fusion: str | None) -> Fusion: + if fusion is None or fusion == "rrf": + return Fusion.RRF + if fusion == "dbsf": + return Fusion.DBSF + raise QQLRuntimeError( + f"Unsupported hybrid fusion '{fusion}'; expected 'rrf' or 'dbsf'" + ) + def _execute_recommend(self, node: RecommendStmt) -> ExecutionResult: if not self._client.collection_exists(node.collection): raise QQLRuntimeError(f"Collection '{node.collection}' does not exist") diff --git a/src/qql/lexer.py 
b/src/qql/lexer.py index 7a1f283..aed0af1 100644 --- a/src/qql/lexer.py +++ b/src/qql/lexer.py @@ -14,6 +14,7 @@ class TokenKind(Enum): USING = auto() MODEL = auto() HYBRID = auto() + FUSION = auto() DENSE = auto() SPARSE = auto() RERANK = auto() @@ -102,6 +103,7 @@ class TokenKind(Enum): "USING": TokenKind.USING, "MODEL": TokenKind.MODEL, "HYBRID": TokenKind.HYBRID, + "FUSION": TokenKind.FUSION, "DENSE": TokenKind.DENSE, "SPARSE": TokenKind.SPARSE, "RERANK": TokenKind.RERANK, diff --git a/src/qql/parser.py b/src/qql/parser.py index ef5e6fc..8987693 100644 --- a/src/qql/parser.py +++ b/src/qql/parser.py @@ -43,6 +43,8 @@ TokenKind.LTE: "<=", } +_HYBRID_FUSION_VALUES = {"rrf", "dbsf"} + class Parser: def __init__(self, tokens: list[Token]) -> None: @@ -304,6 +306,7 @@ def _parse_search(self) -> SearchStmt: model: str | None = None hybrid: bool = False + fusion: str | None = None sparse_only: bool = False sparse_model: str | None = None if self._peek().kind == TokenKind.USING: @@ -311,9 +314,18 @@ def _parse_search(self) -> SearchStmt: if self._peek().kind == TokenKind.HYBRID: self._advance() # consume HYBRID hybrid = True - # Optional DENSE MODEL and/or SPARSE MODEL sub-clauses, any order - while self._peek().kind in (TokenKind.DENSE, TokenKind.SPARSE): + # Optional FUSION / DENSE MODEL / SPARSE MODEL sub-clauses, any order. 
+ while self._peek().kind in (TokenKind.FUSION, TokenKind.DENSE, TokenKind.SPARSE): sub = self._advance() + if sub.kind == TokenKind.FUSION: + value_tok = self._expect(TokenKind.STRING) + fusion = value_tok.value.lower() + if fusion not in _HYBRID_FUSION_VALUES: + raise QQLSyntaxError( + f"Unsupported hybrid fusion '{value_tok.value}'; expected 'rrf' or 'dbsf'", + value_tok.pos, + ) + continue self._expect(TokenKind.MODEL) m = self._expect(TokenKind.STRING).value if sub.kind == TokenKind.DENSE: @@ -368,6 +380,7 @@ def _parse_search(self) -> SearchStmt: limit=limit, model=model, hybrid=hybrid, + fusion=fusion, sparse_only=sparse_only, sparse_model=sparse_model, query_filter=query_filter, diff --git a/tests/test_executor.py b/tests/test_executor.py index d5408d8..62e0952 100644 --- a/tests/test_executor.py +++ b/tests/test_executor.py @@ -1063,6 +1063,29 @@ def test_hybrid_search_uses_rrf_fusion( assert isinstance(kw["query"], FusionQuery) assert kw["query"].fusion == Fusion.RRF + def test_hybrid_search_uses_dbsf_fusion( + self, executor, mock_client, mock_sparse_embedder, mocker + ): + from qdrant_client.models import Fusion, FusionQuery + + mock_client.collection_exists.return_value = True + mock_resp = mocker.MagicMock() + mock_resp.points = [] + mock_client.query_points.return_value = mock_resp + + node = SearchStmt( + collection="col", + query_text="q", + limit=5, + model=None, + hybrid=True, + fusion="dbsf", + ) + executor.execute(node) + kw = mock_client.query_points.call_args.kwargs + assert isinstance(kw["query"], FusionQuery) + assert kw["query"].fusion == Fusion.DBSF + def test_hybrid_search_prefetch_limit_is_4x( self, executor, mock_client, mock_sparse_embedder, mocker ): diff --git a/tests/test_lexer.py b/tests/test_lexer.py index 95bdb41..99a8f8a 100644 --- a/tests/test_lexer.py +++ b/tests/test_lexer.py @@ -212,6 +212,10 @@ def test_sparse_keyword_lowercase(self): ks = kinds("sparse") assert ks[0] == TokenKind.SPARSE + def test_fusion_keyword(self): + 
ks = kinds("FUSION") + assert ks[0] == TokenKind.FUSION + def test_hybrid_in_create_statement(self): ks = kinds("CREATE COLLECTION articles HYBRID") assert ks[3] == TokenKind.HYBRID diff --git a/tests/test_parser.py b/tests/test_parser.py index e229bf3..16f4614 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -704,6 +704,24 @@ def test_search_hybrid_with_where(self): assert isinstance(node.query_filter, CompareExpr) assert node.query_filter.field == "year" + def test_search_hybrid_with_dbsf_fusion(self): + node = parse( + "SEARCH docs SIMILAR TO 'q' LIMIT 10 USING HYBRID FUSION 'dbsf'" + ) + assert node.hybrid is True + assert node.fusion == "dbsf" + + def test_search_hybrid_with_fusion_and_models(self): + node = parse( + "SEARCH docs SIMILAR TO 'q' LIMIT 10 " + "USING HYBRID FUSION 'rrf' SPARSE MODEL 'Qdrant/bm25' " + "DENSE MODEL 'BAAI/bge-base-en-v1.5'" + ) + assert node.hybrid is True + assert node.fusion == "rrf" + assert node.sparse_model == "Qdrant/bm25" + assert node.model == "BAAI/bge-base-en-v1.5" + def test_search_hybrid_dense_model_and_where(self): node = parse( "SEARCH articles SIMILAR TO 'ml' LIMIT 10 " @@ -713,6 +731,10 @@ def test_search_hybrid_dense_model_and_where(self): assert node.model == "BAAI/bge-small-en-v1.5" assert isinstance(node.query_filter, CompareExpr) + def test_search_hybrid_rejects_unknown_fusion(self): + with pytest.raises(QQLSyntaxError, match="Unsupported hybrid fusion"): + parse("SEARCH docs SIMILAR TO 'q' LIMIT 10 USING HYBRID FUSION 'x'") + def test_search_hybrid_limit_preserved(self): node = parse("SEARCH col SIMILAR TO 'q' LIMIT 7 USING HYBRID") assert node.limit == 7