diff --git a/README.md b/README.md index bcd0876..41bc6f1 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ [![MIT License](https://img.shields.io/badge/license-MIT-green)](LICENSE) [![Tests](https://img.shields.io/badge/tests-405%20passing-brightgreen)](tests/) -Write `INSERT`, `SEARCH`, `SCROLL`, `RECOMMEND`, `DELETE`, and `CREATE COLLECTION` statements instead of Python SDK calls. Supports hybrid dense+sparse vector search, cross-encoder reranking, quantization (scalar, turbo, binary, product), SQL-style `WHERE` filters, script execution, and collection dump/restore. +Write `INSERT`, `SELECT`, `SEARCH`, `SCROLL`, `RECOMMEND`, `DELETE`, and `CREATE COLLECTION` statements instead of Python SDK calls. Supports hybrid dense+sparse vector search, cross-encoder reranking, quantization (scalar, turbo, binary, product), SQL-style `WHERE` filters, script execution, and collection dump/restore. ``` qql> INSERT INTO COLLECTION notes VALUES {'text': 'Qdrant is a vector database', 'author': 'alice', 'year': 2024} @@ -82,7 +82,7 @@ Full documentation lives in the [`docs/`](docs/) folder and at **[pavanjava.gith |---|---| | [Getting Started](docs/getting-started.md) | Installation, connecting, first queries | | [INSERT / INSERT BULK](docs/insert.md) | Adding documents, batch inserts, payload types | -| [SEARCH / SCROLL / RECOMMEND / Hybrid / RERANK](docs/search.md) | Semantic search, pagination, hybrid, reranking, recommendations | +| [SEARCH / SELECT / SCROLL / RECOMMEND / Hybrid / RERANK](docs/search.md) | Semantic search, point retrieval, pagination, hybrid, reranking, recommendations | | [WHERE Filters](docs/filters.md) | Full SQL-style filter operators | | [Collections & Quantization](docs/collections.md) | CREATE, DROP, QUANTIZE (scalar/turbo/binary/product), CREATE INDEX | | [Scripts: EXECUTE / DUMP](docs/scripts.md) | Script files, collection backup/restore | @@ -113,6 +113,9 @@ SCROLL FROM articles AFTER 'cursor-id' LIMIT 50 -- Recommend RECOMMEND FROM articles 
POSITIVE IDS (1001, 1002) LIMIT 5 +-- Select (retrieve a point by ID) +SELECT * FROM articles WHERE id = '3f2e1a4b-...' + -- Collections CREATE COLLECTION articles CREATE COLLECTION articles HYBRID diff --git a/docs/getting-started.md b/docs/getting-started.md index 1066f63..119b76d 100644 --- a/docs/getting-started.md +++ b/docs/getting-started.md @@ -143,6 +143,9 @@ SCROLL FROM notes LIMIT 10 -- List all collections SHOW COLLECTIONS + +-- Retrieve a point by ID +SELECT * FROM notes WHERE id = 1 ``` --- @@ -150,7 +153,7 @@ SHOW COLLECTIONS ## Next Steps - [INSERT / INSERT BULK](insert.md) — adding documents -- [SEARCH / SCROLL / RECOMMEND / Hybrid / RERANK](search.md) — querying +- [SEARCH / SELECT / SCROLL / RECOMMEND / Hybrid / RERANK](search.md) — querying - [WHERE Filters](filters.md) — payload filtering - [Collections & Quantization](collections.md) — managing collections - [Scripts: EXECUTE / DUMP](scripts.md) — automating with script files diff --git a/docs/index.html b/docs/index.html index 5a4b8af..f195910 100644 --- a/docs/index.html +++ b/docs/index.html @@ -148,8 +148,8 @@

INSERT / INSERT BULK

Adding documents, batch inserts, payload types

-

SEARCH / SCROLL / RECOMMEND

-

Semantic search, pagination, hybrid search, reranking, recommendations

+

SEARCH / SELECT / SCROLL / RECOMMEND

+

Semantic search, point retrieval, pagination, hybrid search, reranking, recommendations

WHERE Filters

diff --git a/docs/programmatic.md b/docs/programmatic.md index e252150..1d18eca 100644 --- a/docs/programmatic.md +++ b/docs/programmatic.md @@ -67,6 +67,13 @@ result = run_query( for hit in result.data: print(hit["score"], hit["payload"]) +# Retrieve a point by ID +result = run_query( + "SELECT * FROM notes WHERE id = 1", + url="http://localhost:6333", +) +print(result.data) # {"id": "1", "payload": {...}} + # Delete by filter result = run_query( "DELETE FROM notes WHERE year < 2023", @@ -120,6 +127,7 @@ class ExecutionResult: | INSERT (dense) | `{"id": int \| "", "collection": ""}` | | INSERT (hybrid) | `{"id": int \| "", "collection": ""}` | | INSERT BULK | `None` (count in `result.message`) | +| SELECT | `{"id": str, "payload": dict}` or `None` when not found | | SEARCH | `[{"id": str, "score": float, "payload": dict}, ...]` | | SCROLL | `{"points": [{"id": str, "payload": dict}, ...], "next_offset": str \| None}` | | RECOMMEND | `[{"id": str, "score": float, "payload": dict}, ...]` | diff --git a/docs/reference.md b/docs/reference.md index 778c8c0..93c59b7 100644 --- a/docs/reference.md +++ b/docs/reference.md @@ -174,8 +174,9 @@ Expected output: **405 tests passing**. | `Connection failed: ...` | Qdrant unreachable at given URL | Check that Qdrant is running and the URL is correct | | `INSERT requires a 'text' field in VALUES` | `text` key missing from the VALUES dict | Add `'text': '...'` to your dict | | `Vector dimension mismatch: collection '...' expects X dims, but model produces Y dims` | Model used in INSERT differs from the one used to create the collection | Use `USING MODEL` to specify the same model as the collection was created with | -| `Collection '...' does not exist` | SEARCH / SCROLL / DROP / DELETE on a non-existent collection | Check name spelling or run `SHOW COLLECTIONS` | -| `Unexpected token '...'; expected a QQL statement keyword` | Unrecognized statement | Check the query syntax; QQL does not support SQL SELECT | +| `Collection '...' 
does not exist` | SEARCH / SCROLL / SELECT / DROP / DELETE on a non-existent collection | Check name spelling or run `SHOW COLLECTIONS` | +| `Unexpected token '...'; expected a QQL statement keyword` | Unrecognized statement | Check the query syntax and supported statement list | +| `SELECT requires a string or integer point id, got '...'` | `SELECT` used with an `id` value that is neither a string nor an integer | Use `SELECT * FROM <collection> WHERE id = '<point_id>'` or an integer ID | | `Unterminated string literal (at position N)` | A string is missing its closing quote | Close the string with a matching `'` or `"` | | `Unexpected character '@' (at position N)` | A character not part of QQL syntax | Remove or quote the offending character | | `Expected a filter operator after field '...'` | Unknown operator in WHERE clause | Use one of: `=`, `!=`, `>`, `>=`, `<`, `<=`, `IN`, `NOT IN`, `BETWEEN`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `MATCH` | diff --git a/docs/search.md b/docs/search.md index 58edfdc..f475aa6 100644 --- a/docs/search.md +++ b/docs/search.md @@ -1,4 +1,4 @@ -# SEARCH, SCROLL, RECOMMEND, Hybrid Search & Reranking +# SEARCH, SELECT, SCROLL, RECOMMEND, Hybrid Search & Reranking --- @@ -70,6 +70,28 @@ Results are displayed as a table with three columns: --- +## SELECT — retrieve a point by ID + +Fetches a single point payload by exact point ID. + +**Syntax:** +```sql +SELECT * FROM <collection> WHERE id = '<point_id>' +SELECT * FROM <collection> WHERE id = <integer> +``` + +**Examples:** +```sql +SELECT * FROM articles WHERE id = '3f2e1a4b-8c91-4d0e-b123-abc123def456' +SELECT * FROM articles WHERE id = 42 +``` + +`SELECT` in this version is intentionally strict: +- only `*` projection is supported +- only `WHERE id = ...` is supported + +--- + ## Query-Time Search Params (`EXACT`, `WITH`) Use these when you want to debug retrieval quality or tune recall without changing collection-level settings. 
diff --git a/pyproject.toml b/pyproject.toml index fcd088b..10bdabb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "qql-cli" -version = "2.1.0" +version = "2.2.0" description = "QQL is a SQL-like query language and CLI for Qdrant vector database. Write INSERT, SEARCH, RECOMMEND, DELETE, and CREATE COLLECTION statements instead of Python SDK calls. Supports hybrid dense+sparse vector search, cross-encoder reranking, quantization (scalar, turbo, binary, product), WHERE clause filters, script execution, and collection dump/restore." readme = "README.md" license = { file = "LICENSE" } diff --git a/src/qql/ast_nodes.py b/src/qql/ast_nodes.py index 93638df..962d72b 100644 --- a/src/qql/ast_nodes.py +++ b/src/qql/ast_nodes.py @@ -180,6 +180,12 @@ class ShowCollectionsStmt: pass +@dataclass(frozen=True) +class SelectStmt: + collection: str + point_id: str | int + + @dataclass(frozen=True) class ScrollStmt: collection: str @@ -234,6 +240,7 @@ class DeleteStmt: | CreateIndexStmt | DropCollectionStmt | ShowCollectionsStmt + | SelectStmt | ScrollStmt | SearchStmt | RecommendStmt diff --git a/src/qql/cli.py b/src/qql/cli.py index 4329afd..1f04143 100644 --- a/src/qql/cli.py +++ b/src/qql/cli.py @@ -54,6 +54,9 @@ Optional: [yellow]WHERE[/yellow] Optional: [yellow]AFTER[/yellow] ''| + [yellow]SELECT * FROM[/yellow] [yellow]WHERE id =[/yellow] ''| + Retrieve a single point by its ID and return its payload. + [yellow]SEARCH[/yellow] [yellow]SIMILAR TO[/yellow] '' [yellow]LIMIT[/yellow] Semantic search by vector similarity. 
Optional: [yellow]USING MODEL[/yellow] '' @@ -419,5 +422,14 @@ def _run_and_print(executor: Executor, query: str) -> None: console.print(f"[dim]next_offset: {result.data['next_offset']}[/dim]") return + # Pretty-print SELECT result + if isinstance(result.data, dict) and "id" in result.data and "payload" in result.data: + table = Table(show_header=True, header_style="bold cyan") + table.add_column("ID") + table.add_column("Payload") + table.add_row(str(result.data["id"]), str(result.data["payload"])) + console.print(table) + return + # Fallback: print data as-is console.print(result.data) diff --git a/src/qql/executor.py b/src/qql/executor.py index bb4ab3c..fc261e5 100644 --- a/src/qql/executor.py +++ b/src/qql/executor.py @@ -76,6 +76,7 @@ QuantizationConfig, QuantizationType, RecommendStmt, + SelectStmt, ScrollStmt, SearchStmt, SearchWith, @@ -118,6 +119,8 @@ def execute(self, node: ASTNode) -> ExecutionResult: return self._execute_show(node) if isinstance(node, ScrollStmt): return self._execute_scroll(node) + if isinstance(node, SelectStmt): + return self._execute_select(node) if isinstance(node, SearchStmt): return self._execute_search(node) if isinstance(node, RecommendStmt): @@ -447,6 +450,33 @@ def _execute_scroll(self, node: ScrollStmt) -> ExecutionResult: data={"points": points, "next_offset": None if next_offset is None else str(next_offset)}, ) + def _execute_select(self, node: SelectStmt) -> ExecutionResult: + if not self._client.collection_exists(node.collection): + raise QQLRuntimeError(f"Collection '{node.collection}' does not exist") + + try: + records = self._client.retrieve( + collection_name=node.collection, + ids=[node.point_id], + with_payload=True, + with_vectors=False, + ) + except UnexpectedResponse as e: + raise QQLRuntimeError(f"Qdrant error during SELECT: {e}") from e + + if not records: + return ExecutionResult( + success=True, + message=f"Point '{node.point_id}' not found in '{node.collection}'", + ) + + record = records[0] + return 
ExecutionResult( + success=True, + message=f"Retrieved point '{node.point_id}' from '{node.collection}'", + data={"id": str(record.id), "payload": record.payload or {}}, + ) + def _execute_search(self, node: SearchStmt) -> ExecutionResult: if not self._client.collection_exists(node.collection): raise QQLRuntimeError(f"Collection '{node.collection}' does not exist") diff --git a/src/qql/lexer.py b/src/qql/lexer.py index fcf5d3c..0b397b0 100644 --- a/src/qql/lexer.py +++ b/src/qql/lexer.py @@ -35,6 +35,7 @@ class TokenKind(Enum): ON = auto() DROP = auto() SHOW = auto() + SELECT = auto() COLLECTIONS = auto() SCROLL = auto() SEARCH = auto() @@ -82,6 +83,7 @@ class TokenKind(Enum): RBRACKET = auto() LPAREN = auto() RPAREN = auto() + STAR = auto() COLON = auto() COMMA = auto() EQUALS = auto() @@ -126,6 +128,7 @@ class TokenKind(Enum): "ON": TokenKind.ON, "DROP": TokenKind.DROP, "SHOW": TokenKind.SHOW, + "SELECT": TokenKind.SELECT, "COLLECTIONS": TokenKind.COLLECTIONS, "SCROLL": TokenKind.SCROLL, "SEARCH": TokenKind.SEARCH, @@ -203,6 +206,9 @@ def tokenize(self, query: str) -> list[Token]: elif ch == ")": tokens.append(Token(TokenKind.RPAREN, ")", i)) i += 1 + elif ch == "*": + tokens.append(Token(TokenKind.STAR, "*", i)) + i += 1 elif ch == ":": tokens.append(Token(TokenKind.COLON, ":", i)) i += 1 diff --git a/src/qql/parser.py b/src/qql/parser.py index 5142e74..a5975a4 100644 --- a/src/qql/parser.py +++ b/src/qql/parser.py @@ -26,6 +26,7 @@ QuantizationConfig, QuantizationType, RecommendStmt, + SelectStmt, ScrollStmt, SearchStmt, SearchWith, @@ -66,6 +67,8 @@ def parse(self) -> ASTNode: node = self._parse_show() elif tok.kind == TokenKind.SCROLL: node = self._parse_scroll() + elif tok.kind == TokenKind.SELECT: + node = self._parse_select() elif tok.kind == TokenKind.SEARCH: node = self._parse_search() elif tok.kind == TokenKind.RECOMMEND: @@ -319,6 +322,17 @@ def _parse_scroll(self) -> ScrollStmt: after=after, ) + def _parse_select(self) -> SelectStmt: + 
self._expect(TokenKind.SELECT) + self._expect(TokenKind.STAR) + self._expect(TokenKind.FROM) + collection = self._parse_identifier() + self._expect(TokenKind.WHERE) + self._expect(TokenKind.ID) + self._expect(TokenKind.EQUALS) + point_id = self._parse_point_id_value("SELECT") + return SelectStmt(collection=collection, point_id=point_id) + def _parse_search(self) -> SearchStmt: self._expect(TokenKind.SEARCH) collection = self._parse_identifier() diff --git a/src/qql/script.py b/src/qql/script.py index 0cf997c..1bff1da 100644 --- a/src/qql/script.py +++ b/src/qql/script.py @@ -24,6 +24,7 @@ TokenKind.CREATE, TokenKind.DROP, TokenKind.SHOW, + TokenKind.SELECT, TokenKind.SCROLL, TokenKind.SEARCH, TokenKind.RECOMMEND, @@ -55,7 +56,7 @@ def split_statements(tokens: list[Token]) -> list[list[Token]]: """Split a flat token list into per-statement chunks. A new chunk begins whenever a statement-starter keyword (INSERT, CREATE, - DROP, SHOW, SCROLL, SEARCH, RECOMMEND, DELETE) is encountered at + DROP, SHOW, SCROLL, SELECT, SEARCH, RECOMMEND, DELETE) is encountered at brace/bracket/paren depth 0. The EOF sentinel is consumed and never included in any chunk. 
""" diff --git a/tests/test_executor.py b/tests/test_executor.py index f69aa84..0f318d8 100644 --- a/tests/test_executor.py +++ b/tests/test_executor.py @@ -10,6 +10,7 @@ QuantizationConfig, QuantizationType, RecommendStmt, + SelectStmt, ScrollStmt, SearchStmt, SearchWith, @@ -415,6 +416,44 @@ def test_scroll_nonexistent_collection_raises(self, executor, mock_client): executor.execute(node) +class TestSelect: + def test_select_by_id_returns_payload(self, executor, mock_client, mocker): + mock_client.collection_exists.return_value = True + rec = mocker.MagicMock() + rec.id = "abc-123" + rec.payload = {"text": "hello", "year": 2024} + mock_client.retrieve.return_value = [rec] + + node = SelectStmt(collection="notes", point_id="abc-123") + result = executor.execute(node) + + mock_client.retrieve.assert_called_once_with( + collection_name="notes", + ids=["abc-123"], + with_payload=True, + with_vectors=False, + ) + assert result.success is True + assert result.data == {"id": "abc-123", "payload": {"text": "hello", "year": 2024}} + + def test_select_not_found(self, executor, mock_client): + mock_client.collection_exists.return_value = True + mock_client.retrieve.return_value = [] + + node = SelectStmt(collection="notes", point_id=7) + result = executor.execute(node) + + assert result.success is True + assert "not found" in result.message + assert result.data is None + + def test_select_nonexistent_collection_raises(self, executor, mock_client): + mock_client.collection_exists.return_value = False + node = SelectStmt(collection="ghost", point_id="x") + with pytest.raises(QQLRuntimeError, match="does not exist"): + executor.execute(node) + + class TestSearch: def test_search_calls_qdrant_query_points(self, executor, mock_client, mocker): mock_client.collection_exists.return_value = True diff --git a/tests/test_lexer.py b/tests/test_lexer.py index c4088fe..1ac5e29 100644 --- a/tests/test_lexer.py +++ b/tests/test_lexer.py @@ -46,6 +46,13 @@ def test_scroll_keywords(self): 
assert TokenKind.AFTER in ks assert TokenKind.LIMIT in ks + def test_select_keywords(self): + ks = kinds("SELECT * FROM notes WHERE id = 'abc'") + assert ks[0] == TokenKind.SELECT + assert ks[1] == TokenKind.STAR + assert ks[2] == TokenKind.FROM + assert ks[4] == TokenKind.WHERE + def test_delete_keywords(self): ks = kinds("DELETE FROM foo WHERE id = 'abc'") assert ks[:4] == [TokenKind.DELETE, TokenKind.FROM, TokenKind.IDENTIFIER, TokenKind.WHERE] @@ -96,6 +103,10 @@ def test_brackets(self): assert ks[0] == TokenKind.LBRACKET assert ks[-2] == TokenKind.RBRACKET + def test_star(self): + ks = kinds("*") + assert ks[0] == TokenKind.STAR + class TestErrors: def test_unterminated_string(self): diff --git a/tests/test_parser.py b/tests/test_parser.py index 32f6cd5..e155152 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -24,6 +24,7 @@ QuantizationConfig, QuantizationType, RecommendStmt, + SelectStmt, ScrollStmt, SearchStmt, SearchWith, @@ -218,6 +219,23 @@ def test_scroll_with_where_and_after(self): assert isinstance(node.query_filter, CompareExpr) +class TestSelect: + def test_select_by_string_id(self): + node = parse("SELECT * FROM notes WHERE id = 'abc-123'") + assert isinstance(node, SelectStmt) + assert node.collection == "notes" + assert node.point_id == "abc-123" + + def test_select_by_integer_id(self): + node = parse("SELECT * FROM notes WHERE id = 42") + assert isinstance(node, SelectStmt) + assert node.point_id == 42 + + def test_select_requires_id_filter(self): + with pytest.raises(QQLSyntaxError): + parse("SELECT * FROM notes WHERE year = 2024") + + class TestSearch: def test_basic_search(self): node = parse("SEARCH notes SIMILAR TO 'hello world' LIMIT 5") @@ -363,7 +381,7 @@ def test_recommend_full_clause_order(self): class TestErrors: def test_unknown_keyword(self): with pytest.raises(QQLSyntaxError): - parse("SELECT * FROM foo") + parse("UPSERT INTO foo VALUES {'text': 'x'}") def test_missing_collection_name(self): with 
pytest.raises(QQLSyntaxError): diff --git a/tests/test_script.py b/tests/test_script.py index 27c27af..f8dca3b 100644 --- a/tests/test_script.py +++ b/tests/test_script.py @@ -123,6 +123,18 @@ def test_scroll_starts_new_top_level_statement(self): assert len(chunks) == 3 assert chunks[1][0].kind == TokenKind.SCROLL + def test_select_starts_new_top_level_statement(self): + from qql.lexer import TokenKind + + tokens = tokenize( + "SHOW COLLECTIONS\n" + "SELECT * FROM x WHERE id = 'id-1'\n" + "DROP COLLECTION x" + ) + chunks = split_statements(tokens) + assert len(chunks) == 3 + assert chunks[1][0].kind == TokenKind.SELECT + # ── run_script ────────────────────────────────────────────────────────────────