From 6e921b578093702ab491a20edba98ffd495f0814 Mon Sep 17 00:00:00 2001
From: "manthapavankumar11@gmail.com" <manthapavankumar11@gmail.com>
Date: Fri, 10 Apr 2026 06:57:42 +0530
Subject: [PATCH] enhanced with advanced filter logics

---
 README.md              | 232 +++++++++++++++++++++++++++++++++++++----
 src/qql/ast_nodes.py   | 117 ++++++++++++++++++++-
 src/qql/executor.py    | 136 +++++++++++++++++++++++-
 src/qql/lexer.py       | 108 +++++++++++++++++--
 src/qql/parser.py      | 227 +++++++++++++++++++++++++++++++++++++++-
 tests/test_executor.py | 215 ++++++++++++++++++++++++++++++++++++++
 tests/test_lexer.py    |  76 ++++++++++++++
 tests/test_parser.py   | 194 ++++++++++++++++++++++++++++++++++
 8 files changed, 1263 insertions(+), 42 deletions(-)

diff --git a/README.md b/README.md
index cc2d3c0..28de472 100644
--- a/README.md
+++ b/README.md
@@ -1,16 +1,16 @@
 # QQL — Qdrant Query Language
 
-A SQL-like CLI for [Qdrant](https://qdrant.tech), a high-performance vector database. Instead of writing Python SDK calls, you write natural query statements to insert, search, manage, and delete vector data.
+A SQL-like CLI for [Qdrant](https://qdrant.tech), a high-performance vector database. Instead of writing Python SDK calls, you write natural query statements to insert, search, manage, and delete vector data — including rich SQL-style `WHERE` filters.
 
 ```
-qql> INSERT INTO COLLECTION notes VALUES {'text': 'Qdrant is a vector database', 'author': 'alice'}
+qql> INSERT INTO COLLECTION notes VALUES {'text': 'Qdrant is a vector database', 'author': 'alice', 'year': 2024}
 ✓ Inserted 1 point [3f2e1a4b-8c91-4d0e-b123-abc123def456]
 
-qql> SEARCH notes SIMILAR TO 'vector storage engines' LIMIT 3
-✓ Found 2 result(s)
+qql> SEARCH notes SIMILAR TO 'vector storage engines' LIMIT 3 WHERE year >= 2023
+✓ Found 1 result(s)
  Score  │ ID                                   │ Payload
 ────────┼──────────────────────────────────────┼──────────────────────────────────────
- 0.8931 │ 3f2e1a4b-8c91-4d0e-b123-abc123def456 │ {'text': 'Qdrant is a ...', 'author': 'alice'}
+ 0.8931 │ 3f2e1a4b-8c91-4d0e-b123-abc123def456 │ {'text': 'Qdrant is a ...', 'author': 'alice', 'year': 2024}
 ```
 
 ---
@@ -24,6 +24,7 @@ qql> SEARCH notes SIMILAR TO 'vector storage engines' LIMIT 3
 - [All QQL Operations](#all-qql-operations)
   - [INSERT — add a point](#insert--add-a-point)
   - [SEARCH — find similar points](#search--find-similar-points)
+  - [WHERE Clause Filters](#where-clause-filters)
   - [SHOW COLLECTIONS — list collections](#show-collections--list-collections)
   - [CREATE COLLECTION — create a collection](#create-collection--create-a-collection)
   - [DROP COLLECTION — delete a collection](#drop-collection--delete-a-collection)
@@ -60,6 +61,8 @@ Your query string
 
 When you run `INSERT`, the `text` field in your dictionary is automatically converted into a dense vector using [Fastembed](https://github.com/qdrant/fastembed). The vector and the rest of your fields (stored as payload) are then upserted into Qdrant together. You never have to manage vectors manually.
 
+`SEARCH` also embeds your query text and finds the nearest vectors by cosine similarity. An optional `WHERE` clause lets you pre-filter the candidate set using any payload field before similarity ranking — exactly like a SQL `WHERE` on top of a vector search.
+
 ---
 
 ## Installation
@@ -195,7 +198,7 @@ Insert with a specific embedding model:
 INSERT INTO COLLECTION articles VALUES {'text': 'hello world'} USING MODEL 'BAAI/bge-small-en-v1.5'
 ```
 
-Insert with nested metadata:
+Insert with nested metadata and tags:
 ```sql
 INSERT INTO COLLECTION articles VALUES {
   'text': 'Attention is all you need',
@@ -221,10 +224,13 @@ INSERT INTO COLLECTION articles VALUES {
 
 Performs a **semantic similarity search**: your query text is embedded with the same model used during insert, then Qdrant finds the nearest vectors by cosine distance.
 
+An optional `WHERE` clause filters the candidate set **before** similarity ranking so you only get results that match both the semantic query and the payload conditions.
+
 **Syntax:**
 ```
 SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n>
 SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n> USING MODEL '<model_name>'
+SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n> [USING MODEL '<model_name>'] WHERE <filter>
 ```
 
 **Examples:**
@@ -234,9 +240,19 @@ Basic search, return top 5 results:
 SEARCH articles SIMILAR TO 'machine learning algorithms' LIMIT 5
 ```
 
-Search with a specific model:
+Search only papers published after 2020:
+```sql
+SEARCH articles SIMILAR TO 'deep learning' LIMIT 10 WHERE year > 2020
+```
+
+Search within a specific category, excluding drafts:
 ```sql
-SEARCH articles SIMILAR TO 'deep learning' LIMIT 10 USING MODEL 'BAAI/bge-small-en-v1.5'
+SEARCH articles SIMILAR TO 'neural networks' LIMIT 5 WHERE category = 'ml' AND status != 'draft'
+```
+
+Search with a model override and a filter:
+```sql
+SEARCH articles SIMILAR TO 'embeddings' LIMIT 10 USING MODEL 'BAAI/bge-small-en-v1.5' WHERE year >= 2022
 ```
 
 **Output:**
@@ -258,6 +274,165 @@ Results are displayed as a table with three columns:
 
 ---
 
+### WHERE Clause Filters
+
+The `WHERE` clause lets you filter on any payload field using SQL-style predicates. All standard comparison, range, membership, null-check, and full-text operators are supported.
+
+#### Equality and inequality
+
+```sql
+-- Exact match
+SEARCH articles SIMILAR TO 'ml' LIMIT 10 WHERE category = 'paper'
+
+-- Not equal
+SEARCH articles SIMILAR TO 'ml' LIMIT 10 WHERE status != 'draft'
+```
+
+#### Range comparisons
+
+```sql
+-- Greater than / less than
+SEARCH articles SIMILAR TO 'ai' LIMIT 5 WHERE score > 0.8
+SEARCH articles SIMILAR TO 'ai' LIMIT 5 WHERE year < 2024
+
+-- Greater than or equal / less than or equal
+SEARCH articles SIMILAR TO 'ai' LIMIT 5 WHERE score >= 0.75
+SEARCH articles SIMILAR TO 'ai' LIMIT 5 WHERE year <= 2023
+```
+
+#### BETWEEN … AND
+
+```sql
+-- Inclusive range (equivalent to year >= 2018 AND year <= 2023)
+SEARCH articles SIMILAR TO 'history of ai' LIMIT 10 WHERE year BETWEEN 2018 AND 2023
+```
+
+#### IN and NOT IN
+
+```sql
+-- Field value must be one of the listed values
+SEARCH articles SIMILAR TO 'retrieval' LIMIT 10 WHERE status IN ('published', 'reviewed')
+
+-- Field value must not be any of the listed values
+SEARCH articles SIMILAR TO 'retrieval' LIMIT 10 WHERE status NOT IN ('deleted', 'archived')
+
+-- Trailing commas are allowed
+SEARCH articles SIMILAR TO 'x' LIMIT 5 WHERE status IN ('a', 'b',)
+```
+
+#### IS NULL and IS NOT NULL
+
+```sql
+-- Points where the reviewer field exists and is explicitly null (a missing field does not match; use IS EMPTY for that)
+SEARCH articles SIMILAR TO 'peer review' LIMIT 5 WHERE reviewer IS NULL
+
+-- Points where reviewer is not explicitly null (non-null values match; points missing the field also match)
+SEARCH articles SIMILAR TO 'peer review' LIMIT 5 WHERE reviewer IS NOT NULL
+```
+
+#### IS EMPTY and IS NOT EMPTY
+
+```sql
+-- Points where the tags field is missing, null, or an empty list
+SEARCH articles SIMILAR TO 'untagged' LIMIT 5 WHERE tags IS EMPTY
+
+-- Points where the tags field is present with a non-null, non-empty value (e.g. a list with at least one element)
+SEARCH articles SIMILAR TO 'categorized' LIMIT 5 WHERE tags IS NOT EMPTY
+```
+
+#### Full-text MATCH
+
+```sql
+-- All terms in the string must appear in the field (full-text index required)
+SEARCH articles SIMILAR TO 'search' LIMIT 10 WHERE title MATCH 'vector database'
+
+-- Any term in the string can match
+SEARCH articles SIMILAR TO 'search' LIMIT 10 WHERE title MATCH ANY 'embedding retrieval'
+
+-- The exact phrase must appear
+SEARCH articles SIMILAR TO 'search' LIMIT 10 WHERE title MATCH PHRASE 'semantic search'
+```
+
+> Full-text MATCH requires a Qdrant full-text index on the field. Create one in the Qdrant dashboard or via the SDK before using MATCH filters.
+
+#### AND, OR, NOT — logical operators
+
+Operator precedence: `NOT` (highest) > `AND` > `OR` (lowest). Use parentheses to override.
+
+```sql
+-- AND: both conditions must be true
+SEARCH articles SIMILAR TO 'nlp' LIMIT 10 WHERE category = 'paper' AND year >= 2020
+
+-- OR: either condition can be true
+SEARCH articles SIMILAR TO 'llm' LIMIT 10 WHERE source = 'arxiv' OR source = 'pubmed'
+
+-- NOT: negate a condition
+SEARCH articles SIMILAR TO 'benchmark' LIMIT 10 WHERE NOT status = 'draft'
+
+-- Chained AND (three conditions, all must hold)
+SEARCH articles SIMILAR TO 'deep learning' LIMIT 20
+  WHERE year >= 2019 AND category = 'cv' AND status != 'retracted'
+
+-- Parentheses to group OR inside AND
+SEARCH articles SIMILAR TO 'conference paper' LIMIT 10
+  WHERE (source = 'arxiv' OR source = 'ieee') AND year >= 2022
+
+-- NOT on a parenthesized group
+SEARCH articles SIMILAR TO 'x' LIMIT 5 WHERE NOT (status = 'draft' OR status = 'deleted')
+```
+
+#### Dot-notation for nested fields
+
+Qdrant supports nested payload fields accessed with dot notation. Use the same path syntax in `WHERE`:
+
+```sql
+-- Filter on meta.source nested field
+SEARCH articles SIMILAR TO 'wikipedia' LIMIT 5 WHERE meta.source = 'web'
+
+-- Filter on a deeply nested array field
+SEARCH cities SIMILAR TO 'large city' LIMIT 5 WHERE country.cities[].population > 1000000
+```
+
+#### Combined example
+
+```sql
+-- Semantic search over research papers:
+-- must be from arxiv or IEEE, published 2020–2023, not retracted, with a reviewer assigned
+SEARCH papers SIMILAR TO 'attention mechanism transformers' LIMIT 20
+  WHERE (source = 'arxiv' OR source = 'ieee')
+    AND year BETWEEN 2020 AND 2023
+    AND status != 'retracted'
+    AND reviewer IS NOT NULL
+```
+
+#### Full filter reference
+
+| WHERE syntax | Description |
+|---|---|
+| `field = 'x'` | Exact match |
+| `field != 'x'` | Not equal |
+| `field > n` | Greater than |
+| `field >= n` | Greater than or equal |
+| `field < n` | Less than |
+| `field <= n` | Less than or equal |
+| `field BETWEEN a AND b` | Inclusive range |
+| `field IN ('a', 'b')` | Value in list |
+| `field NOT IN ('a', 'b')` | Value not in list |
+| `field IS NULL` | Field exists and is explicitly null |
+| `field IS NOT NULL` | Field is not explicitly null (missing fields also match) |
+| `field IS EMPTY` | Field is missing, null, or an empty list |
+| `field IS NOT EMPTY` | Field is present with a non-null, non-empty value |
+| `field MATCH 'text'` | All terms present (full-text) |
+| `field MATCH ANY 'text'` | Any term present (full-text) |
+| `field MATCH PHRASE 'text'` | Exact phrase present (full-text) |
+| `A AND B` | Both conditions must hold |
+| `A OR B` | Either condition must hold |
+| `NOT A` | Condition must not hold |
+| `(A OR B) AND C` | Parentheses for grouping |
+| `meta.source = 'x'` | Dot-notation nested field |
+
+---
+
 ### SHOW COLLECTIONS — list collections
 
 Lists all collections in the connected Qdrant instance.
@@ -373,6 +548,12 @@ INSERT INTO docs VALUES {'text': 'hello'} USING MODEL 'BAAI/bge-small-en-v1.5'
 SEARCH docs SIMILAR TO 'hello' LIMIT 5 USING MODEL 'BAAI/bge-small-en-v1.5'
 ```
 
+`USING MODEL` and `WHERE` can be combined:
+
+```sql
+SEARCH docs SIMILAR TO 'hello' LIMIT 5 USING MODEL 'BAAI/bge-small-en-v1.5' WHERE year >= 2022
+```
+
 ### Commonly available Fastembed models
 
 | Model | Dimensions | Notes |
@@ -410,7 +591,7 @@ The `VALUES` dictionary (and nested dicts) supports these types:
 | Nested dict | `{'key': 'val'}` | Arbitrary nesting |
 | List | `['a', 'b', 1]` | Mixed types allowed |
 
-**Examples of each:**
+**Example using every type:**
 ```sql
 INSERT INTO demo VALUES {
   'text':    'example document',
@@ -468,21 +649,29 @@ QQL can also be used as a Python library without the CLI:
 ```python
 from qql import run_query
 
-# Single query
+# Insert a document
 result = run_query(
-    "INSERT INTO COLLECTION notes VALUES {'text': 'hello world', 'author': 'alice'}",
+    "INSERT INTO COLLECTION notes VALUES {'text': 'hello world', 'author': 'alice', 'year': 2024}",
     url="http://localhost:6333",
 )
 print(result.message)   # "Inserted 1 point [<uuid>]"
 print(result.data)      # {"id": "...", "collection": "notes"}
 
-# Search
+# Basic search
 result = run_query(
     "SEARCH notes SIMILAR TO 'hello' LIMIT 5",
     url="http://localhost:6333",
 )
 for hit in result.data:
     print(hit["score"], hit["id"], hit["payload"])
+
+# Search with a WHERE filter
+result = run_query(
+    "SEARCH notes SIMILAR TO 'hello' LIMIT 5 WHERE year >= 2023 AND author != 'bot'",
+    url="http://localhost:6333",
+)
+for hit in result.data:
+    print(hit["score"], hit["payload"])
 ```
 
 Or use the pipeline directly for more control:
@@ -498,12 +687,13 @@ client = QdrantClient(url="http://localhost:6333")
 config = QQLConfig(url="http://localhost:6333")
 executor = Executor(client, config)
 
-query = "SHOW COLLECTIONS"
+query = "SEARCH articles SIMILAR TO 'deep learning' LIMIT 10 WHERE category = 'cv'"
 tokens = Lexer().tokenize(query)
 node = Parser(tokens).parse()
 result = executor.execute(node)
 
-print(result.data)   # ["notes", "articles", ...]
+for hit in result.data:
+    print(hit["score"], hit["payload"])
 ```
 
 ### ExecutionResult
@@ -541,14 +731,14 @@ qql/
 │       ├── config.py       # QQLConfig dataclass + ~/.qql/config.json I/O
 │       ├── exceptions.py   # QQLError, QQLSyntaxError, QQLRuntimeError
 │       ├── lexer.py        # Tokenizer: string → List[Token]
-│       ├── ast_nodes.py    # Frozen dataclasses for each statement type
+│       ├── ast_nodes.py    # Frozen dataclasses for each statement and filter type
 │       ├── parser.py       # Recursive descent parser: tokens → AST node
 │       ├── embedder.py     # Fastembed wrapper with per-model cache
-│       └── executor.py     # AST node → Qdrant client call
+│       └── executor.py     # AST node → Qdrant client call + filter conversion
 └── tests/
-    ├── test_lexer.py       # Tokenizer unit tests
-    ├── test_parser.py      # Parser unit tests (all 6 statement types)
-    └── test_executor.py    # Executor unit tests (mocked Qdrant client)
+    ├── test_lexer.py       # Tokenizer unit tests (keywords, operators, dot-paths)
+    ├── test_parser.py      # Parser unit tests (all statements + WHERE filters)
+    └── test_executor.py    # Executor unit tests (mocked Qdrant client + filter builders)
 ```
 
 ---
@@ -561,7 +751,7 @@ Tests do not require a running Qdrant instance — the Qdrant client is mocked.
 pytest tests/ -v
 ```
 
-Expected output: **54 tests passing**.
+Expected output: **118 tests passing**.
 
 ---
 
@@ -577,3 +767,5 @@ Expected output: **54 tests passing**.
 | `Unexpected token '...'; expected a QQL statement keyword` | Unrecognized statement | Check the query syntax; QQL does not support SQL SELECT |
 | `Unterminated string literal (at position N)` | A string is missing its closing quote | Close the string with a matching `'` or `"` |
 | `Unexpected character '@' (at position N)` | A character not part of QQL syntax | Remove or quote the offending character |
+| `Expected a filter operator after field '...'` | Unknown operator in WHERE clause | Use one of: `=`, `!=`, `>`, `>=`, `<`, `<=`, `IN`, `NOT IN`, `BETWEEN`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `MATCH` |
+| `Expected ')' ...` | Unclosed parenthesis in WHERE clause | Add the missing `)` to close the group |
diff --git a/src/qql/ast_nodes.py b/src/qql/ast_nodes.py
index a346c57..b8cd943 100644
--- a/src/qql/ast_nodes.py
+++ b/src/qql/ast_nodes.py
@@ -1,12 +1,122 @@
+from __future__ import annotations
+
 from dataclasses import dataclass
-from typing import Any
+from typing import Any, Union
+
+
+# ── Filter expression leaf nodes ──────────────────────────────────────────────
+
+@dataclass(frozen=True)
+class CompareExpr:
+    """field op literal  — covers =, !=, >, >=, <, <="""
+    field: str
+    op: str   # one of: "=", "!=", ">", ">=", "<", "<="
+    value: str | int | float
+
+
+@dataclass(frozen=True)
+class BetweenExpr:
+    """field BETWEEN low AND high"""
+    field: str
+    low: int | float
+    high: int | float
+
+
+@dataclass(frozen=True)
+class InExpr:
+    """field IN (v1, v2, ...)"""
+    field: str
+    values: tuple[str | int | float, ...]
+
+
+@dataclass(frozen=True)
+class NotInExpr:
+    """field NOT IN (v1, v2, ...)"""
+    field: str
+    values: tuple[str | int | float, ...]
+
+
+@dataclass(frozen=True)
+class IsNullExpr:
+    """field IS NULL"""
+    field: str
+
+
+@dataclass(frozen=True)
+class IsNotNullExpr:
+    """field IS NOT NULL"""
+    field: str
+
+
+@dataclass(frozen=True)
+class IsEmptyExpr:
+    """field IS EMPTY"""
+    field: str
+
+
+@dataclass(frozen=True)
+class IsNotEmptyExpr:
+    """field IS NOT EMPTY"""
+    field: str
+
+
+@dataclass(frozen=True)
+class MatchTextExpr:
+    """field MATCH 'text'  — all terms required (MatchText)"""
+    field: str
+    text: str
+
+
+@dataclass(frozen=True)
+class MatchAnyExpr:
+    """field MATCH ANY 'text'  — any term matches (MatchTextAny)"""
+    field: str
+    text: str
+
+
+@dataclass(frozen=True)
+class MatchPhraseExpr:
+    """field MATCH PHRASE 'text'  — exact phrase (MatchPhrase)"""
+    field: str
+    text: str
+
+
+# ── Filter expression logical nodes ──────────────────────────────────────────
+
+@dataclass(frozen=True)
+class AndExpr:
+    """A AND B AND C — flattened into a single node with N operands."""
+    operands: tuple[FilterExpr, ...]
+
+
+@dataclass(frozen=True)
+class OrExpr:
+    """A OR B OR C"""
+    operands: tuple[FilterExpr, ...]
+
+
+@dataclass(frozen=True)
+class NotExpr:
+    """NOT A"""
+    operand: FilterExpr
+
+
+# Union type covering all filter expression nodes
+FilterExpr = Union[
+    CompareExpr, BetweenExpr, InExpr, NotInExpr,
+    IsNullExpr, IsNotNullExpr, IsEmptyExpr, IsNotEmptyExpr,
+    MatchTextExpr, MatchAnyExpr, MatchPhraseExpr,
+    AndExpr, OrExpr, NotExpr,
+]
+
 
+# ── Statement nodes ───────────────────────────────────────────────────────────
 
 @dataclass(frozen=True)
 class InsertStmt:
     collection: str
     values: dict[str, Any]  # must contain "text" key
-    model: str | None  # None → use default
+    model: str | None       # None → use default
 
 
 @dataclass(frozen=True)
@@ -30,6 +140,7 @@ class SearchStmt:
     query_text: str
     limit: int
     model: str | None
+    query_filter: FilterExpr | None = None  # optional WHERE clause; default keeps existing tests valid
 
 
 @dataclass(frozen=True)
@@ -38,7 +149,7 @@ class DeleteStmt:
     point_id: str | int
 
 
-# Union type for all statement nodes
+# Union type for all top-level statement nodes
 ASTNode = (
     InsertStmt
     | CreateCollectionStmt
diff --git a/src/qql/executor.py b/src/qql/executor.py
index ac7a89c..e1936b0 100644
--- a/src/qql/executor.py
+++ b/src/qql/executor.py
@@ -6,14 +6,45 @@
 
 from qdrant_client import QdrantClient
 from qdrant_client.http.exceptions import UnexpectedResponse
-from qdrant_client.models import Distance, PointStruct, VectorParams
+from qdrant_client.models import (
+    Distance,
+    FieldCondition,
+    Filter,
+    IsEmptyCondition,
+    IsNullCondition,
+    MatchAny,
+    MatchExcept,
+    MatchPhrase,
+    MatchText,
+    MatchTextAny,
+    MatchValue,
+    PayloadField,
+    PointStruct,
+    Range,
+    VectorParams,
+)
 
 from .ast_nodes import (
     ASTNode,
+    AndExpr,
+    BetweenExpr,
+    CompareExpr,
     CreateCollectionStmt,
     DeleteStmt,
     DropCollectionStmt,
+    FilterExpr,
+    InExpr,
     InsertStmt,
+    IsEmptyExpr,
+    IsNotEmptyExpr,
+    IsNotNullExpr,
+    IsNullExpr,
+    MatchAnyExpr,
+    MatchPhraseExpr,
+    MatchTextExpr,
+    NotExpr,
+    NotInExpr,
+    OrExpr,
     SearchStmt,
     ShowCollectionsStmt,
 )
@@ -84,7 +115,6 @@ def _execute_create(self, node: CreateCollectionStmt) -> ExecutionResult:
                 success=True,
                 message=f"Collection '{node.collection}' already exists",
             )
-        # Create with default model dimensions
         embedder = Embedder(self._config.default_model)
         dims = embedder.dimensions
         self._client.create_collection(
@@ -122,11 +152,18 @@ def _execute_search(self, node: SearchStmt) -> ExecutionResult:
         embedder = Embedder(model_name)
         vector = embedder.embed(node.query_text)
 
+        qdrant_filter: Filter | None = None
+        if node.query_filter is not None:
+            qdrant_filter = self._wrap_as_filter(
+                self._build_qdrant_filter(node.query_filter)
+            )
+
         try:
             response = self._client.query_points(
                 collection_name=node.collection,
                 query=vector,
                 limit=node.limit,
+                query_filter=qdrant_filter,
             )
         except UnexpectedResponse as e:
             raise QQLRuntimeError(f"Qdrant error during SEARCH: {e}") from e
@@ -160,7 +197,100 @@ def _execute_delete(self, node: DeleteStmt) -> ExecutionResult:
             message=f"Deleted point '{node.point_id}' from '{node.collection}'",
         )
 
-    # ── Helpers ───────────────────────────────────────────────────────────
+    # ── Filter conversion ─────────────────────────────────────────────────
+
+    def _build_qdrant_filter(self, expr: FilterExpr) -> Any:
+        """Convert a FilterExpr AST node into a Qdrant model object.
+
+        Returns one of: Filter, FieldCondition, IsNullCondition, IsEmptyCondition.
+        Use _wrap_as_filter() to guarantee the top-level result is a Filter.
+        """
+        # ── Logical combinators ───────────────────────────────────────────
+        if isinstance(expr, AndExpr):
+            return Filter(must=[self._build_qdrant_filter(op) for op in expr.operands])
+
+        if isinstance(expr, OrExpr):
+            return Filter(should=[self._build_qdrant_filter(op) for op in expr.operands])
+
+        if isinstance(expr, NotExpr):
+            return Filter(must_not=[self._build_qdrant_filter(expr.operand)])
+
+        # ── Comparison ────────────────────────────────────────────────────
+        if isinstance(expr, CompareExpr):
+            if expr.op == "=":
+                return FieldCondition(
+                    key=expr.field, match=MatchValue(value=expr.value)
+                )
+            if expr.op == "!=":
+                return Filter(
+                    must_not=[
+                        FieldCondition(key=expr.field, match=MatchValue(value=expr.value))
+                    ]
+                )
+            _range_key = {">": "gt", ">=": "gte", "<": "lt", "<=": "lte"}[expr.op]
+            return FieldCondition(
+                key=expr.field, range=Range(**{_range_key: expr.value})
+            )
+
+        # ── BETWEEN ───────────────────────────────────────────────────────
+        if isinstance(expr, BetweenExpr):
+            return FieldCondition(
+                key=expr.field, range=Range(gte=expr.low, lte=expr.high)
+            )
+
+        # ── IN / NOT IN ───────────────────────────────────────────────────
+        if isinstance(expr, InExpr):
+            return FieldCondition(
+                key=expr.field, match=MatchAny(any=list(expr.values))
+            )
+
+        if isinstance(expr, NotInExpr):
+            return FieldCondition(
+                key=expr.field,
+                match=MatchExcept(**{"except": list(expr.values)}),
+            )
+
+        # ── IS NULL / IS NOT NULL ─────────────────────────────────────────
+        if isinstance(expr, IsNullExpr):
+            return IsNullCondition(is_null=PayloadField(key=expr.field))
+
+        if isinstance(expr, IsNotNullExpr):
+            return Filter(
+                must_not=[IsNullCondition(is_null=PayloadField(key=expr.field))]
+            )
+
+        # ── IS EMPTY / IS NOT EMPTY ───────────────────────────────────────
+        if isinstance(expr, IsEmptyExpr):
+            return IsEmptyCondition(is_empty=PayloadField(key=expr.field))
+
+        if isinstance(expr, IsNotEmptyExpr):
+            return Filter(
+                must_not=[IsEmptyCondition(is_empty=PayloadField(key=expr.field))]
+            )
+
+        # ── Full-text MATCH ───────────────────────────────────────────────
+        if isinstance(expr, MatchTextExpr):
+            return FieldCondition(key=expr.field, match=MatchText(text=expr.text))
+
+        if isinstance(expr, MatchAnyExpr):
+            return FieldCondition(
+                key=expr.field, match=MatchTextAny(text_any=expr.text)
+            )
+
+        if isinstance(expr, MatchPhraseExpr):
+            return FieldCondition(
+                key=expr.field, match=MatchPhrase(phrase=expr.text)
+            )
+
+        raise QQLRuntimeError(f"Unknown filter expression type: {type(expr)}")
+
+    def _wrap_as_filter(self, qdrant_expr: Any) -> Filter:
+        """Ensure the top-level expression is a Filter (required by query_points)."""
+        if isinstance(qdrant_expr, Filter):
+            return qdrant_expr
+        return Filter(must=[qdrant_expr])
+
+    # ── Collection helpers ────────────────────────────────────────────────
 
     def _ensure_collection(self, name: str, vector_size: int) -> None:
         """Create the collection if it doesn't exist. Raises on dimension mismatch."""
diff --git a/src/qql/lexer.py b/src/qql/lexer.py
index 7ed13a4..e3bf8e3 100644
--- a/src/qql/lexer.py
+++ b/src/qql/lexer.py
@@ -5,7 +5,7 @@
 
 
 class TokenKind(Enum):
-    # Keywords
+    # ── Statement keywords ────────────────────────────────────────────────
     INSERT = auto()
     INTO = auto()
     COLLECTION = auto()
@@ -24,24 +24,45 @@ class TokenKind(Enum):
     FROM = auto()
     WHERE = auto()
     ID = auto()
-    # Literals & names
+    # ── Filter keywords ───────────────────────────────────────────────────
+    AND = auto()
+    OR = auto()
+    NOT = auto()
+    IN = auto()
+    BETWEEN = auto()
+    IS = auto()
+    NULL = auto()
+    EMPTY = auto()
+    MATCH = auto()
+    ANY = auto()
+    PHRASE = auto()
+    # ── Literals & names ─────────────────────────────────────────────────
     IDENTIFIER = auto()
     STRING = auto()
     INTEGER = auto()
     FLOAT = auto()
-    # Punctuation
+    # ── Punctuation ───────────────────────────────────────────────────────
     LBRACE = auto()
     RBRACE = auto()
     LBRACKET = auto()
     RBRACKET = auto()
+    LPAREN = auto()
+    RPAREN = auto()
     COLON = auto()
     COMMA = auto()
     EQUALS = auto()
-    # Control
+    # ── Comparison operators ──────────────────────────────────────────────
+    NOT_EQUALS = auto()   # !=
+    GT = auto()           # >
+    GTE = auto()          # >=
+    LT = auto()           # <
+    LTE = auto()          # <=
+    # ── Control ───────────────────────────────────────────────────────────
     EOF = auto()
 
 
 _KEYWORDS: dict[str, TokenKind] = {
+    # Statement keywords
     "INSERT": TokenKind.INSERT,
     "INTO": TokenKind.INTO,
     "COLLECTION": TokenKind.COLLECTION,
@@ -60,6 +81,18 @@ class TokenKind(Enum):
     "FROM": TokenKind.FROM,
     "WHERE": TokenKind.WHERE,
     "ID": TokenKind.ID,
+    # Filter keywords
+    "AND": TokenKind.AND,
+    "OR": TokenKind.OR,
+    "NOT": TokenKind.NOT,
+    "IN": TokenKind.IN,
+    "BETWEEN": TokenKind.BETWEEN,
+    "IS": TokenKind.IS,
+    "NULL": TokenKind.NULL,
+    "EMPTY": TokenKind.EMPTY,
+    "MATCH": TokenKind.MATCH,
+    "ANY": TokenKind.ANY,
+    "PHRASE": TokenKind.PHRASE,
 }
 
 
@@ -83,7 +116,7 @@ def tokenize(self, query: str) -> list[Token]:
 
             ch = query[i]
 
-            # Single-character punctuation
+            # ── Braces / brackets / punctuation ──────────────────────────
             if ch == "{":
                 tokens.append(Token(TokenKind.LBRACE, "{", i))
                 i += 1
@@ -96,17 +129,45 @@ def tokenize(self, query: str) -> list[Token]:
             elif ch == "]":
                 tokens.append(Token(TokenKind.RBRACKET, "]", i))
                 i += 1
+            elif ch == "(":
+                tokens.append(Token(TokenKind.LPAREN, "(", i))
+                i += 1
+            elif ch == ")":
+                tokens.append(Token(TokenKind.RPAREN, ")", i))
+                i += 1
             elif ch == ":":
                 tokens.append(Token(TokenKind.COLON, ":", i))
                 i += 1
             elif ch == ",":
                 tokens.append(Token(TokenKind.COMMA, ",", i))
                 i += 1
+
+            # ── Comparison operators (multi-char look-ahead) ──────────────
             elif ch == "=":
                 tokens.append(Token(TokenKind.EQUALS, "=", i))
                 i += 1
+            elif ch == "!":
+                if i + 1 < n and query[i + 1] == "=":
+                    tokens.append(Token(TokenKind.NOT_EQUALS, "!=", i))
+                    i += 2
+                else:
+                    raise QQLSyntaxError(f"Unexpected character '!'", i)
+            elif ch == ">":
+                if i + 1 < n and query[i + 1] == "=":
+                    tokens.append(Token(TokenKind.GTE, ">=", i))
+                    i += 2
+                else:
+                    tokens.append(Token(TokenKind.GT, ">", i))
+                    i += 1
+            elif ch == "<":
+                if i + 1 < n and query[i + 1] == "=":
+                    tokens.append(Token(TokenKind.LTE, "<=", i))
+                    i += 2
+                else:
+                    tokens.append(Token(TokenKind.LT, "<", i))
+                    i += 1
 
-            # String literals
+            # ── String literals ───────────────────────────────────────────
             elif ch in ('"', "'"):
                 start = i
                 quote = ch
@@ -114,7 +175,6 @@ def tokenize(self, query: str) -> list[Token]:
                 buf: list[str] = []
                 while i < n:
                     if query[i] == "\\" and i + 1 < n:
-                        # Handle escape sequences
                         next_ch = query[i + 1]
                         if next_ch == "n":
                             buf.append("\n")
@@ -136,7 +196,7 @@ def tokenize(self, query: str) -> list[Token]:
                     raise QQLSyntaxError("Unterminated string literal", start)
                 tokens.append(Token(TokenKind.STRING, "".join(buf), start))
 
-            # Numbers: optional leading minus
+            # ── Numbers: optional leading minus ───────────────────────────
             elif ch.isdigit() or (ch == "-" and i + 1 < n and query[i + 1].isdigit()):
                 start = i
                 if ch == "-":
@@ -151,14 +211,40 @@ def tokenize(self, query: str) -> list[Token]:
                 else:
                     tokens.append(Token(TokenKind.INTEGER, query[start:i], start))
 
-            # Identifiers and keywords
+            # ── Identifiers, keywords, and dot-notation field paths ────────
             elif ch.isalpha() or ch == "_":
                 start = i
+                # Collect the base word
                 while i < n and (query[i].isalnum() or query[i] == "_"):
                     i += 1
+                # Extend for dotted field paths: consume ".word" and "[].word" segments
+                # so that meta.source and country.cities[].population become single tokens.
+                while i < n:
+                    if query[i] == "." and i + 1 < n and (query[i + 1].isalpha() or query[i + 1] == "_"):
+                        # ".identifier" segment
+                        i += 1  # consume "."
+                        while i < n and (query[i].isalnum() or query[i] == "_"):
+                            i += 1
+                    elif (
+                        i + 2 < n
+                        and query[i : i + 3] == "[]."
+                        and i + 3 < n
+                        and (query[i + 3].isalpha() or query[i + 3] == "_")
+                    ):
+                        # "[]." array marker segment
+                        i += 3  # consume "[]."
+                        while i < n and (query[i].isalnum() or query[i] == "_"):
+                            i += 1
+                    else:
+                        break
                 word = query[start:i]
-                upper = word.upper()
-                kind = _KEYWORDS.get(upper, TokenKind.IDENTIFIER)
+                # Keyword lookup applies only to undotted words; any dotted path is always
+                # an IDENTIFIER, so field names like "meta.from" are never treated as keywords.
+                first_segment = word.split(".")[0].upper()
+                if "." not in word and first_segment in _KEYWORDS:
+                    kind = _KEYWORDS[first_segment]
+                else:
+                    kind = TokenKind.IDENTIFIER
                 tokens.append(Token(kind, word, start))
 
             else:
diff --git a/src/qql/parser.py b/src/qql/parser.py
index 06eb619..17fe0f9 100644
--- a/src/qql/parser.py
+++ b/src/qql/parser.py
@@ -2,16 +2,41 @@
 
 from .ast_nodes import (
     ASTNode,
+    AndExpr,
+    BetweenExpr,
+    CompareExpr,
     CreateCollectionStmt,
     DeleteStmt,
     DropCollectionStmt,
+    FilterExpr,
+    InExpr,
     InsertStmt,
+    IsEmptyExpr,
+    IsNotEmptyExpr,
+    IsNotNullExpr,
+    IsNullExpr,
+    MatchAnyExpr,
+    MatchPhraseExpr,
+    MatchTextExpr,
+    NotExpr,
+    NotInExpr,
+    OrExpr,
     SearchStmt,
     ShowCollectionsStmt,
 )
 from .exceptions import QQLSyntaxError
 from .lexer import Token, TokenKind
 
+# Maps each comparison-operator TokenKind to the operator string stored in CompareExpr.op.
+_CMP_OPS: dict[TokenKind, str] = {
+    TokenKind.EQUALS:     "=",
+    TokenKind.NOT_EQUALS: "!=",
+    TokenKind.GT:         ">",
+    TokenKind.GTE:        ">=",
+    TokenKind.LT:         "<",
+    TokenKind.LTE:        "<=",
+}
+
 
 class Parser:
     def __init__(self, tokens: list[Token]) -> None:
@@ -88,7 +113,17 @@ def _parse_search(self) -> SearchStmt:
             self._advance()
             self._expect(TokenKind.MODEL)
             model = self._expect(TokenKind.STRING).value
-        return SearchStmt(collection=collection, query_text=query_text, limit=limit, model=model)
+        query_filter: FilterExpr | None = None
+        if self._peek().kind == TokenKind.WHERE:
+            self._advance()  # consume WHERE
+            query_filter = self._parse_filter_expr()
+        return SearchStmt(
+            collection=collection,
+            query_text=query_text,
+            limit=limit,
+            model=model,
+            query_filter=query_filter,
+        )
 
     def _parse_delete(self) -> DeleteStmt:
         self._expect(TokenKind.DELETE)
@@ -110,10 +145,187 @@ def _parse_delete(self) -> DeleteStmt:
             )
         return DeleteStmt(collection=collection, point_id=point_id)
 
-    # ── Value parsers ─────────────────────────────────────────────────────
+    # ── WHERE clause filter parsing (precedence: NOT > AND > OR) ─────────
+
+    def _parse_filter_expr(self) -> FilterExpr:
+        """Parse ``filter_and { OR filter_and }``; OR binds loosest of NOT, AND and OR."""
+        terms: list[FilterExpr] = [self._parse_filter_and()]
+        while self._peek().kind == TokenKind.OR:
+            self._advance()  # skip the OR keyword
+            terms.append(self._parse_filter_and())
+        # A lone term is returned unwrapped; two or more become one flat OrExpr.
+        if len(terms) == 1:
+            return terms[0]
+        return OrExpr(operands=tuple(terms))
+
+    def _parse_filter_and(self) -> FilterExpr:
+        """Parse ``filter_not { AND filter_not }``; AND binds tighter than OR, looser than NOT."""
+        factors: list[FilterExpr] = [self._parse_filter_not()]
+        while self._peek().kind == TokenKind.AND:
+            self._advance()  # skip the AND keyword
+            factors.append(self._parse_filter_not())
+        # A lone factor is returned unwrapped; two or more become one flat AndExpr.
+        if len(factors) == 1:
+            return factors[0]
+        return AndExpr(operands=tuple(factors))
+
+    def _parse_filter_not(self) -> FilterExpr:
+        """Parse ``NOT filter_not | filter_primary``; each prefix NOT adds one NotExpr layer."""
+        if self._peek().kind != TokenKind.NOT:
+            return self._parse_filter_primary()  # no prefix NOT: plain primary
+        self._advance()  # skip the NOT keyword
+        return NotExpr(operand=self._parse_filter_not())  # recurse so "NOT NOT x" nests
+
+    def _parse_filter_primary(self) -> FilterExpr:
+        """Parse a parenthesised ``filter_expr`` or, when no "(" is next, a single predicate."""
+        if self._peek().kind != TokenKind.LPAREN:
+            return self._parse_predicate()
+        self._advance()  # skip "("
+        grouped = self._parse_filter_expr()  # restart at the lowest-precedence rule
+        self._expect(TokenKind.RPAREN)  # the group must be closed by ")"
+        return grouped
+
+    def _parse_predicate(self) -> FilterExpr:
+        """Parse one leaf condition: a field path, then IS / IN / NOT IN / BETWEEN / MATCH / a comparison."""
+        field = self._parse_field_path()  # every predicate starts with a field path
+        tok = self._peek()  # the token that selects which predicate form follows
+
+        # ── IS NULL / IS NOT NULL / IS EMPTY / IS NOT EMPTY ──────────────
+        if tok.kind == TokenKind.IS:
+            self._advance()  # consume IS
+            if self._peek().kind == TokenKind.NOT:
+                self._advance()  # consume NOT
+                if self._peek().kind == TokenKind.NULL:
+                    self._advance()
+                    return IsNotNullExpr(field=field)
+                if self._peek().kind == TokenKind.EMPTY:
+                    self._advance()
+                    return IsNotEmptyExpr(field=field)
+                raise QQLSyntaxError(
+                    "Expected NULL or EMPTY after IS NOT", self._peek().pos
+                )
+            if self._peek().kind == TokenKind.NULL:
+                self._advance()
+                return IsNullExpr(field=field)
+            if self._peek().kind == TokenKind.EMPTY:
+                self._advance()
+                return IsEmptyExpr(field=field)
+            raise QQLSyntaxError(
+                "Expected NULL, NOT NULL, EMPTY, or NOT EMPTY after IS", self._peek().pos
+            )
+
+        # ── IN ( ... ) ────────────────────────────────────────────────────
+        if tok.kind == TokenKind.IN:
+            self._advance()  # consume IN
+            values = self._parse_literal_list()
+            return InExpr(field=field, values=tuple(values))
+
+        # ── NOT IN ( ... ) ────────────────────────────────────────────────
+        if tok.kind == TokenKind.NOT:
+            self._advance()  # consume NOT
+            self._expect(TokenKind.IN)  # after a field, NOT is only accepted as NOT IN
+            values = self._parse_literal_list()
+            return NotInExpr(field=field, values=tuple(values))
+
+        # ── BETWEEN low AND high ──────────────────────────────────────────
+        if tok.kind == TokenKind.BETWEEN:
+            self._advance()  # consume BETWEEN
+            low = self._parse_number()  # bounds must be INTEGER or FLOAT tokens
+            self._expect(TokenKind.AND)  # consumes AND as separator (not logical AND)
+            high = self._parse_number()
+            return BetweenExpr(field=field, low=low, high=high)
+
+        # ── MATCH / MATCH ANY / MATCH PHRASE ─────────────────────────────
+        if tok.kind == TokenKind.MATCH:
+            self._advance()  # consume MATCH
+            if self._peek().kind == TokenKind.ANY:
+                self._advance()
+                text = self._expect(TokenKind.STRING).value
+                return MatchAnyExpr(field=field, text=text)
+            if self._peek().kind == TokenKind.PHRASE:
+                self._advance()
+                text = self._expect(TokenKind.STRING).value
+                return MatchPhraseExpr(field=field, text=text)
+            # plain MATCH — all terms required
+            text = self._expect(TokenKind.STRING).value
+            return MatchTextExpr(field=field, text=text)
+
+        # ── Comparison operators: =, !=, >, >=, <, <= ────────────────────
+        if tok.kind in _CMP_OPS:
+            op = _CMP_OPS[tok.kind]
+            self._advance()
+            value = self._parse_literal()  # right-hand side: STRING, INTEGER or FLOAT
+            return CompareExpr(field=field, op=op, value=value)
+
+        raise QQLSyntaxError(
+            f"Expected a filter operator after field '{field}', got '{tok.value}'",
+            tok.pos,
+        )
+
+    # ── Filter parsing helpers ────────────────────────────────────────────
+
+    def _parse_field_path(self) -> str:
+        """Consume one IDENTIFIER token (the lexer already joins dotted paths) and return it."""
+        candidate = self._peek()
+        if candidate.kind == TokenKind.IDENTIFIER:
+            self._advance()
+            return candidate.value
+        # Any other token kind, including a keyword token, cannot name a field.
+        message = f"Expected a field name, got '{candidate.value}'"
+        raise QQLSyntaxError(message, candidate.pos)
+
+    def _parse_literal(self) -> str | int | float:
+        """Consume one STRING, INTEGER or FLOAT token and return its Python value."""
+        tok = self._peek()
+        accepted = (TokenKind.STRING, TokenKind.INTEGER, TokenKind.FLOAT)
+        if tok.kind not in accepted:
+            raise QQLSyntaxError(
+                f"Expected a literal value (string, integer, or float), got '{tok.value}'",
+                tok.pos,
+            )
+        self._advance()
+        if tok.kind == TokenKind.STRING:
+            return tok.value  # string literals are returned as the token's value
+        if tok.kind == TokenKind.INTEGER:
+            return int(tok.value)
+        # Only FLOAT remains after the membership check above.
+        return float(tok.value)
+
+    def _parse_number(self) -> int | float:
+        """Consume one INTEGER or FLOAT token (the BETWEEN bounds) and return its value."""
+        tok = self._peek()
+        if tok.kind == TokenKind.INTEGER:
+            to_number = int
+        elif tok.kind == TokenKind.FLOAT:
+            to_number = float
+        else:
+            # Strings and every other token kind are rejected here.
+            raise QQLSyntaxError(f"Expected a number, got '{tok.value}'", tok.pos)
+        self._advance()
+        return to_number(tok.value)
+
+    def _parse_literal_list(self) -> list[str | int | float]:
+        """'(' [ literal { ',' literal } [','] ] ')'  — used by IN / NOT IN; an empty '()' yields []."""
+        self._expect(TokenKind.LPAREN)
+        items: list[str | int | float] = []
+        if self._peek().kind == TokenKind.RPAREN:  # "()" — empty list, no literal required
+            self._advance()
+            return items
+        while True:
+            items.append(self._parse_literal())
+            if self._peek().kind == TokenKind.COMMA:
+                self._advance()
+                if self._peek().kind == TokenKind.RPAREN:
+                    break  # trailing comma allowed
+            else:
+                break  # no comma: the list must end here
+        self._expect(TokenKind.RPAREN)
+        return items
+
+    # ── Dict / value parsers (for INSERT VALUES) ──────────────────────────
 
     def _parse_identifier(self) -> str:
-        """Accept either a bare IDENTIFIER or a quoted STRING as a name."""
+        """Accept either a bare IDENTIFIER or a quoted STRING as a collection name."""
         tok = self._peek()
         if tok.kind == TokenKind.IDENTIFIER:
             self._advance()
@@ -144,9 +356,8 @@ def _parse_dict(self) -> dict[str, Any]:
             result[key] = value
             if self._peek().kind == TokenKind.COMMA:
                 self._advance()
-                # Allow trailing comma
                 if self._peek().kind == TokenKind.RBRACE:
-                    break
+                    break  # trailing comma
             else:
                 break
         self._expect(TokenKind.RBRACE)
@@ -180,6 +391,10 @@ def _parse_value(self) -> Any:
         if tok.kind == TokenKind.INTEGER:
             self._advance()
             return int(tok.value)
+        if tok.kind == TokenKind.NULL:
+            # NULL is now a keyword token
+            self._advance()
+            return None
         if tok.kind == TokenKind.IDENTIFIER:
             upper = tok.value.upper()
             if upper == "TRUE":
@@ -189,6 +404,8 @@ def _parse_value(self) -> Any:
                 self._advance()
                 return False
             if upper == "NULL":
+                # Fallback: handle 'null' that arrived as IDENTIFIER (shouldn't happen
+                # after lexer change, but kept for safety)
                 self._advance()
                 return None
             self._advance()
diff --git a/tests/test_executor.py b/tests/test_executor.py
index 3fd0a65..b3e45af 100644
--- a/tests/test_executor.py
+++ b/tests/test_executor.py
@@ -166,3 +166,218 @@ def test_delete_nonexistent_collection_raises(self, executor, mock_client):
         node = DeleteStmt(collection="ghost", point_id="x")
         with pytest.raises(QQLRuntimeError, match="does not exist"):
             executor.execute(node)
+
+
+class TestSearchWithFilter:
+    """Tests for filter pass-through on search execution, _build_qdrant_filter, and _wrap_as_filter."""
+
+    def _search_node(self, query_filter=None):
+        return SearchStmt(
+            collection="docs", query_text="hello", limit=5, model=None,
+            query_filter=query_filter,
+        )
+
+    def test_search_without_filter_passes_none_to_qdrant(self, executor, mock_client, mocker):
+        mock_client.collection_exists.return_value = True
+        mock_response = mocker.MagicMock()
+        mock_response.points = []
+        mock_client.query_points.return_value = mock_response
+
+        executor.execute(self._search_node())
+
+        call_kwargs = mock_client.query_points.call_args.kwargs
+        assert call_kwargs.get("query_filter") is None
+
+    def test_search_with_filter_passes_filter_to_qdrant(self, executor, mock_client, mocker):
+        mock_client.collection_exists.return_value = True
+        mock_response = mocker.MagicMock()
+        mock_response.points = []
+        mock_client.query_points.return_value = mock_response
+
+        from qql.ast_nodes import CompareExpr
+        node = self._search_node(query_filter=CompareExpr(field="cat", op="=", value="ai"))
+        executor.execute(node)
+
+        call_kwargs = mock_client.query_points.call_args.kwargs
+        assert call_kwargs.get("query_filter") is not None
+
+    # ── _build_qdrant_filter / _wrap_as_filter unit tests (no Qdrant connection needed) ─────
+
+    def test_build_equality(self, executor):
+        from qdrant_client.models import FieldCondition, Filter, MatchValue
+        from qql.ast_nodes import CompareExpr
+
+        result = executor._wrap_as_filter(
+            executor._build_qdrant_filter(CompareExpr(field="status", op="=", value="active"))
+        )
+        assert isinstance(result, Filter)
+        fc = result.must[0]
+        assert isinstance(fc, FieldCondition)
+        assert fc.match == MatchValue(value="active")
+
+    def test_build_not_equals(self, executor):
+        from qdrant_client.models import Filter
+        from qql.ast_nodes import CompareExpr
+
+        result = executor._build_qdrant_filter(CompareExpr(field="s", op="!=", value="x"))
+        assert isinstance(result, Filter)
+        assert result.must_not is not None and len(result.must_not) == 1
+
+    def test_build_range_gt(self, executor):
+        from qdrant_client.models import FieldCondition
+        from qql.ast_nodes import CompareExpr
+
+        result = executor._build_qdrant_filter(CompareExpr(field="score", op=">", value=0.8))
+        assert isinstance(result, FieldCondition)
+        assert result.range.gt == pytest.approx(0.8)
+
+    def test_build_range_gte(self, executor):
+        from qdrant_client.models import FieldCondition
+        from qql.ast_nodes import CompareExpr
+
+        result = executor._build_qdrant_filter(CompareExpr(field="year", op=">=", value=2020))
+        assert isinstance(result, FieldCondition)
+        assert result.range.gte == 2020
+
+    def test_build_range_lt(self, executor):
+        from qdrant_client.models import FieldCondition
+        from qql.ast_nodes import CompareExpr
+
+        result = executor._build_qdrant_filter(CompareExpr(field="year", op="<", value=2024))
+        assert isinstance(result, FieldCondition)
+        assert result.range.lt == 2024
+
+    def test_build_range_lte(self, executor):
+        from qdrant_client.models import FieldCondition
+        from qql.ast_nodes import CompareExpr
+
+        result = executor._build_qdrant_filter(CompareExpr(field="year", op="<=", value=2023))
+        assert isinstance(result, FieldCondition)
+        assert result.range.lte == 2023
+
+    def test_build_between(self, executor):
+        from qdrant_client.models import FieldCondition
+        from qql.ast_nodes import BetweenExpr
+
+        result = executor._build_qdrant_filter(BetweenExpr(field="year", low=2018, high=2023))
+        assert isinstance(result, FieldCondition)
+        assert result.range.gte == 2018  # both bounds are asserted as inclusive
+        assert result.range.lte == 2023
+
+    def test_build_in(self, executor):
+        from qdrant_client.models import FieldCondition, MatchAny
+        from qql.ast_nodes import InExpr
+
+        result = executor._build_qdrant_filter(InExpr(field="status", values=("a", "b")))
+        assert isinstance(result, FieldCondition)
+        assert isinstance(result.match, MatchAny)
+
+    def test_build_not_in(self, executor):
+        from qdrant_client.models import FieldCondition, MatchExcept
+        from qql.ast_nodes import NotInExpr
+
+        result = executor._build_qdrant_filter(NotInExpr(field="status", values=("deleted",)))
+        assert isinstance(result, FieldCondition)
+        assert isinstance(result.match, MatchExcept)
+
+    def test_build_is_null(self, executor):
+        from qdrant_client.models import IsNullCondition
+        from qql.ast_nodes import IsNullExpr
+
+        result = executor._build_qdrant_filter(IsNullExpr(field="reviewer"))
+        assert isinstance(result, IsNullCondition)
+
+    def test_build_is_not_null(self, executor):
+        from qdrant_client.models import Filter, IsNullCondition
+        from qql.ast_nodes import IsNotNullExpr
+
+        result = executor._build_qdrant_filter(IsNotNullExpr(field="reviewer"))
+        assert isinstance(result, Filter)
+        assert isinstance(result.must_not[0], IsNullCondition)
+
+    def test_build_is_empty(self, executor):
+        from qdrant_client.models import IsEmptyCondition
+        from qql.ast_nodes import IsEmptyExpr
+
+        result = executor._build_qdrant_filter(IsEmptyExpr(field="tags"))
+        assert isinstance(result, IsEmptyCondition)
+
+    def test_build_is_not_empty(self, executor):
+        from qdrant_client.models import Filter, IsEmptyCondition
+        from qql.ast_nodes import IsNotEmptyExpr
+
+        result = executor._build_qdrant_filter(IsNotEmptyExpr(field="tags"))
+        assert isinstance(result, Filter)
+        assert isinstance(result.must_not[0], IsEmptyCondition)
+
+    def test_build_match_text(self, executor):
+        from qdrant_client.models import FieldCondition, MatchText
+        from qql.ast_nodes import MatchTextExpr
+
+        result = executor._build_qdrant_filter(MatchTextExpr(field="title", text="vector db"))
+        assert isinstance(result, FieldCondition)
+        assert isinstance(result.match, MatchText)
+        assert result.match.text == "vector db"
+
+    def test_build_match_any(self, executor):
+        from qdrant_client.models import FieldCondition, MatchTextAny
+        from qql.ast_nodes import MatchAnyExpr
+
+        result = executor._build_qdrant_filter(MatchAnyExpr(field="title", text="nlp ai"))
+        assert isinstance(result, FieldCondition)
+        assert isinstance(result.match, MatchTextAny)
+
+    def test_build_match_phrase(self, executor):
+        from qdrant_client.models import FieldCondition, MatchPhrase
+        from qql.ast_nodes import MatchPhraseExpr
+
+        result = executor._build_qdrant_filter(MatchPhraseExpr(field="title", text="quick fox"))
+        assert isinstance(result, FieldCondition)
+        assert isinstance(result.match, MatchPhrase)
+
+    def test_build_and(self, executor):
+        from qdrant_client.models import Filter
+        from qql.ast_nodes import AndExpr, CompareExpr
+
+        expr = AndExpr(operands=(
+            CompareExpr(field="a", op="=", value="x"),
+            CompareExpr(field="b", op="=", value="y"),
+        ))
+        result = executor._build_qdrant_filter(expr)
+        assert isinstance(result, Filter)
+        assert len(result.must) == 2
+
+    def test_build_or(self, executor):
+        from qdrant_client.models import Filter
+        from qql.ast_nodes import CompareExpr, OrExpr
+
+        expr = OrExpr(operands=(
+            CompareExpr(field="src", op="=", value="arxiv"),
+            CompareExpr(field="src", op="=", value="ieee"),
+        ))
+        result = executor._build_qdrant_filter(expr)
+        assert isinstance(result, Filter)
+        assert len(result.should) == 2
+
+    def test_build_not(self, executor):
+        from qdrant_client.models import Filter
+        from qql.ast_nodes import CompareExpr, NotExpr
+
+        expr = NotExpr(operand=CompareExpr(field="st", op="=", value="draft"))
+        result = executor._build_qdrant_filter(expr)
+        assert isinstance(result, Filter)
+        assert result.must_not is not None
+
+    def test_wrap_as_filter_passthrough(self, executor):
+        from qdrant_client.models import Filter
+
+        f = Filter(must=[])
+        assert executor._wrap_as_filter(f) is f  # an existing Filter is returned as the same object
+
+    def test_wrap_as_filter_wraps_field_condition(self, executor):
+        from qdrant_client.models import FieldCondition, Filter, MatchValue
+
+        fc = FieldCondition(key="x", match=MatchValue(value="y"))
+        result = executor._wrap_as_filter(fc)
+        assert isinstance(result, Filter)
+        assert result.must[0] is fc
diff --git a/tests/test_lexer.py b/tests/test_lexer.py
index 3b13b09..93a00ea 100644
--- a/tests/test_lexer.py
+++ b/tests/test_lexer.py
@@ -105,6 +105,82 @@ def test_error_includes_position(self):
         assert exc_info.value.pos is not None
 
 
+class TestNewOperators:
+    def test_not_equals(self):
+        tokens = tokenize("field != 'x'")
+        assert tokens[1].kind == TokenKind.NOT_EQUALS  # tokens[0] is the field identifier
+        assert tokens[1].value == "!="
+
+    def test_gt(self):
+        tokens = tokenize("score > 0.5")
+        assert tokens[1].kind == TokenKind.GT
+        assert tokens[1].value == ">"
+
+    def test_gte(self):
+        tokens = tokenize("score >= 0.5")
+        assert tokens[1].kind == TokenKind.GTE
+        assert tokens[1].value == ">="
+
+    def test_lt(self):
+        tokens = tokenize("year < 2024")
+        assert tokens[1].kind == TokenKind.LT
+        assert tokens[1].value == "<"
+
+    def test_lte(self):
+        tokens = tokenize("year <= 2023")
+        assert tokens[1].kind == TokenKind.LTE
+        assert tokens[1].value == "<="
+
+    def test_lparen_rparen(self):
+        ks = kinds("(a OR b)")
+        assert TokenKind.LPAREN in ks
+        assert TokenKind.RPAREN in ks
+
+    def test_filter_keywords(self):
+        ks = kinds("AND OR NOT IN BETWEEN IS NULL EMPTY MATCH ANY PHRASE")
+        assert TokenKind.AND     in ks
+        assert TokenKind.OR      in ks
+        assert TokenKind.NOT     in ks
+        assert TokenKind.IN      in ks
+        assert TokenKind.BETWEEN in ks
+        assert TokenKind.IS      in ks
+        assert TokenKind.NULL    in ks
+        assert TokenKind.EMPTY   in ks
+        assert TokenKind.MATCH   in ks
+        assert TokenKind.ANY     in ks
+        assert TokenKind.PHRASE  in ks
+
+    def test_filter_keywords_case_insensitive(self):
+        ks = kinds("and or not in between is null empty match any phrase")  # NOTE(review): only AND/OR/NOT are asserted below
+        assert TokenKind.AND in ks
+        assert TokenKind.OR  in ks
+        assert TokenKind.NOT in ks
+
+    def test_dotted_identifier(self):
+        tokens = tokenize("meta.source")
+        assert tokens[0].kind == TokenKind.IDENTIFIER
+        assert tokens[0].value == "meta.source"  # the whole dotted path is one token
+
+    def test_three_level_dotted_identifier(self):
+        tokens = tokenize("a.b.c")
+        assert tokens[0].kind == TokenKind.IDENTIFIER
+        assert tokens[0].value == "a.b.c"
+
+    def test_nested_array_path(self):
+        tokens = tokenize("country.cities[].population")
+        assert tokens[0].kind == TokenKind.IDENTIFIER
+        assert tokens[0].value == "country.cities[].population"  # "[]." stays inside the token
+
+    def test_gt_does_not_consume_equals_sign(self):
+        # ">" that is not immediately followed by "=" must lex as GT, not GTE
+        tokens = tokenize("a > b")
+        assert tokens[1].kind == TokenKind.GT
+
+    def test_bare_exclamation_raises(self):
+        with pytest.raises(QQLSyntaxError):
+            tokenize("field ! 'x'")  # "!" not followed by "=" is expected to be rejected
+
+
 class TestEOF:
     def test_ends_with_eof(self):
         tokens = tokenize("hello")
diff --git a/tests/test_parser.py b/tests/test_parser.py
index d296c8b..adcb2ed 100644
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -1,10 +1,24 @@
 import pytest
 
 from qql.ast_nodes import (
+    AndExpr,
+    BetweenExpr,
+    CompareExpr,
     CreateCollectionStmt,
     DeleteStmt,
     DropCollectionStmt,
+    InExpr,
     InsertStmt,
+    IsEmptyExpr,
+    IsNotEmptyExpr,
+    IsNotNullExpr,
+    IsNullExpr,
+    MatchAnyExpr,
+    MatchPhraseExpr,
+    MatchTextExpr,
+    NotExpr,
+    NotInExpr,
+    OrExpr,
     SearchStmt,
     ShowCollectionsStmt,
 )
@@ -132,3 +146,183 @@ def test_missing_collection_name(self):
     def test_empty_input(self):
         with pytest.raises(QQLSyntaxError):
             parse("")
+
+
+class TestSearchWithWhere:
+    def test_no_where_clause(self):
+        node = parse("SEARCH docs SIMILAR TO 'ml' LIMIT 5")
+        assert node.query_filter is None  # without WHERE the filter stays None
+
+    def test_equality_filter(self):
+        node = parse("SEARCH docs SIMILAR TO 'ml' LIMIT 5 WHERE category = 'paper'")
+        f = node.query_filter
+        assert isinstance(f, CompareExpr)
+        assert f.field == "category"
+        assert f.op == "="
+        assert f.value == "paper"
+
+    def test_not_equals_filter(self):
+        node = parse("SEARCH docs SIMILAR TO 'ml' LIMIT 5 WHERE status != 'draft'")
+        f = node.query_filter
+        assert isinstance(f, CompareExpr)
+        assert f.op == "!="
+        assert f.value == "draft"
+
+    def test_range_gt(self):
+        node = parse("SEARCH docs SIMILAR TO 'ml' LIMIT 5 WHERE score > 0.8")
+        f = node.query_filter
+        assert isinstance(f, CompareExpr)
+        assert f.op == ">"
+        assert f.value == pytest.approx(0.8)
+
+    def test_range_gte(self):
+        node = parse("SEARCH docs SIMILAR TO 'x' LIMIT 5 WHERE year >= 2020")
+        assert isinstance(node.query_filter, CompareExpr)
+        assert node.query_filter.op == ">="
+        assert node.query_filter.value == 2020
+
+    def test_range_lt(self):
+        node = parse("SEARCH docs SIMILAR TO 'x' LIMIT 5 WHERE year < 2024")
+        assert isinstance(node.query_filter, CompareExpr)
+        assert node.query_filter.op == "<"
+
+    def test_range_lte(self):
+        node = parse("SEARCH docs SIMILAR TO 'x' LIMIT 5 WHERE year <= 2023")
+        assert isinstance(node.query_filter, CompareExpr)
+        assert node.query_filter.op == "<="
+
+    def test_between(self):
+        node = parse("SEARCH docs SIMILAR TO 'x' LIMIT 5 WHERE year BETWEEN 2018 AND 2023")
+        f = node.query_filter
+        assert isinstance(f, BetweenExpr)
+        assert f.field == "year"
+        assert f.low == 2018
+        assert f.high == 2023
+
+    def test_in_expr(self):
+        node = parse("SEARCH docs SIMILAR TO 'x' LIMIT 5 WHERE status IN ('a', 'b')")
+        f = node.query_filter
+        assert isinstance(f, InExpr)
+        assert f.field == "status"
+        assert f.values == ("a", "b")  # values are stored as a tuple
+
+    def test_in_with_trailing_comma(self):
+        node = parse("SEARCH docs SIMILAR TO 'x' LIMIT 5 WHERE status IN ('a', 'b',)")
+        assert isinstance(node.query_filter, InExpr)
+        assert len(node.query_filter.values) == 2  # the trailing comma adds no element
+
+    def test_not_in_expr(self):
+        node = parse("SEARCH docs SIMILAR TO 'x' LIMIT 5 WHERE status NOT IN ('deleted', 'archived')")
+        f = node.query_filter
+        assert isinstance(f, NotInExpr)
+        assert f.values == ("deleted", "archived")
+
+    def test_is_null(self):
+        node = parse("SEARCH docs SIMILAR TO 'x' LIMIT 5 WHERE reviewer IS NULL")
+        f = node.query_filter
+        assert isinstance(f, IsNullExpr)
+        assert f.field == "reviewer"
+
+    def test_is_not_null(self):
+        node = parse("SEARCH docs SIMILAR TO 'x' LIMIT 5 WHERE reviewer IS NOT NULL")
+        assert isinstance(node.query_filter, IsNotNullExpr)
+        assert node.query_filter.field == "reviewer"
+
+    def test_is_empty(self):
+        node = parse("SEARCH docs SIMILAR TO 'x' LIMIT 5 WHERE tags IS EMPTY")
+        assert isinstance(node.query_filter, IsEmptyExpr)
+        assert node.query_filter.field == "tags"
+
+    def test_is_not_empty(self):
+        node = parse("SEARCH docs SIMILAR TO 'x' LIMIT 5 WHERE tags IS NOT EMPTY")
+        assert isinstance(node.query_filter, IsNotEmptyExpr)
+
+    def test_match_text(self):
+        node = parse("SEARCH docs SIMILAR TO 'x' LIMIT 5 WHERE title MATCH 'deep learning'")
+        f = node.query_filter
+        assert isinstance(f, MatchTextExpr)
+        assert f.field == "title"
+        assert f.text == "deep learning"
+
+    def test_match_any(self):
+        node = parse("SEARCH docs SIMILAR TO 'x' LIMIT 5 WHERE title MATCH ANY 'nlp ai'")
+        f = node.query_filter
+        assert isinstance(f, MatchAnyExpr)
+        assert f.text == "nlp ai"
+
+    def test_match_phrase(self):
+        node = parse("SEARCH docs SIMILAR TO 'x' LIMIT 5 WHERE title MATCH PHRASE 'neural net'")
+        assert isinstance(node.query_filter, MatchPhraseExpr)
+
+    def test_and_expr_two_operands(self):
+        node = parse("SEARCH docs SIMILAR TO 'x' LIMIT 5 WHERE a = '1' AND b = '2'")
+        f = node.query_filter
+        assert isinstance(f, AndExpr)
+        assert len(f.operands) == 2
+        assert all(isinstance(op, CompareExpr) for op in f.operands)
+
+    def test_and_expr_three_operands_flattened(self):
+        node = parse(
+            "SEARCH d SIMILAR TO 'x' LIMIT 5 WHERE a = '1' AND b = '2' AND c = '3'"
+        )
+        f = node.query_filter
+        assert isinstance(f, AndExpr)
+        assert len(f.operands) == 3  # one flat AndExpr, not AndExpr nested inside AndExpr
+
+    def test_or_expr(self):
+        node = parse("SEARCH docs SIMILAR TO 'x' LIMIT 5 WHERE a = '1' OR b = '2'")
+        f = node.query_filter
+        assert isinstance(f, OrExpr)
+        assert len(f.operands) == 2
+
+    def test_not_expr(self):
+        node = parse("SEARCH docs SIMILAR TO 'x' LIMIT 5 WHERE NOT status = 'draft'")
+        f = node.query_filter
+        assert isinstance(f, NotExpr)
+        assert isinstance(f.operand, CompareExpr)
+
+    def test_parenthesized_or_inside_and(self):
+        node = parse(
+            "SEARCH docs SIMILAR TO 'x' LIMIT 5 "
+            "WHERE (src = 'a' OR src = 'b') AND year > 2020"
+        )
+        f = node.query_filter
+        assert isinstance(f, AndExpr)
+        assert isinstance(f.operands[0], OrExpr)  # the parenthesised group stays one operand
+        assert isinstance(f.operands[1], CompareExpr)
+
+    def test_dotted_field_path(self):
+        node = parse("SEARCH docs SIMILAR TO 'x' LIMIT 5 WHERE meta.source = 'web'")
+        assert isinstance(node.query_filter, CompareExpr)
+        assert node.query_filter.field == "meta.source"
+
+    def test_using_model_then_where(self):
+        node = parse(
+            "SEARCH docs SIMILAR TO 'x' LIMIT 5 "
+            "USING MODEL 'my-model' WHERE category = 'paper'"
+        )
+        assert node.model == "my-model"
+        assert isinstance(node.query_filter, CompareExpr)
+
+    def test_between_and_does_not_confuse_logical_and(self):
+        # The first AND separates the BETWEEN bounds; only the second AND is the logical operator
+        node = parse(
+            "SEARCH d SIMILAR TO 'x' LIMIT 5 WHERE year BETWEEN 2018 AND 2023 AND category = 'ai'"
+        )
+        f = node.query_filter
+        assert isinstance(f, AndExpr)
+        assert isinstance(f.operands[0], BetweenExpr)
+        assert isinstance(f.operands[1], CompareExpr)
+        assert len(f.operands) == 2
+
+    def test_not_negates_parenthesized_group(self):
+        node = parse(
+            "SEARCH d SIMILAR TO 'x' LIMIT 5 WHERE NOT (a = '1' OR b = '2')"
+        )
+        f = node.query_filter
+        assert isinstance(f, NotExpr)
+        assert isinstance(f.operand, OrExpr)
+
+    def test_missing_rparen_raises(self):
+        with pytest.raises(QQLSyntaxError):
+            parse("SEARCH docs SIMILAR TO 'x' LIMIT 5 WHERE (a = '1'")  # "(" is never closed