diff --git a/README.md b/README.md
index b45358f..76f416c 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,13 @@
 # QQL — Qdrant Query Language
 
-A SQL-like CLI for [Qdrant](https://qdrant.tech), a high-performance vector database. Instead of writing Python SDK calls, you write natural query statements to insert, search, manage, and delete vector data — including rich SQL-style `WHERE` filters and hybrid dense+sparse vector search.
+> A SQL-like query language and CLI for the [Qdrant](https://qdrant.tech) vector database.
+
+[![PyPI version](https://img.shields.io/pypi/v/qql-cli?color=blue&label=PyPI)](https://pypi.org/project/qql-cli/)
+[![Python 3.12+](https://img.shields.io/pypi/pyversions/qql-cli)](https://pypi.org/project/qql-cli/)
+[![MIT License](https://img.shields.io/badge/license-MIT-green)](LICENSE)
+[![Tests](https://img.shields.io/badge/tests-375%20passing-brightgreen)](tests/)
+
+Write `INSERT`, `SEARCH`, `RECOMMEND`, `DELETE`, and `CREATE COLLECTION` statements instead of Python SDK calls. Supports hybrid dense+sparse vector search, cross-encoder reranking, quantization (scalar, binary, product), SQL-style `WHERE` filters, script execution, and collection dump/restore.
 
 ```
 qql> INSERT INTO COLLECTION notes VALUES {'text': 'Qdrant is a vector database', 'author': 'alice', 'year': 2024}
@@ -12,55 +19,15 @@ qql> SEARCH notes SIMILAR TO 'vector storage engines' LIMIT 3 WHERE year >= 2023
 ────────┼──────────────────────────────────────┼──────────────────────────────────────
  0.8931 │ 3f2e1a4b-8c91-4d0e-b123-abc123def456 │ {'text': 'Qdrant is a ...', 'author': 'alice', 'year': 2024}
 
-qql> SEARCH notes SIMILAR TO 'vector databases' LIMIT 5 USING HYBRID
-✓ Found 1 result(s) (hybrid)
- Score  │ ID                                   │ Payload
-────────┼──────────────────────────────────────┼──────────────────────────────────────
- 0.9102 │ 3f2e1a4b-8c91-4d0e-b123-abc123def456 │ {'text': 'Qdrant is a ...', 'author': 'alice', 'year': 2024}
-
 qql> SEARCH notes SIMILAR TO 'vector databases' LIMIT 5 USING HYBRID RERANK
 ✓ Found 1 result(s) (hybrid, reranked)
  Score  │ ID                                   │ Payload
 ────────┼──────────────────────────────────────┼──────────────────────────────────────
- 5.3714 │ 3f2e1a4b-8c91-4d0e-b123-abc123def456 │ {'text': 'Qdrant is a ...', 'author': 'alice', 'year': 2024}
+ 5.3754 │ 3f2e1a4b-8c91-4d0e-b123-abc123def456 │ {'text': 'Qdrant is a ...', 'author': 'alice', 'year': 2024}
 ```
 
 ---
 
-## Table of Contents
-
-- [How It Works](#how-it-works)
-- [Installation](#installation)
-- [Connecting to Qdrant](#connecting-to-qdrant)
-- [The QQL Shell](#the-qql-shell)
-- [All QQL Operations](#all-qql-operations)
-  - [INSERT — add a point](#insert--add-a-point)
-  - [INSERT BULK — batch insert](#insert-bulk--batch-insert-multiple-points)
-  - [SEARCH — find similar points](#search--find-similar-points)
-  - [RECOMMEND — retrieve by example IDs](#recommend--retrieve-by-example-ids)
-  - [Query-Time Search Params (`EXACT`, `WITH`)](#query-time-search-params-exact-with)
-  - [WHERE Clause Filters](#where-clause-filters)
-  - [Hybrid Search (USING HYBRID)](#hybrid-search-using-hybrid)
-  - [Cross-Encoder Reranking (RERANK)](#cross-encoder-reranking-rerank)
-  - [SHOW COLLECTIONS — list collections](#show-collections--list-collections)
-  - [CREATE COLLECTION — create a collection](#create-collection--create-a-collection)
-  - [Quantization — QUANTIZE clause](#quantization--quantize-clause)
-  - [CREATE INDEX — create a payload index](#create-index--create-a-payload-index)
-  - [DROP COLLECTION — delete a collection](#drop-collection--delete-a-collection)
-  - [DELETE — remove points](#delete--remove-points)
-- [Script Files](#script-files)
-  - [EXECUTE — run a script file](#execute--run-a-qql-script-file)
-  - [DUMP COLLECTION — export to script](#dump-collection--export-collection-to-a-qql-script-file)
-- [Embedding Models](#embedding-models)
-- [Value Types in Dictionaries](#value-types-in-dictionaries)
-- [Configuration File](#configuration-file)
-- [Programmatic Usage](#programmatic-usage)
-- [Project Structure](#project-structure)
-- [Running Tests](#running-tests)
-- [Error Reference](#error-reference)
-
----
-
 ## How It Works
 
 QQL is a thin translation layer between a SQL-like query language and the Qdrant Python client. Every statement you type goes through three stages:
@@ -89,1424 +56,72 @@ When you run `INSERT`, the `text` field is automatically converted into a dense
 
 **Requirements:** Python 3.12+, a running Qdrant instance.
 
-### From PyPI
-
 ```bash
 pip install qql-cli
 ```
 
-### From source (development)
-
-```bash
-git clone <repo>
-cd qql
-pip install -e .
-```
-
-Or with [uv](https://docs.astral.sh/uv/):
-
-```bash
-uv sync
-```
-
-After installation the `qql` command is available globally in your terminal.
-
----
-
-## Connecting to Qdrant
-
-Before running any queries you must connect to a Qdrant instance. The connection config is saved to `~/.qql/config.json` and reused automatically in future sessions.
-
-### Local Qdrant (no API key)
+Connect to a Qdrant instance:
 
 ```bash
+# Local
 qql connect --url http://localhost:6333
-```
 
-### Qdrant Cloud (with API key)
-
-```bash
+# Qdrant Cloud
 qql connect --url https://<your-cluster>.qdrant.io --secret <your-api-key>
 ```
 
-On success you will see:
-
-```
-Connecting to http://localhost:6333...
-Connected. Config saved to ~/.qql/config.json
-
-QQL Interactive Shell  •  http://localhost:6333
-Type help for available commands or exit to quit.
-
-qql>
-```
-
-### Starting Qdrant locally with Docker
-
-If you do not have a Qdrant instance running yet:
-
-```bash
-docker run -p 6333:6333 qdrant/qdrant
-```
-
-### Disconnecting
-
-To remove the saved connection config:
-
-```bash
-qql disconnect
-```
+`qql connect` saves the connection to `~/.qql/config.json` and opens the interactive shell; in later sessions, run `qql` on its own to reopen it.
 
 ---
 
-## The QQL Shell
-
-Once connected, running `qql` alone (no arguments) reads the saved config and opens the interactive shell:
-
-```bash
-qql
-```
+## Documentation
 
-Inside the shell:
+Full documentation lives in the [`docs/`](docs/) folder and at **[pavanjava.github.io/qql](https://pavanjava.github.io/qql)**:
 
-| Input | Effect |
+| Topic | Description |
 |---|---|
-| A QQL statement | Executes it and prints the result |
-| `help` or `?` or `\h` | Prints a reference of all available commands |
-| `exit` or `quit` or `\q` or `:q` | Exits the shell |
-| Empty line / Enter | Ignored |
-| Ctrl-D or Ctrl-C | Exits the shell |
-
-All keywords are **case-insensitive** — `INSERT`, `insert`, and `Insert` all work.
-
----
-
-## All QQL Operations
-
-### INSERT — add a point
-
-Inserts a new document into a collection. The `text` field is **mandatory** — it is automatically vectorized and stored as the point's vector. All other fields become searchable payload (metadata).
-
-If the collection does not exist yet, it is **created automatically** with the correct vector dimensions.
-
-If you include an `id` field in `VALUES`, QQL uses it as the Qdrant point ID. Supported explicit IDs are unsigned integers or UUID strings. If you omit `id`, QQL generates a UUID automatically.
-
-**Syntax:**
-```
-INSERT INTO COLLECTION <collection_name> VALUES {<dict>}
-INSERT INTO COLLECTION <collection_name> VALUES {<dict>} USING MODEL '<model_name>'
-INSERT INTO COLLECTION <collection_name> VALUES {<dict>} USING HYBRID
-INSERT INTO COLLECTION <collection_name> VALUES {<dict>} USING HYBRID DENSE MODEL '<model>' SPARSE MODEL '<model>'
-```
-
-**Examples:**
-
-Minimal insert (text only):
-```sql
-INSERT INTO COLLECTION articles VALUES {'text': 'Qdrant supports cosine similarity search'}
-```
-
-Insert with metadata:
-```sql
-INSERT INTO COLLECTION articles VALUES {
-  'id': 1001,
-  'text': 'Neural networks learn representations from data',
-  'author': 'alice',
-  'category': 'ml',
-  'year': 2024,
-  'published': true
-}
-```
-
-Insert with a specific embedding model:
-```sql
-INSERT INTO COLLECTION articles VALUES {'text': 'hello world'} USING MODEL 'BAAI/bge-small-en-v1.5'
-```
-
-Insert into a hybrid collection (dense + sparse BM25 vectors):
-```sql
-INSERT INTO COLLECTION articles VALUES {'text': 'Attention is all you need'} USING HYBRID
-```
-
-Insert with custom models for both dense and sparse:
-```sql
-INSERT INTO COLLECTION articles VALUES {'text': 'hello world'}
-  USING HYBRID DENSE MODEL 'BAAI/bge-base-en-v1.5' SPARSE MODEL 'prithivida/Splade_PP_en_v1'
-```
-
-**What happens internally:**
-1. The `text` value is embedded into a dense vector using the configured model.
-2. In hybrid mode, a sparse BM25 vector is also generated.
-3. If `id` is provided, it is used as the point ID; otherwise a UUID is auto-generated.
-4. All fields except `id` are stored in the payload.
-5. The point is upserted into Qdrant.
-
-**Rules:**
-- `text` is always required. Omitting it raises an error.
-- `id`, when provided, must be an unsigned integer or UUID string.
-- If the collection already exists with a different vector size (from a different model), an error is raised with a clear message.
-- Hybrid inserts require a hybrid collection (created with `CREATE COLLECTION ... HYBRID` or auto-created on first `USING HYBRID` insert).
-
----
-
-### INSERT BULK — batch insert multiple points
-
-Inserts multiple documents in a single statement. Each item in the array must contain a `"text"` key. All items are embedded and upserted to Qdrant in **one batched call**, which is significantly faster than issuing one `INSERT` per record.
-
-If the collection does not exist yet, it is **created automatically** on the first bulk insert.
-
-Each record may optionally include an `id` field. This is the preferred way to keep seed data deterministic and to make follow-up operations like `RECOMMEND` or `DELETE` reproducible.
-
-**Syntax:**
-```
-INSERT BULK INTO COLLECTION <collection_name> VALUES [<dict>, <dict>, ...]
-INSERT BULK INTO COLLECTION <collection_name> VALUES [<dict>, ...] USING MODEL '<model_name>'
-INSERT BULK INTO COLLECTION <collection_name> VALUES [<dict>, ...] USING HYBRID
-INSERT BULK INTO COLLECTION <collection_name> VALUES [<dict>, ...] USING HYBRID DENSE MODEL '<model>' SPARSE MODEL '<model>'
-```
-
-**Examples:**
-
-Minimal bulk insert (text only):
-```sql
-INSERT BULK INTO COLLECTION articles VALUES [
-  {'text': 'Qdrant supports cosine similarity search'},
-  {'text': 'Sparse BM25 vectors enable keyword retrieval'},
-  {'text': 'Hybrid search combines dense and sparse results via RRF'}
-]
-```
-
-Bulk insert with metadata:
-```sql
-INSERT BULK INTO COLLECTION articles VALUES [
-  {'id': 1001, 'text': 'Attention is all you need', 'author': 'vaswani', 'year': 2017},
-  {'id': 1002, 'text': 'BERT: Pre-training of deep bidirectional transformers', 'author': 'devlin', 'year': 2018},
-  {'id': 1003, 'text': 'Language models are few-shot learners', 'author': 'brown', 'year': 2020}
-]
-```
-
-Bulk insert into a hybrid collection:
-```sql
-INSERT BULK INTO COLLECTION articles VALUES [
-  {'text': 'Dense retrieval with FAISS', 'domain': 'ir'},
-  {'text': 'Sparse retrieval with BM25', 'domain': 'ir'}
-] USING HYBRID
-```
-
-**Rules:**
-- Every dict in the array must contain a `"text"` key. Missing `text` on any item raises an error with the offending index.
-- An empty array `[]` raises an error.
-- `id`, when provided, must be an unsigned integer or UUID string.
-- Supports all the same `USING` clauses as single `INSERT`.
-
----
-
-### SEARCH — find similar points
-
-Performs a **semantic similarity search**: your query text is embedded with the same model used during insert, then Qdrant finds the nearest vectors by cosine distance.
-
-An optional `WHERE` clause filters the candidate set **before** similarity ranking so you only get results that match both the semantic query and the payload conditions.
-
-**Syntax:**
-```
-SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n>
-SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n> USING MODEL '<model_name>'
-SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n> [USING MODEL '<model>'] WHERE <filter>
-SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n> USING HYBRID
-SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n> USING HYBRID [DENSE MODEL '<model>'] [SPARSE MODEL '<model>'] [WHERE <filter>]
-SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n> USING SPARSE [MODEL '<sparse_model>']
-SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n> EXACT
-SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n> [USING ...] [WHERE <filter>] [RERANK] WITH { hnsw_ef: <n>, exact: true|false, acorn: true|false }
-SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n> [USING ...] [WHERE <filter>] RERANK [MODEL '<reranker_model>']
-```
-
-**Examples:**
-
-Basic search, return top 5 results:
-```sql
-SEARCH articles SIMILAR TO 'machine learning algorithms' LIMIT 5
-```
-
-Search only papers published after 2020:
-```sql
-SEARCH articles SIMILAR TO 'deep learning' LIMIT 10 WHERE year > 2020
-```
-
-Search within a specific category, excluding drafts:
-```sql
-SEARCH articles SIMILAR TO 'neural networks' LIMIT 5 WHERE category = 'ml' AND status != 'draft'
-```
-
-Hybrid search (combines dense semantic + sparse BM25 keyword retrieval via RRF):
-```sql
-SEARCH articles SIMILAR TO 'attention mechanism' LIMIT 10 USING HYBRID
-```
-
-Hybrid search with a WHERE filter:
-```sql
-SEARCH articles SIMILAR TO 'transformers' LIMIT 10 USING HYBRID WHERE year >= 2020
-```
-
-Sparse-only search (queries only the `sparse` named vector — useful for pure keyword retrieval):
-```sql
-SEARCH medical_knowledge SIMILAR TO 'beta blocker contraindications' LIMIT 5 USING SPARSE
-```
-
-Sparse-only with a custom SPLADE model:
-```sql
-SEARCH medical_knowledge SIMILAR TO 'beta blocker contraindications' LIMIT 5 USING SPARSE MODEL 'prithivida/Splade_PP_en_v1'
-```
-
-Exact search for recall debugging:
-```sql
-SEARCH articles SIMILAR TO 'attention mechanism' LIMIT 10 EXACT
-```
-
-Search with query-time HNSW tuning:
-```sql
-SEARCH articles SIMILAR TO 'attention mechanism' LIMIT 10 WITH { hnsw_ef: 128 }
-```
-
-**Output:**
-
-Results are displayed as a table with three columns:
-
-```
- Score  │ ID                                   │ Payload
-────────┼──────────────────────────────────────┼──────────────────────────────────
- 0.9241 │ 3f2e1a4b-...                          │ {'text': 'Neural networks...', 'author': 'alice'}
- 0.8817 │ 7a1b2c3d-...                          │ {'text': 'Attention is all...', 'tags': [...]}
-```
-
-- **Score** — similarity score. Higher is more relevant.
-- **ID** — the point ID returned by Qdrant. This may be an integer or a UUID string.
-- **Payload** — all fields stored alongside the vector.
-
-**Important:** Use the same model for SEARCH as you used for INSERT. Mixing models produces meaningless scores because the vectors live in different spaces.
+| [Getting Started](docs/getting-started.md) | Installation, connecting, first queries |
+| [INSERT / INSERT BULK](docs/insert.md) | Adding documents, batch inserts, payload types |
+| [SEARCH / RECOMMEND / Hybrid / RERANK](docs/search.md) | Semantic search, hybrid, reranking, recommendations |
+| [WHERE Filters](docs/filters.md) | Full SQL-style filter operators |
+| [Collections & Quantization](docs/collections.md) | CREATE, DROP, QUANTIZE (scalar/binary/product), CREATE INDEX |
+| [Scripts: EXECUTE / DUMP](docs/scripts.md) | Script files, collection backup/restore |
+| [Programmatic Usage](docs/programmatic.md) | Use QQL as a Python library |
+| [Reference: Models / Config / Errors](docs/reference.md) | Embedding models, config file, error reference |
 
 ---
 
-### RECOMMEND — retrieve by example IDs
-
-Performs a Qdrant recommendation query using existing point IDs as positive and optional negative examples.
-
-This is useful when you already know which stored points represent the kind of result you want. Qdrant uses those examples to retrieve nearby points, and QQL automatically excludes the seed IDs from the results.
+## Quick Syntax Reference
 
-**Syntax:**
 ```sql
-RECOMMEND FROM <collection_name> POSITIVE IDS (<id>, ...) LIMIT <n>
-RECOMMEND FROM <collection_name> POSITIVE IDS (<id>, ...) NEGATIVE IDS (<id>, ...) LIMIT <n>
-RECOMMEND FROM <collection_name> POSITIVE IDS (<id>, ...) STRATEGY '<strategy>' LIMIT <n>
-RECOMMEND FROM <collection_name> POSITIVE IDS (<id>, ...) LIMIT <n> WHERE <filter>
-RECOMMEND FROM <collection_name> POSITIVE IDS (<id>, ...) LIMIT <n> OFFSET <n>
-RECOMMEND FROM <collection_name> POSITIVE IDS (<id>, ...) LIMIT <n> SCORE THRESHOLD <f>
-RECOMMEND FROM <collection_name> POSITIVE IDS (<id>, ...) LIMIT <n> WITH { exact: true, hnsw_ef: <n> }
-RECOMMEND FROM <collection_name> POSITIVE IDS (<id>, ...) LIMIT <n> LOOKUP FROM <collection>
-RECOMMEND FROM <collection_name> POSITIVE IDS (<id>, ...) LIMIT <n> LOOKUP FROM <collection> VECTOR '<name>'
-RECOMMEND FROM <collection_name> POSITIVE IDS (<id>, ...) LIMIT <n> USING '<vector_name>'
-```
+-- Insert
+INSERT INTO COLLECTION articles VALUES {'text': '...', 'year': 2024}
+INSERT BULK INTO COLLECTION articles VALUES [{'text': '...'}, {'text': '...'}]
 
-**Examples:**
+-- Search
+SEARCH articles SIMILAR TO 'query' LIMIT 10
+SEARCH articles SIMILAR TO 'query' LIMIT 10 WHERE year >= 2020
+SEARCH articles SIMILAR TO 'query' LIMIT 10 USING HYBRID
+SEARCH articles SIMILAR TO 'query' LIMIT 10 USING HYBRID RERANK
 
-Recommend more results like two known articles:
-```sql
+-- Recommend
 RECOMMEND FROM articles POSITIVE IDS (1001, 1002) LIMIT 5
-```
-
-Recommend similar results while steering away from one bad example:
-```sql
-RECOMMEND FROM articles POSITIVE IDS (1001, 1002) NEGATIVE IDS (1009) LIMIT 5
-```
-
-Use Qdrant's `best_score` recommendation strategy:
-```sql
-RECOMMEND FROM articles POSITIVE IDS (1001) STRATEGY 'best_score' LIMIT 10
-```
-
-Recommend only within a filtered subset:
-```sql
-RECOMMEND FROM articles POSITIVE IDS (1001) LIMIT 5 WHERE year >= 2020 AND status = 'published'
-```
-
-Paginate recommendations (skip first 5, return next 10):
-```sql
-RECOMMEND FROM articles POSITIVE IDS (1001) LIMIT 10 OFFSET 5
-```
-
-Filter out low-confidence recommendations:
-```sql
-RECOMMEND FROM articles POSITIVE IDS (1001) LIMIT 10 SCORE THRESHOLD 0.5
-```
-
-Exact KNN baseline for recommendations:
-```sql
-RECOMMEND FROM articles POSITIVE IDS (1001) LIMIT 5 WITH { exact: true }
-```
-
-Cross-collection recommend (look up example IDs from another collection):
-```sql
-RECOMMEND FROM target_collection
-  POSITIVE IDS ('a')
-  LOOKUP FROM source_collection VECTOR 'dense'
-  LIMIT 5
-```
-
-Recommend using a specific named vector in the target collection:
-```sql
-RECOMMEND FROM articles
-  POSITIVE IDS (1001)
-  USING 'sparse'
-  LIMIT 5
-```
-
-Full-featured recommend:
-```sql
-RECOMMEND FROM articles
-  POSITIVE IDS (1001, 1002)
-  NEGATIVE IDS (1009)
-  STRATEGY 'best_score'
-  LOOKUP FROM other_collection VECTOR 'dense'
-  USING 'dense'
-  LIMIT 10
-  OFFSET 5
-  SCORE THRESHOLD 0.5
-  WHERE year >= 2020
-  WITH { exact: true }
-```
-
-**Supported strategies:**
-
-- `average_vector`
-- `best_score`
-- `sum_scores`
-
-**Clause order:** `POSITIVE IDS` → `NEGATIVE IDS` → `STRATEGY` → `LOOKUP FROM` → `USING` → `LIMIT` → `OFFSET` → `SCORE THRESHOLD` → `WHERE` → `WITH`
-
----
-
-### Query-Time Search Params (`EXACT`, `WITH`)
-
-QQL supports a small set of Qdrant query-time search parameters on `SEARCH` statements.
-
-Use these when you want to debug retrieval quality or tune recall without changing collection-level settings.
-
-#### Supported options
-
-| Syntax | Effect |
-|---|---|
-| `EXACT` | Shorthand for exact KNN search (`exact=true`) |
-| `WITH { hnsw_ef: 128 }` | Increase HNSW exploration at query time |
-| `WITH { exact: true }` | Force exact KNN explicitly |
-| `WITH { acorn: true }` | Enable ACORN for filtered queries |
-
-#### Syntax notes
-
-- `EXACT` can appear after `LIMIT` or after `RERANK`
-- `WITH { ... }` can appear after `WHERE` and/or `RERANK`
-- Supported `WITH` keys are only `hnsw_ef`, `exact`, and `acorn`
-
-#### Examples
-
-```sql
--- Exact KNN baseline
-SEARCH articles SIMILAR TO 'programming language' LIMIT 5 EXACT
-
--- Raise HNSW ef at query time
-SEARCH articles SIMILAR TO 'transformers' LIMIT 10 WITH { hnsw_ef: 256 }
-
--- Filtered search with ACORN
-SEARCH articles SIMILAR TO 'RAG' LIMIT 10 WHERE tag = 'li' WITH { acorn: true }
-
--- Hybrid search with exact mode
-SEARCH articles SIMILAR TO 'attention' LIMIT 10 USING HYBRID EXACT
-```
-
-These options are passed through to Qdrant `SearchParams` during execution.
-
----
-
-### WHERE Clause Filters
-
-The `WHERE` clause lets you filter on any payload field using SQL-style predicates. All standard comparison, range, membership, null-check, and full-text operators are supported.
-
-#### Equality and inequality
-
-```sql
--- Exact match
-SEARCH articles SIMILAR TO 'ml' LIMIT 10 WHERE category = 'paper'
-
--- Not equal
-SEARCH articles SIMILAR TO 'ml' LIMIT 10 WHERE status != 'draft'
-```
-
-#### Range comparisons
-
-```sql
-SEARCH articles SIMILAR TO 'ai' LIMIT 5 WHERE score > 0.8
-SEARCH articles SIMILAR TO 'ai' LIMIT 5 WHERE year < 2024
-SEARCH articles SIMILAR TO 'ai' LIMIT 5 WHERE score >= 0.75
-SEARCH articles SIMILAR TO 'ai' LIMIT 5 WHERE year <= 2023
-```
-
-#### BETWEEN … AND
-
-```sql
--- Inclusive range (equivalent to year >= 2018 AND year <= 2023)
-SEARCH articles SIMILAR TO 'history of ai' LIMIT 10 WHERE year BETWEEN 2018 AND 2023
-```
-
-#### IN and NOT IN
-
-```sql
-SEARCH articles SIMILAR TO 'retrieval' LIMIT 10 WHERE status IN ('published', 'reviewed')
-SEARCH articles SIMILAR TO 'retrieval' LIMIT 10 WHERE status NOT IN ('deleted', 'archived')
-```
-
-#### IS NULL and IS NOT NULL
-
-```sql
-SEARCH articles SIMILAR TO 'peer review' LIMIT 5 WHERE reviewer IS NULL
-SEARCH articles SIMILAR TO 'peer review' LIMIT 5 WHERE reviewer IS NOT NULL
-```
-
-#### IS EMPTY and IS NOT EMPTY
-
-```sql
-SEARCH articles SIMILAR TO 'untagged' LIMIT 5 WHERE tags IS EMPTY
-SEARCH articles SIMILAR TO 'categorized' LIMIT 5 WHERE tags IS NOT EMPTY
-```
-
-#### Full-text MATCH
-
-```sql
--- All terms must appear in the field (requires a Qdrant full-text index)
-SEARCH articles SIMILAR TO 'search' LIMIT 10 WHERE title MATCH 'vector database'
-
--- Any term can match
-SEARCH articles SIMILAR TO 'search' LIMIT 10 WHERE title MATCH ANY 'embedding retrieval'
 
--- Exact phrase must appear
-SEARCH articles SIMILAR TO 'search' LIMIT 10 WHERE title MATCH PHRASE 'semantic search'
-```
-
-#### AND, OR, NOT — logical operators
-
-Operator precedence: `NOT` (highest) > `AND` > `OR` (lowest). Use parentheses to override.
-
-```sql
--- AND: both conditions must be true
-SEARCH articles SIMILAR TO 'nlp' LIMIT 10 WHERE category = 'paper' AND year >= 2020
-
--- OR: either condition can be true
-SEARCH articles SIMILAR TO 'llm' LIMIT 10 WHERE source = 'arxiv' OR source = 'pubmed'
-
--- NOT: negate a condition
-SEARCH articles SIMILAR TO 'benchmark' LIMIT 10 WHERE NOT status = 'draft'
-
--- Parentheses to group OR inside AND
-SEARCH articles SIMILAR TO 'conference paper' LIMIT 10
-  WHERE (source = 'arxiv' OR source = 'ieee') AND year >= 2022
-
--- NOT on a parenthesized group
-SEARCH articles SIMILAR TO 'x' LIMIT 5 WHERE NOT (status = 'draft' OR status = 'deleted')
-```
-
-#### Dot-notation for nested fields
-
-```sql
-SEARCH articles SIMILAR TO 'wikipedia' LIMIT 5 WHERE meta.source = 'web'
-SEARCH cities SIMILAR TO 'large city' LIMIT 5 WHERE country.cities[].population > 1000000
-```
-
-#### WHERE also works in hybrid mode
-
-```sql
-SEARCH articles SIMILAR TO 'deep learning' LIMIT 10
-  USING HYBRID WHERE year BETWEEN 2020 AND 2024 AND status = 'published'
-```
-
-#### Full filter reference
-
-| WHERE syntax | Description |
-|---|---|
-| `field = 'x'` | Exact match |
-| `field != 'x'` | Not equal |
-| `field > n` | Greater than |
-| `field >= n` | Greater than or equal |
-| `field < n` | Less than |
-| `field <= n` | Less than or equal |
-| `field BETWEEN a AND b` | Inclusive range |
-| `field IN ('a', 'b')` | Value in list |
-| `field NOT IN ('a', 'b')` | Value not in list |
-| `field IS NULL` | Field absent or null |
-| `field IS NOT NULL` | Field present and non-null |
-| `field IS EMPTY` | Field is an empty list |
-| `field IS NOT EMPTY` | Field is a non-empty list |
-| `field MATCH 'text'` | All terms present (full-text) |
-| `field MATCH ANY 'text'` | Any term present (full-text) |
-| `field MATCH PHRASE 'text'` | Exact phrase present (full-text) |
-| `A AND B` | Both conditions must hold |
-| `A OR B` | Either condition must hold |
-| `NOT A` | Condition must not hold |
-| `(A OR B) AND C` | Parentheses for grouping |
-| `meta.source = 'x'` | Dot-notation nested field |
-
----
-
-### Hybrid Search (USING HYBRID)
-
-Hybrid search combines **dense semantic vectors** and **sparse BM25 keyword vectors** in a single query and merges the results with Qdrant's **Reciprocal Rank Fusion (RRF)** algorithm. This typically outperforms either method alone — semantic search handles paraphrases and synonyms, while BM25 handles exact keyword matches.
-
-#### How it works internally
-
-1. Both a dense vector (`TextEmbedding`) and a sparse BM25 vector (`SparseTextEmbedding`) are generated from your query text.
-2. Qdrant fetches the top candidates from each index independently (`prefetch limit = LIMIT × 4`).
-3. The two result lists are merged using RRF — a rank-based fusion that does not require score normalization.
-4. The final top-N results are returned.
-
-#### Step 1: Create a hybrid collection
+-- Collections
+CREATE COLLECTION articles
+CREATE COLLECTION articles HYBRID
+CREATE COLLECTION articles QUANTIZE SCALAR
+CREATE INDEX ON COLLECTION articles FOR year TYPE integer
+SHOW COLLECTIONS
+DROP COLLECTION articles
 
-A hybrid collection stores both a named dense vector (`"dense"`) and a named sparse vector (`"sparse"`):
+-- Delete
+DELETE FROM articles WHERE id = '3f2e1a4b-...'
+DELETE FROM articles WHERE year < 2020
 
-```sql
--- Shorthand (backward compatible)
-CREATE COLLECTION articles HYBRID
-
--- USING form — allows specifying a dense model
-CREATE COLLECTION articles USING HYBRID
-CREATE COLLECTION articles USING HYBRID DENSE MODEL 'BAAI/bge-base-en-v1.5'
-```
-
-This is equivalent to calling Qdrant with:
-```python
-vectors_config={"dense": VectorParams(size=384, distance=COSINE)},
-sparse_vectors_config={"sparse": SparseVectorParams(modifier=IDF)}
-```
-
-#### Step 2: Insert with hybrid vectors
-
-```sql
--- Uses default dense model + Qdrant/bm25 sparse model
-INSERT INTO COLLECTION articles VALUES {
-  'text': 'Attention is all you need',
-  'author': 'Vaswani et al.',
-  'year': 2017
-} USING HYBRID
-```
-
-If the collection does not exist yet, it is created automatically as a hybrid collection on the first `USING HYBRID` insert.
-
-#### Step 3: Search with hybrid retrieval
-
-```sql
--- Basic hybrid search
-SEARCH articles SIMILAR TO 'transformer architecture' LIMIT 10 USING HYBRID
-
--- Hybrid search with a WHERE filter
-SEARCH articles SIMILAR TO 'attention' LIMIT 10 USING HYBRID WHERE year >= 2017
-
--- Hybrid with custom dense model
-SEARCH articles SIMILAR TO 'embeddings' LIMIT 5
-  USING HYBRID DENSE MODEL 'BAAI/bge-base-en-v1.5'
-
--- Hybrid with both custom models
-SEARCH articles SIMILAR TO 'sparse retrieval' LIMIT 5
-  USING HYBRID DENSE MODEL 'BAAI/bge-base-en-v1.5' SPARSE MODEL 'prithivida/Splade_PP_en_v1'
-
--- Order of DENSE MODEL / SPARSE MODEL doesn't matter
-SEARCH articles SIMILAR TO 'sparse retrieval' LIMIT 5
-  USING HYBRID SPARSE MODEL 'prithivida/Splade_PP_en_v1' DENSE MODEL 'BAAI/bge-base-en-v1.5'
-```
-
-#### Model defaults in hybrid mode
-
-| Argument | Default |
-|---|---|
-| Dense model | `self._config.default_model` (same as non-hybrid) |
-| Sparse model | `Qdrant/bm25` |
-
-Both can be overridden independently with `DENSE MODEL` and `SPARSE MODEL`.
-
-#### Dense vs. hybrid — when to use which
-
-| Situation | Recommendation |
-|---|---|
-| Semantic similarity (paraphrasing, synonyms) | Dense only |
-| Exact keyword matching (product codes, names) | Hybrid or BM25-only |
-| General-purpose retrieval (unknown query distribution) | Hybrid |
-| Low latency / small collection | Dense only |
-
-#### Supported sparse models (Fastembed)
-
-| Model | Notes |
-|---|---|
-| `Qdrant/bm25` | Default. Classic BM25 with IDF weighting |
-| `prithivida/Splade_PP_en_v1` | SPLADE++ English, strong keyword + semantic overlap |
-| `Qdrant/Unicoil` | UniCOIL sparse encoder |
-
----
-
-### Cross-Encoder Reranking (RERANK)
-
-Appending `RERANK` to any SEARCH statement activates a **second-pass relevance scoring** step using a [cross-encoder](https://www.sbert.net/examples/applications/cross-encoder/README.html) model. Unlike bi-encoders (which encode query and document independently), a cross-encoder processes the **(query, document)** pair jointly, producing a more accurate relevance score at the cost of extra compute.
-
-#### How it works internally
-
-1. Qdrant executes the normal dense or hybrid search, but fetches `LIMIT × 4` candidates instead of just `LIMIT` — giving the reranker enough material to work with.
-2. Each candidate's `payload["text"]` is paired with the original query text.
-3. The cross-encoder scores all (query, document) pairs in one batch.
-4. Results are sorted **descending by cross-encoder score** and sliced to `LIMIT`.
-5. The `score` column in the output reflects the cross-encoder relevance score (raw logits — higher is more relevant).
-
-#### Syntax
-
-```
-SEARCH <name> SIMILAR TO '<query>' LIMIT <n> RERANK
-SEARCH <name> SIMILAR TO '<query>' LIMIT <n> RERANK MODEL '<cross_encoder_model>'
-```
-
-`RERANK` must come **after** any `USING` and `WHERE` clauses:
-
-```
-SEARCH ... LIMIT n [USING ...] [WHERE ...] RERANK [MODEL '...']
-```
-
-#### Examples
-
-Dense search + rerank (default cross-encoder):
-```sql
-SEARCH articles SIMILAR TO 'machine learning for healthcare' LIMIT 5 RERANK
-```
-
-Hybrid search + rerank (best of all three worlds):
-```sql
-SEARCH articles SIMILAR TO 'attention mechanism in transformers' LIMIT 10 USING HYBRID RERANK
-```
-
-Dense search + WHERE filter + rerank:
-```sql
-SEARCH articles SIMILAR TO 'deep learning' LIMIT 10 WHERE year > 2020 RERANK
-```
-
-Custom cross-encoder model:
-```sql
-SEARCH articles SIMILAR TO 'semantic search' LIMIT 5
-  RERANK MODEL 'cross-encoder/ms-marco-MiniLM-L-6-v2'
-```
-
-All clauses combined:
-```sql
-SEARCH articles SIMILAR TO 'neural IR' LIMIT 10
-  USING HYBRID DENSE MODEL 'BAAI/bge-base-en-v1.5'
-  WHERE year >= 2020
-  RERANK MODEL 'cross-encoder/ms-marco-MiniLM-L-6-v2'
-```
-
-#### Default cross-encoder model
-
-```
-cross-encoder/ms-marco-MiniLM-L-6-v2
-```
-
-- A lightweight but effective passage reranker fine-tuned on MS MARCO.
-- Downloaded on first use and cached locally by Fastembed.
-- No additional packages needed — `TextCrossEncoder` is included in the `fastembed` package.
-
-#### Commonly available cross-encoder models (Fastembed)
-
-| Model | Notes |
-|---|---|
-| `cross-encoder/ms-marco-MiniLM-L-6-v2` | Default. Fast and accurate for passage reranking |
-| `cross-encoder/ms-marco-MiniLM-L-12-v2` | Larger, higher quality, slower |
-| `BAAI/bge-reranker-base` | BGE reranker, strong general-purpose performance |
-| `BAAI/bge-reranker-large` | Highest quality BGE reranker, slower |
-
-#### When to use RERANK
-
-| Situation | Recommendation |
-|---|---|
-| High-precision retrieval (legal, medical, research) | Add `RERANK` |
-| Small LIMIT (top-3 or top-5 results) | Very effective — reranker focuses precision |
-| Low latency required | Skip `RERANK` (adds ~100–500 ms per batch) |
-| Large collections with keyword-heavy queries | `USING HYBRID RERANK` for best coverage + precision |
-| General-purpose semantic search | Optional; `RERANK` improves quality at mild cost |
-
-> **Note on scores:** After reranking, the `score` column shows the cross-encoder's raw logit (can be any real number, unbounded). Do not compare reranked scores to non-reranked cosine similarity scores — they are on different scales.
-
----
-
-### SHOW COLLECTIONS — list collections
-
-Lists all collections in the connected Qdrant instance.
-
-**Syntax:**
-```
-SHOW COLLECTIONS
-```
-
-**Example:**
-```sql
-SHOW COLLECTIONS
-```
-
-**Output:**
-```
-✓ 3 collection(s) found
-┌──────────────────┐
-│ Collection       │
-├──────────────────┤
-│ articles         │
-│ notes            │
-│ products         │
-└──────────────────┘
-```
-
----
-
-### CREATE COLLECTION — create a collection
-
-Explicitly creates a new empty collection. Collections are also created automatically on the first INSERT, so this command is optional — use it when you want to pre-create a collection before inserting data.
-
-**Syntax:**
-```
-CREATE COLLECTION <collection_name>
-CREATE COLLECTION <collection_name> HYBRID
-CREATE COLLECTION <collection_name> USING MODEL '<model_name>'
-CREATE COLLECTION <collection_name> USING HYBRID
-CREATE COLLECTION <collection_name> USING HYBRID DENSE MODEL '<model>'
-```
-
-**Examples:**
-
-Dense-only collection (standard, uses default model dimensions):
-```sql
-CREATE COLLECTION research_papers
-```
-
-Dense-only collection pinned to a specific model (768-dimensional):
-```sql
-CREATE COLLECTION research_papers USING MODEL 'BAAI/bge-base-en-v1.5'
-```
-
-Hybrid collection (dense + sparse BM25, default models):
-```sql
-CREATE COLLECTION research_papers HYBRID
-```
-
-Hybrid collection with a custom dense model:
-```sql
-CREATE COLLECTION research_papers USING HYBRID DENSE MODEL 'BAAI/bge-base-en-v1.5'
-```
-
-When `USING MODEL` is omitted, the collection uses the **default embedding model's dimensions** (384 for `all-MiniLM-L6-v2`). Specify `USING MODEL` to pin the collection to a specific model's output size — this must match the model you use in INSERT and SEARCH.
-
-If the collection already exists, the command succeeds with a message and does nothing.
-
----
-
-### Quantization — QUANTIZE clause
-
-Quantization reduces the memory footprint of vector collections and speeds up search at the cost of a small, controllable accuracy loss. QQL supports all three Qdrant quantization strategies via an optional `QUANTIZE` clause appended to `CREATE COLLECTION`.
-
-**Three strategies:**
-
-| Type | Compression | Accuracy Loss | Best For |
-|---|---|---|---|
-| `SCALAR` | 4× (float32 → int8) | < 1% | Most collections — best balance |
-| `BINARY` | 32× (float32 → 1-bit) | Higher | High-dimensional vectors (768+), speed priority |
-| `PRODUCT` | 4× (configurable) | Variable | Memory-constrained deployments |
-
-**Full syntax:**
-```
-CREATE COLLECTION <name> ... QUANTIZE SCALAR [QUANTILE <0.0–1.0>] [ALWAYS RAM]
-CREATE COLLECTION <name> ... QUANTIZE BINARY  [ALWAYS RAM]
-CREATE COLLECTION <name> ... QUANTIZE PRODUCT [ALWAYS RAM]
-```
-
-- **`QUANTILE <float>`** — (scalar only) calibration quantile for the INT8 conversion; defaults to Qdrant's built-in default (0.99) when omitted. Lower values improve outlier handling at the cost of a slightly wider value range.
-- **`ALWAYS RAM`** — keep the **original** (unquantized) vectors in RAM for rescoring, sacrificing memory savings but preserving accuracy during re-ranking. Supported by all three types.
-- **`QUANTIZE`** always appears **after** all other clauses (`HYBRID`, `USING MODEL`, etc.).
-- For `PRODUCT`, the compression ratio is fixed at **4×** in this version.
-- When used with `HYBRID` collections, quantization applies only to the **dense** vector (Qdrant's behavior).
-
-**Examples:**
-
-Scalar quantization (recommended default):
-```sql
-CREATE COLLECTION research_papers QUANTIZE SCALAR
-```
-
-Scalar with explicit calibration and original vectors kept in RAM:
-```sql
-CREATE COLLECTION research_papers QUANTIZE SCALAR QUANTILE 0.95 ALWAYS RAM
-```
-
-Binary quantization for large high-dimensional embeddings:
-```sql
-CREATE COLLECTION research_papers QUANTIZE BINARY
-```
-
-Product quantization for maximum memory savings:
-```sql
-CREATE COLLECTION research_papers QUANTIZE PRODUCT ALWAYS RAM
-```
-
-Combined with hybrid collection:
-```sql
-CREATE COLLECTION research_papers HYBRID QUANTIZE SCALAR
-```
-
-Combined with a pinned model:
-```sql
-CREATE COLLECTION research_papers USING MODEL 'BAAI/bge-base-en-v1.5' QUANTIZE SCALAR QUANTILE 0.99
-```
-
-Combined with hybrid + dense model:
-```sql
-CREATE COLLECTION research_papers USING HYBRID DENSE MODEL 'BAAI/bge-base-en-v1.5' QUANTIZE BINARY
-```
-
-**Valid combinations:**
-
-| Base form | + QUANTIZE SCALAR | + QUANTIZE BINARY | + QUANTIZE PRODUCT |
-|---|---|---|---|
-| `CREATE COLLECTION name` | ✓ | ✓ | ✓ |
-| `... HYBRID` | ✓ | ✓ | ✓ |
-| `... USING MODEL 'x'` | ✓ | ✓ | ✓ |
-| `... USING HYBRID` | ✓ | ✓ | ✓ |
-| `... USING HYBRID DENSE MODEL 'x'` | ✓ | ✓ | ✓ |
-
-> **Note:** INSERT and SEARCH on quantized collections work exactly the same as on non-quantized ones — no changes to INSERT or SEARCH syntax are needed.
-
----
-
-### CREATE INDEX — create a payload index
-
-Creates a payload index on a collection field. Payload indexes speed up `WHERE` clause filtering by allowing Qdrant to efficiently match on indexed fields.
-
-**Syntax:**
-```
-CREATE INDEX ON COLLECTION <collection_name> FOR <field_name> TYPE <schema_type>
-```
-
-**Supported schema types:**
-
-| Type | Description |
-|---|---|
-| `keyword` | Exact string match (e.g. status, category) |
-| `integer` | Whole numbers |
-| `float` | Decimal numbers |
-| `bool` | Boolean values |
-| `text` | Full-text search (enables `MATCH` operators) |
-| `geo` | Geospatial coordinates |
-| `datetime` | Date/time values |
-
-**Examples:**
-
-Create a keyword index on a string field:
-```sql
-CREATE INDEX ON COLLECTION articles FOR category TYPE keyword
-```
-
-Create an integer index on a numeric field:
-```sql
-CREATE INDEX ON COLLECTION articles FOR year TYPE integer
-```
-
-Create a text index for full-text search:
-```sql
-CREATE INDEX ON COLLECTION articles FOR title TYPE text
-```
-
-Nested field (dot notation):
-```sql
-CREATE INDEX ON COLLECTION articles FOR meta.author TYPE keyword
-```
-
-**Rules:**
-- The collection must already exist. Raises an error otherwise.
-- The schema type must be one of: `keyword`, `integer`, `float`, `bool`, `text`, `geo`, `datetime`.
-- Indexes are idempotent — creating the same index twice succeeds silently.
-
----
-
-### DROP COLLECTION — delete a collection
-
-Permanently deletes a collection and **all points inside it**. This operation is irreversible.
-
-**Syntax:**
-```
-DROP COLLECTION <collection_name>
-```
-
-**Example:**
-```sql
-DROP COLLECTION old_experiments
-```
-
-Raises an error if the collection does not exist.
-
----
-
-### DELETE — remove points
-
-Deletes one or more points from a collection. You can delete by specific ID or by a `WHERE` filter that matches multiple points.
-
-**Syntax:**
-```
-DELETE FROM <collection_name> WHERE id = '<point_id>'
-DELETE FROM <collection_name> WHERE id = <integer_id>
-DELETE FROM <collection_name> WHERE <filter>
-```
-
-**Examples:**
-
-Delete by UUID string:
-```sql
-DELETE FROM articles WHERE id = '3f2e1a4b-8c91-4d0e-b123-abc123def456'
-```
-
-Delete by integer ID:
-```sql
-DELETE FROM articles WHERE id = 42
-```
-
-Delete all points matching a filter:
-```sql
-DELETE FROM articles WHERE category = 'archived'
-```
-
-Delete with a compound filter:
-```sql
-DELETE FROM articles WHERE year < 2020 AND status = 'draft'
-```
-
-To find a point's ID, run a SEARCH first and copy the ID from the results table.
-
----
-
-## Script Files
-
-QQL supports reading from and writing to `.qql` script files, making it easy to automate bulk operations, seed databases, and back up collections.
-
----
-
-### EXECUTE — run a .qql script file
-
-Execute a file containing multiple QQL statements in sequence. Each statement is parsed and executed in order. `--` comments are stripped before parsing.
-
-**CLI usage:**
-```bash
-qql execute /path/to/script.qql
-
-# Stop on first error instead of continuing through all statements
-qql execute /path/to/script.qql --stop-on-error
-```
-
-**In-shell usage (inside the QQL REPL):**
-```
-qql> EXECUTE /path/to/script.qql
-qql> \e /path/to/script.qql
-```
-
-**Script format:**
-
-```sql
--- This is a comment — the entire line is ignored
--- ============================================================
---  QQL Script — populate articles collection
--- ============================================================
-
--- Step 1: create the collection
-CREATE COLLECTION articles
-
--- Step 2: bulk insert records
-INSERT BULK INTO COLLECTION articles VALUES [
-  {'text': 'Neural networks learn representations', 'year': 2023},
-  {'text': 'Attention mechanisms in transformers',  'year': 2024}
-]
-
--- Step 3: verify
-SHOW COLLECTIONS
-```
-
-**Rules:**
-- `--` to end-of-line is a comment and is ignored (inline or full-line)
-- Statements can span multiple lines (e.g. `INSERT BULK ... VALUES [...]`)
-- `RECOMMEND` statements work in `.qql` files the same way they do in the REPL
-- Blank lines between statements are ignored
-- By default all statements run even if one fails; use `--stop-on-error` to halt early
-
-**Included examples:**
-- [`resources/sample.qql`](resources/sample.qql) seeds the demo medical dataset
-- [`resources/sample_v2.qql`](resources/sample_v2.qql) is a compact end-to-end example with explicit IDs and runnable `RECOMMEND` statements
-
-**Example output:**
-```
-Executing: /path/to/script.qql
-
-[1/3] CREATE COLLECTION articles
-  ✓ Collection 'articles' created (384-dimensional vectors, cosine distance)
-[2/3] INSERT BULK INTO COLLECTION articles VALUES [ …
-  ✓ Inserted 2 points
-[3/3] SHOW COLLECTIONS
-  ✓ 1 collection(s) found
-
-Done. 3/3 statement(s) succeeded.
-```
-
----
-
-### DUMP COLLECTION — export collection to a .qql script file
-
-Export every point in a collection to a `.qql` script file. The generated file is valid QQL — it can be re-imported with `qql execute` to restore or migrate the collection. Points are written in batches of 50 as `INSERT BULK` statements.
-
-**CLI usage:**
-```bash
-qql dump <collection_name> <output.qql>
-```
-
-**In-shell usage (inside the QQL REPL):**
-```
-qql> DUMP COLLECTION <name> <output.qql>
-```
-
-**Example:**
-```bash
-qql dump medical_records /tmp/medical_records.qql
-```
-
-```
-Dumping: 'medical_records'  →  /tmp/medical_records.qql
-
-  Collection type : hybrid (dense + sparse)
-  Points          : 41
-  Batches         : 1  (50 points/batch)
-
-  [1/1] wrote 41 point(s)
-
-Done. 41 point(s) written.
-```
-
-**Generated file structure:**
-```sql
--- ============================================================
--- QQL Dump — collection: medical_records
--- Generated : 2026-04-19 14:32:11
--- Points    : 41
--- Type      : hybrid (dense + sparse)
--- Note      : Re-importing re-embeds all text using the
---             configured model (see: qql connect).
--- ============================================================
-
-CREATE COLLECTION medical_records HYBRID
-
--- Batch 1 / 1  (records 1–41)
-INSERT BULK INTO COLLECTION medical_records VALUES [
-  {
-    'text': 'Alzheimers disease is characterized by...',
-    'title': 'Alzheimers Disease Overview',
-    'department': 'neurology',
-    'year': 2023,
-    'peer_reviewed': true
-  },
-  ...
-] USING HYBRID
-
--- ============================================================
--- End of dump
--- Written : 41
--- Skipped : 0  (no 'text' field)
--- ============================================================
-```
-
-**Round-trip workflow — backup and restore:**
-```bash
-# 1. Dump the collection
-qql dump medical_records backup.qql
-
-# 2. Drop it
-qql> DROP COLLECTION medical_records
-
-# 3. Restore from the dump
-qql execute backup.qql
-```
-
-**Rules and notes:**
-- Points without a `'text'` payload field are **skipped** (counted in the footer comment).
-- Hybrid collections produce `CREATE COLLECTION <name> HYBRID` and `INSERT BULK ... USING HYBRID` statements.
-- Dense collections produce plain `CREATE COLLECTION <name>` and `INSERT BULK` statements.
-- All payload value types are preserved: strings, integers, floats, booleans (`true`/`false`), `null`, lists, and nested dicts.
-- Re-importing re-embeds all text using your currently configured model — use the same model as the original collection to preserve semantic accuracy.
-- Parent directories of the output path are created automatically.
-
----
-
-## Embedding Models
-
-QQL uses [Fastembed](https://github.com/qdrant/fastembed) to convert text into vectors locally — no external API call is needed.
-
-### Dense embedding (default)
-
-```
-sentence-transformers/all-MiniLM-L6-v2
-```
-
-- Vector dimensions: **384**
-- Size: ~90 MB (downloaded on first use, cached locally)
-- Good balance of speed and quality for English text
-
-### Sparse embedding (hybrid mode default)
-
-```
-Qdrant/bm25
-```
-
-- Classic BM25 with IDF weighting
-- Indices and values are generated as a sparse vector; no fixed dimensions
-- Uses asymmetric encoding: `embed()` for documents, `query_embed()` for queries
-
-### Specifying models
-
-Add `USING MODEL '<model_name>'` for dense-only mode, or `DENSE MODEL` / `SPARSE MODEL` after `USING HYBRID`:
-
-```sql
--- Dense only with custom model
-INSERT INTO docs VALUES {'text': 'hello'} USING MODEL 'BAAI/bge-small-en-v1.5'
-SEARCH docs SIMILAR TO 'hello' LIMIT 5 USING MODEL 'BAAI/bge-small-en-v1.5'
-
--- Hybrid with custom dense model
-SEARCH docs SIMILAR TO 'hello' LIMIT 5 USING HYBRID DENSE MODEL 'BAAI/bge-base-en-v1.5'
-
--- Hybrid with custom sparse model
-SEARCH docs SIMILAR TO 'hello' LIMIT 5 USING HYBRID SPARSE MODEL 'prithivida/Splade_PP_en_v1'
-
--- Hybrid with both custom
-SEARCH docs SIMILAR TO 'hello' LIMIT 5
-  USING HYBRID DENSE MODEL 'BAAI/bge-base-en-v1.5' SPARSE MODEL 'prithivida/Splade_PP_en_v1'
-```
-
-### Commonly available dense models (Fastembed)
-
-| Model | Dimensions | Notes |
-|---|---|---|
-| `sentence-transformers/all-MiniLM-L6-v2` | 384 | Default. Fast, good general quality |
-| `BAAI/bge-small-en-v1.5` | 384 | Strong English retrieval |
-| `BAAI/bge-base-en-v1.5` | 768 | Higher quality, larger size |
-| `BAAI/bge-large-en-v1.5` | 1024 | Best quality, slowest |
-| `sentence-transformers/all-mpnet-base-v2` | 768 | Strong semantic similarity |
-
-### Commonly available sparse models (Fastembed)
-
-| Model | Notes |
-|---|---|
-| `Qdrant/bm25` | Default sparse model. Classic BM25 + IDF |
-| `prithivida/Splade_PP_en_v1` | SPLADE++ — strong keyword + semantic overlap |
-| `Qdrant/Unicoil` | UniCOIL sparse encoder |
-
-### Cross-encoder reranking (RERANK default)
-
-```
-cross-encoder/ms-marco-MiniLM-L-6-v2
-```
-
-- A passage reranker fine-tuned on MS MARCO.
-- No new dependencies — `TextCrossEncoder` is included in the `fastembed` package.
-- Override with `RERANK MODEL '<model_name>'`.
-
-### Commonly available cross-encoder models (Fastembed)
-
-| Model | Notes |
-|---|---|
-| `cross-encoder/ms-marco-MiniLM-L-6-v2` | Default. Fast passage reranker |
-| `cross-encoder/ms-marco-MiniLM-L-12-v2` | Larger, higher quality |
-| `BAAI/bge-reranker-base` | Strong general-purpose reranker |
-| `BAAI/bge-reranker-large` | Highest quality, slower |
-
-> Models are downloaded automatically on first use and cached by Fastembed. Loading a new model for the first time takes a few seconds.
-
-### Model consistency rule
-
-Every collection is created with a fixed vector size determined by the model used on first INSERT (or CREATE COLLECTION). If you try to INSERT into an existing collection using a different model that produces different dimensions, QQL will raise an error:
-
-```
-Error: Vector dimension mismatch: collection 'docs' expects 384 dims,
-but model produces 768 dims. Specify a compatible model with USING MODEL '<model>'.
-```
-
----
-
-## Value Types in Dictionaries
-
-The `VALUES` dictionary (and nested dicts) supports these types:
-
-| Type | Example | Notes |
-|---|---|---|
-| String | `'hello'` or `"hello"` | Single or double quotes |
-| Integer | `42`, `-7` | Whole numbers, negative allowed |
-| Float | `3.14`, `-0.5` | Decimal numbers |
-| Boolean | `true`, `false` | Case-insensitive |
-| Null | `null` | Case-insensitive |
-| Nested dict | `{'key': 'val'}` | Arbitrary nesting |
-| List | `['a', 'b', 1]` | Mixed types allowed |
-
-**Example using every type:**
-```sql
-INSERT INTO demo VALUES {
-  'text':    'example document',
-  'count':   42,
-  'score':   0.95,
-  'active':  true,
-  'deleted': false,
-  'ref':     null,
-  'meta':    {'source': 'web', 'lang': 'en'},
-  'tags':    ['ai', 'nlp', 'search']
-}
-```
-
-Trailing commas in dicts and lists are allowed:
-```sql
-INSERT INTO demo VALUES {'text': 'hi', 'x': 1,}
-```
-
----
-
-## Configuration File
-
-The connection config is stored at `~/.qql/config.json`:
-
-```json
-{
-  "url": "http://localhost:6333",
-  "secret": null,
-  "default_model": "sentence-transformers/all-MiniLM-L6-v2"
-}
-```
-
-| Field | Description |
-|---|---|
-| `url` | Qdrant instance URL |
-| `secret` | API key (null if not required) |
-| `default_model` | Dense embedding model used when no `USING MODEL` clause is given |
-
-You can edit this file directly to change the default model without reconnecting:
-
-```json
-{
-  "url": "http://localhost:6333",
-  "secret": null,
-  "default_model": "BAAI/bge-small-en-v1.5"
-}
-```
-
----
-
-## Programmatic Usage
-
-QQL can also be used as a Python library without the CLI:
-
-```python
-from qql import run_query
-
-# Insert a document (dense-only)
-result = run_query(
-    "INSERT INTO COLLECTION notes VALUES {'text': 'hello world', 'author': 'alice', 'year': 2024}",
-    url="http://localhost:6333",
-)
-print(result.message)   # "Inserted 1 point [<id>]"
-print(result.data)      # {"id": 1001 or "<uuid>", "collection": "notes"}
-
-# Insert with hybrid vectors
-result = run_query(
-    "INSERT INTO COLLECTION notes VALUES {'text': 'hello world'} USING HYBRID",
-    url="http://localhost:6333",
-)
-print(result.message)   # "Inserted 1 point [<id>] (hybrid)"
-
-# Dense search with WHERE filter
-result = run_query(
-    "SEARCH notes SIMILAR TO 'hello' LIMIT 5 WHERE year >= 2023 AND author != 'bot'",
-    url="http://localhost:6333",
-)
-for hit in result.data:
-    print(hit["score"], hit["payload"])
-
-# Hybrid search with WHERE filter
-result = run_query(
-    "SEARCH notes SIMILAR TO 'hello' LIMIT 5 USING HYBRID WHERE year >= 2023",
-    url="http://localhost:6333",
-)
-for hit in result.data:
-    print(hit["score"], hit["payload"])
-```
-
-Or use the pipeline directly for more control:
-
-```python
-from qdrant_client import QdrantClient
-from qql.lexer import Lexer
-from qql.parser import Parser
-from qql.executor import Executor
-from qql.config import QQLConfig
-
-client = QdrantClient(url="http://localhost:6333")
-config = QQLConfig(url="http://localhost:6333")
-executor = Executor(client, config)
-
-query = "SEARCH articles SIMILAR TO 'deep learning' LIMIT 10 USING HYBRID WHERE category = 'cv'"
-tokens = Lexer().tokenize(query)
-node = Parser(tokens).parse()
-result = executor.execute(node)
-
-for hit in result.data:
-    print(hit["score"], hit["payload"])
-```
-
-### ExecutionResult
-
-All operations return an `ExecutionResult`:
-
-```python
-@dataclass
-class ExecutionResult:
-    success: bool       # True if operation succeeded
-    message: str        # Human-readable summary
-    data: Any           # Operation-specific payload (see below)
-```
-
-| Operation | `result.data` type |
-|---|---|
-| INSERT (dense) | `{"id": int | "<uuid>", "collection": "<name>"}` |
-| INSERT (hybrid) | `{"id": int | "<uuid>", "collection": "<name>"}` |
-| SEARCH | `[{"id": str, "score": float, "payload": dict}, ...]` |
-| RECOMMEND | `[{"id": str, "score": float, "payload": dict}, ...]` |
-| SHOW COLLECTIONS | `["name1", "name2", ...]` |
-| CREATE COLLECTION | `None` |
-| DROP COLLECTION | `None` |
-| DELETE | `None` |
-
----
-
-## Project Structure
-
-```
-qql/
-├── pyproject.toml          # Package config; installs the `qql` CLI command
-├── src/
-│   └── qql/
-│       ├── __init__.py     # Public API: run_query()
-│       ├── cli.py          # CLI entry point: connect, disconnect, REPL
-│       ├── config.py       # QQLConfig dataclass + ~/.qql/config.json I/O
-│       ├── exceptions.py   # QQLError, QQLSyntaxError, QQLRuntimeError
-│       ├── lexer.py        # Tokenizer: string → List[Token]
-│       ├── ast_nodes.py    # Frozen dataclasses for each statement and filter type
-│       ├── parser.py       # Recursive descent parser: tokens → AST node
-│       ├── embedder.py     # Embedder (dense) + SparseEmbedder (BM25) + CrossEncoderEmbedder (rerank)
-│       └── executor.py     # AST node → Qdrant client call + filter + hybrid search
-└── tests/
-    ├── test_lexer.py       # Tokenizer unit tests (keywords, operators, dot-paths, hybrid tokens)
-    ├── test_parser.py      # Parser unit tests (all statements + WHERE filters + hybrid clauses)
-    └── test_executor.py    # Executor unit tests (mocked Qdrant client, filter builders, hybrid ops)
+-- Scripts
+EXECUTE /path/to/script.qql
+DUMP COLLECTION articles /path/to/backup.qql
 ```
 
 ---
@@ -1519,24 +134,10 @@ Tests do not require a running Qdrant instance — the Qdrant client is mocked.
 pytest tests/ -v
 ```
 
-Expected output: **212 tests passing**.
+Expected: **375 tests passing**.
 
 ---
 
-## Error Reference
+## License
 
-| Error | Cause | Fix |
-|---|---|---|
-| `Not connected. Run: qql connect --url <url>` | No `~/.qql/config.json` found | Run `qql connect --url <url>` first |
-| `Connection failed: ...` | Qdrant unreachable at given URL | Check that Qdrant is running and the URL is correct |
-| `INSERT requires a 'text' field in VALUES` | `text` key missing from the VALUES dict | Add `'text': '...'` to your dict |
-| `Vector dimension mismatch: collection '...' expects X dims, but model produces Y dims` | Model used in INSERT differs from the one used to create the collection | Use `USING MODEL` to specify the same model as the collection was created with |
-| `Collection '...' does not exist` | SEARCH / DROP / DELETE on a non-existent collection | Check name spelling or run `SHOW COLLECTIONS` |
-| `Unexpected token '...'; expected a QQL statement keyword` | Unrecognized statement | Check the query syntax; QQL does not support SQL SELECT |
-| `Unterminated string literal (at position N)` | A string is missing its closing quote | Close the string with a matching `'` or `"` |
-| `Unexpected character '@' (at position N)` | A character not part of QQL syntax | Remove or quote the offending character |
-| `Expected a filter operator after field '...'` | Unknown operator in WHERE clause | Use one of: `=`, `!=`, `>`, `>=`, `<`, `<=`, `IN`, `NOT IN`, `BETWEEN`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `MATCH` |
-| `Expected ')' ...` | Unclosed parenthesis in WHERE clause | Add the missing `)` to close the group |
-| `Qdrant error during SEARCH: ...` | Hybrid search on a non-hybrid collection, or wrong vector names | Ensure the collection was created with `HYBRID` before using `USING HYBRID` in INSERT/SEARCH |
-| `Unknown index type '...'` | Invalid schema type in CREATE INDEX | Use one of: `keyword`, `integer`, `float`, `bool`, `text`, `geo`, `datetime` |
-| `Qdrant error during CREATE INDEX: ...` | Qdrant rejected the index creation | Check field name and collection state |
+MIT © [Kameshwara Pavan Kumar Mantha](https://github.com/pavanjava)
diff --git a/docs/_config.yml b/docs/_config.yml
new file mode 100644
index 0000000..cdd476e
--- /dev/null
+++ b/docs/_config.yml
@@ -0,0 +1,10 @@
+theme: minima
+title: "QQL — Qdrant Query Language"
+description: "SQL-like query language and CLI for Qdrant vector database — INSERT, SEARCH, hybrid search, reranking, quantization, and more."
+url: "https://pavanjava.github.io/qql"
+baseurl: "/qql"
+repository: "pavanjava/qql"
+
+# Force-include the standalone HTML landing page in the site build (it ships its own styling)
+include:
+  - index.html
diff --git a/docs/collections.md b/docs/collections.md
new file mode 100644
index 0000000..8082bc1
--- /dev/null
+++ b/docs/collections.md
@@ -0,0 +1,216 @@
+# Managing Collections
+
+---
+
+## SHOW COLLECTIONS — list collections
+
+Lists all collections in the connected Qdrant instance.
+
+```sql
+SHOW COLLECTIONS
+```
+
+**Output:**
+```
+✓ 3 collection(s) found
+┌──────────────────┐
+│ Collection       │
+├──────────────────┤
+│ articles         │
+│ notes            │
+│ products         │
+└──────────────────┘
+```
+
+---
+
+## CREATE COLLECTION — create a collection
+
+Explicitly creates a new empty collection. Collections are also created automatically on the first INSERT, so this command is optional — use it when you want to pre-create a collection before inserting data.
+
+**Syntax:**
+```
+CREATE COLLECTION <collection_name>
+CREATE COLLECTION <collection_name> HYBRID
+CREATE COLLECTION <collection_name> USING MODEL '<model_name>'
+CREATE COLLECTION <collection_name> USING HYBRID
+CREATE COLLECTION <collection_name> USING HYBRID DENSE MODEL '<model>'
+```
+
+Any of the above forms can be followed by an optional `QUANTIZE` clause — see [Quantization](#quantization--quantize-clause) below.
+
+**Examples:**
+
+Dense-only collection (standard, uses default model dimensions):
+```sql
+CREATE COLLECTION research_papers
+```
+
+Dense-only collection pinned to a specific model (768-dimensional):
+```sql
+CREATE COLLECTION research_papers USING MODEL 'BAAI/bge-base-en-v1.5'
+```
+
+Hybrid collection (dense + sparse BM25, default models):
+```sql
+CREATE COLLECTION research_papers HYBRID
+```
+
+Hybrid collection with a custom dense model:
+```sql
+CREATE COLLECTION research_papers USING HYBRID DENSE MODEL 'BAAI/bge-base-en-v1.5'
+```
+
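+When `USING MODEL` is omitted, the collection uses the **default embedding model's dimensions** (384 for `all-MiniLM-L6-v2`). Specify `USING MODEL` to pin the collection to a specific model's output size — this must match the model you use in INSERT and SEARCH. If the collection already exists, the command succeeds with a message and does nothing.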
+
+---
+
+## Quantization — QUANTIZE clause
+
+Quantization reduces the memory footprint of vector collections and speeds up search at the cost of a small, controllable accuracy loss. QQL supports all three Qdrant quantization strategies via an optional `QUANTIZE` clause appended to `CREATE COLLECTION`.
+
+**Three strategies:**
+
+| Type | Compression | Accuracy Loss | Best For |
+|---|---|---|---|
+| `SCALAR` | 4× (float32 → int8) | < 1% | Most collections — best balance |
+| `BINARY` | 32× (float32 → 1-bit) | Higher | High-dimensional vectors (768+), speed priority |
+| `PRODUCT` | 4× (fixed in this version) | Variable | Memory-constrained deployments |
+
+**Full syntax:**
+```
+CREATE COLLECTION <name> ... QUANTIZE SCALAR [QUANTILE <0.0–1.0>] [ALWAYS RAM]
+CREATE COLLECTION <name> ... QUANTIZE BINARY  [ALWAYS RAM]
+CREATE COLLECTION <name> ... QUANTIZE PRODUCT [ALWAYS RAM]
+```
+
+- **`QUANTILE <float>`** — (scalar only) calibration quantile for the INT8 conversion; defaults to Qdrant's built-in default (0.99) when omitted.
+- **`ALWAYS RAM`** — keep the **quantized** vectors in RAM at all times, regardless of the collection's `on_disk` setting. Improves search throughput at the cost of higher RAM usage for the compressed index. The original full-precision vectors are stored and managed independently of this flag. Supported by all three quantization types.
+- **`QUANTIZE`** always appears **after** all other clauses (`HYBRID`, `USING MODEL`, etc.).
+- For `PRODUCT`, the compression ratio is fixed at **4×** in this version.
+- When used with `HYBRID` collections, quantization applies only to the **dense** vector.
+
+**Examples:**
+
+Scalar quantization (recommended default):
+```sql
+CREATE COLLECTION research_papers QUANTIZE SCALAR
+```
+
+Scalar with explicit calibration and quantized vectors pinned to RAM:
+```sql
+CREATE COLLECTION research_papers QUANTIZE SCALAR QUANTILE 0.95 ALWAYS RAM
+```
+
+Binary quantization for large high-dimensional embeddings:
+```sql
+CREATE COLLECTION research_papers QUANTIZE BINARY
+```
+
+Product quantization with quantized vectors pinned to RAM:
+```sql
+CREATE COLLECTION research_papers QUANTIZE PRODUCT ALWAYS RAM
+```
+
+Combined with hybrid collection:
+```sql
+CREATE COLLECTION research_papers HYBRID QUANTIZE SCALAR
+```
+
+Combined with a pinned model:
+```sql
+CREATE COLLECTION research_papers USING MODEL 'BAAI/bge-base-en-v1.5' QUANTIZE SCALAR QUANTILE 0.99
+```
+
+**Valid combinations:**
+
+| Base form | + QUANTIZE SCALAR | + QUANTIZE BINARY | + QUANTIZE PRODUCT |
+|---|---|---|---|
+| `CREATE COLLECTION name` | ✓ | ✓ | ✓ |
+| `... HYBRID` | ✓ | ✓ | ✓ |
+| `... USING MODEL 'x'` | ✓ | ✓ | ✓ |
+| `... USING HYBRID` | ✓ | ✓ | ✓ |
+| `... USING HYBRID DENSE MODEL 'x'` | ✓ | ✓ | ✓ |
+
+> INSERT and SEARCH on quantized collections work exactly the same as on non-quantized ones — no changes to INSERT or SEARCH syntax are needed.
+
+---
+
+## CREATE INDEX — create a payload index
+
+Creates a payload index on a collection field. Payload indexes speed up `WHERE` clause filtering by allowing Qdrant to efficiently match on indexed fields.
+
+**Syntax:**
+```
+CREATE INDEX ON COLLECTION <collection_name> FOR <field_name> TYPE <schema_type>
+```
+
+**Supported schema types:**
+
+| Type | Description |
+|---|---|
+| `keyword` | Exact string match (e.g. status, category) |
+| `integer` | Whole numbers |
+| `float` | Decimal numbers |
+| `bool` | Boolean values |
+| `text` | Full-text search (enables `MATCH` operators) |
+| `geo` | Geospatial coordinates |
+| `datetime` | Date/time values |
+
+**Examples:**
+
+```sql
+CREATE INDEX ON COLLECTION articles FOR category TYPE keyword
+CREATE INDEX ON COLLECTION articles FOR year TYPE integer
+CREATE INDEX ON COLLECTION articles FOR title TYPE text
+CREATE INDEX ON COLLECTION articles FOR meta.author TYPE keyword
+```
+
+**Rules:**
+- The collection must already exist. Raises an error otherwise.
+- Indexes are idempotent — creating the same index twice succeeds silently.
+
+---
+
+## DROP COLLECTION — delete a collection
+
+Permanently deletes a collection and **all points inside it**. This operation is irreversible.
+
+```sql
+DROP COLLECTION old_experiments
+```
+
+Raises an error if the collection does not exist.
+
+---
+
+## DELETE — remove points
+
+Deletes one or more points from a collection, either by point ID (`WHERE id = ...`) or by a `WHERE` filter on payload fields that can match multiple points.
+
+**Syntax:**
+```
+DELETE FROM <collection_name> WHERE id = '<point_id>'
+DELETE FROM <collection_name> WHERE id = <integer_id>
+DELETE FROM <collection_name> WHERE <filter>
+```
+
+**Examples:**
+
+```sql
+-- Delete by UUID
+DELETE FROM articles WHERE id = '3f2e1a4b-8c91-4d0e-b123-abc123def456'
+
+-- Delete by integer ID
+DELETE FROM articles WHERE id = 42
+
+-- Delete all points matching a filter
+DELETE FROM articles WHERE category = 'archived'
+
+-- Delete with a compound filter
+DELETE FROM articles WHERE year < 2020 AND status = 'draft'
+```
+
+**Notes:**
+- If no points match the filter or ID, the operation succeeds silently with a count of 0.
+- The collection itself must exist; deleting from a non-existent collection raises an error.
diff --git a/docs/filters.md b/docs/filters.md
new file mode 100644
index 0000000..2434546
--- /dev/null
+++ b/docs/filters.md
@@ -0,0 +1,166 @@
+# WHERE Clause Filters
+
+The `WHERE` clause lets you filter on any payload field using SQL-style predicates. All standard comparison, range, membership, null-check, and full-text operators are supported.
+
+`WHERE` works on `SEARCH`, `RECOMMEND`, and `DELETE` statements.
+
+---
+
+## Equality and inequality
+
+```sql
+-- Exact match
+SEARCH articles SIMILAR TO 'ml' LIMIT 10 WHERE category = 'paper'
+
+-- Not equal
+SEARCH articles SIMILAR TO 'ml' LIMIT 10 WHERE status != 'draft'
+```
+
+---
+
+## Range comparisons
+
+```sql
+SEARCH articles SIMILAR TO 'ai' LIMIT 5 WHERE score > 0.8
+SEARCH articles SIMILAR TO 'ai' LIMIT 5 WHERE year < 2024
+SEARCH articles SIMILAR TO 'ai' LIMIT 5 WHERE score >= 0.75
+SEARCH articles SIMILAR TO 'ai' LIMIT 5 WHERE year <= 2023
+```
+
+---
+
+## BETWEEN … AND
+
+```sql
+-- Inclusive range (equivalent to year >= 2018 AND year <= 2023)
+SEARCH articles SIMILAR TO 'history of ai' LIMIT 10 WHERE year BETWEEN 2018 AND 2023
+```
+
+---
+
+## IN and NOT IN
+
+```sql
+SEARCH articles SIMILAR TO 'retrieval' LIMIT 10 WHERE status IN ('published', 'reviewed')
+SEARCH articles SIMILAR TO 'retrieval' LIMIT 10 WHERE status NOT IN ('deleted', 'archived')
+```
+
+---
+
+## IS NULL and IS NOT NULL
+
+```sql
+SEARCH articles SIMILAR TO 'peer review' LIMIT 5 WHERE reviewer IS NULL
+SEARCH articles SIMILAR TO 'peer review' LIMIT 5 WHERE reviewer IS NOT NULL
+```
+
+---
+
+## IS EMPTY and IS NOT EMPTY
+
+```sql
+SEARCH articles SIMILAR TO 'untagged' LIMIT 5 WHERE tags IS EMPTY
+SEARCH articles SIMILAR TO 'categorized' LIMIT 5 WHERE tags IS NOT EMPTY
+```
+
+---
+
+## Full-text MATCH
+
+```sql
+-- All terms must appear in the field (requires a Qdrant full-text index)
+SEARCH articles SIMILAR TO 'search' LIMIT 10 WHERE title MATCH 'vector database'
+
+-- Any term can match
+SEARCH articles SIMILAR TO 'search' LIMIT 10 WHERE title MATCH ANY 'embedding retrieval'
+
+-- Exact phrase must appear
+SEARCH articles SIMILAR TO 'search' LIMIT 10 WHERE title MATCH PHRASE 'semantic search'
+```
+
+> To use `MATCH` operators efficiently, create a full-text index first:
+> ```sql
+> CREATE INDEX ON COLLECTION articles FOR title TYPE text
+> ```
+
+---
+
+## AND, OR, NOT — logical operators
+
+Operator precedence: `NOT` (highest) > `AND` > `OR` (lowest). Use parentheses to override.
+
+```sql
+-- AND: both conditions must be true
+SEARCH articles SIMILAR TO 'nlp' LIMIT 10 WHERE category = 'paper' AND year >= 2020
+
+-- OR: either condition can be true
+SEARCH articles SIMILAR TO 'llm' LIMIT 10 WHERE source = 'arxiv' OR source = 'pubmed'
+
+-- NOT: negate a condition
+SEARCH articles SIMILAR TO 'benchmark' LIMIT 10 WHERE NOT status = 'draft'
+
+-- Parentheses to group OR inside AND
+SEARCH articles SIMILAR TO 'conference paper' LIMIT 10
+  WHERE (source = 'arxiv' OR source = 'ieee') AND year >= 2022
+
+-- NOT on a parenthesized group
+SEARCH articles SIMILAR TO 'x' LIMIT 5 WHERE NOT (status = 'draft' OR status = 'deleted')
+```
+
+---
+
+## Dot-notation for nested fields
+
+```sql
+SEARCH articles SIMILAR TO 'wikipedia' LIMIT 5 WHERE meta.source = 'web'
+SEARCH cities SIMILAR TO 'large city' LIMIT 5 WHERE country.cities[].population > 1000000
+```
+
+---
+
+## WHERE also works in hybrid mode
+
+```sql
+SEARCH articles SIMILAR TO 'deep learning' LIMIT 10
+  USING HYBRID WHERE year BETWEEN 2020 AND 2024 AND status = 'published'
+```
+
+---
+
+## WHERE in DELETE
+
+```sql
+-- Delete by filter
+DELETE FROM articles WHERE category = 'archived'
+
+-- Delete with compound filter
+DELETE FROM articles WHERE year < 2020 AND status = 'draft'
+```
+
+---
+
+## Full filter reference
+
+| WHERE syntax | Description |
+|---|---|
+| `field = 'x'` | Exact match |
+| `field != 'x'` | Not equal |
+| `field > n` | Greater than |
+| `field >= n` | Greater than or equal |
+| `field < n` | Less than |
+| `field <= n` | Less than or equal |
+| `field BETWEEN a AND b` | Inclusive range |
+| `field IN ('a', 'b')` | Value in list |
+| `field NOT IN ('a', 'b')` | Value not in list |
+| `field IS NULL` | Field absent or null |
+| `field IS NOT NULL` | Field present and non-null |
+| `field IS EMPTY` | Field is an empty list |
+| `field IS NOT EMPTY` | Field is a non-empty list |
+| `field MATCH 'text'` | All terms present (full-text) |
+| `field MATCH ANY 'text'` | Any term present (full-text) |
+| `field MATCH PHRASE 'text'` | Exact phrase present (full-text) |
+| `A AND B` | Both conditions must hold |
+| `A OR B` | At least one condition must hold |
+| `NOT A` | Condition must not hold |
+| `(A OR B) AND C` | Parentheses for grouping |
+| `meta.source = 'x'` | Dot-notation nested field |
diff --git a/docs/getting-started.md b/docs/getting-started.md
new file mode 100644
index 0000000..f44c1fd
--- /dev/null
+++ b/docs/getting-started.md
@@ -0,0 +1,154 @@
+# Getting Started with QQL
+
+QQL is a SQL-like query language and CLI for [Qdrant](https://qdrant.tech). Instead of writing Python SDK calls, you write natural query statements to insert, search, manage, and delete vector data.
+
+---
+
+## How It Works
+
+Every statement goes through three stages:
+
+```
+Your query string
+      │
+      ▼
+  [ Lexer ]      — tokenizes the input into keywords, identifiers, literals
+      │
+      ▼
+  [ Parser ]     — builds a typed AST node (e.g. InsertStmt, SearchStmt)
+      │
+      ▼
+  [ Executor ]   — maps the AST node to a Qdrant client call
+      │
+      ▼
+  Qdrant instance
+```
+
+When you run `INSERT`, the `text` field is automatically converted into a dense vector using [Fastembed](https://github.com/qdrant/fastembed). In **hybrid mode** (`USING HYBRID`), a sparse BM25 vector is also generated alongside the dense vector, and searches use Qdrant's Reciprocal Rank Fusion (RRF) to merge the results of both retrieval methods.
+
+---
+
+## Installation
+
+**Requirements:** Python 3.12+, a running Qdrant instance.
+
+### From PyPI
+
+```bash
+pip install qql-cli
+```
+
+### From source (development)
+
+```bash
+git clone https://github.com/pavanjava/qql
+cd qql
+pip install -e .
+```
+
+Or with [uv](https://docs.astral.sh/uv/):
+
+```bash
+uv sync
+```
+
+After installation the `qql` command is available globally in your terminal.
+
+---
+
+## Connecting to Qdrant
+
+Before running any queries you must connect to a Qdrant instance. The connection config is saved to `~/.qql/config.json` and reused automatically in future sessions.
+
+### Local Qdrant (no API key)
+
+```bash
+qql connect --url http://localhost:6333
+```
+
+### Qdrant Cloud (with API key)
+
+```bash
+qql connect --url https://<your-cluster>.qdrant.io --secret <your-api-key>
+```
+
+On success you will see:
+
+```
+Connecting to http://localhost:6333...
+Connected. Config saved to ~/.qql/config.json
+
+QQL Interactive Shell  •  http://localhost:6333
+Type help for available commands or exit to quit.
+
+qql>
+```
+
+### Starting Qdrant locally with Docker
+
+If you do not have a Qdrant instance running yet:
+
+```bash
+docker run -p 6333:6333 qdrant/qdrant
+```
+
+### Disconnecting
+
+To remove the saved connection config:
+
+```bash
+qql disconnect
+```
+
+---
+
+## The QQL Shell
+
+Once connected, running `qql` alone (no arguments) reads the saved config and opens the interactive shell:
+
+```bash
+qql
+```
+
+Inside the shell:
+
+| Input | Effect |
+|---|---|
+| A QQL statement | Executes it and prints the result |
+| `help` or `?` or `\h` | Prints a reference of all available commands |
+| `EXECUTE <path>` or `\e <path>` | Runs a `.qql` script file |
+| `DUMP <name> <output.qql>` | Exports a collection to a script file |
+| `exit` or `quit` or `\q` or `:q` | Exits the shell |
+| Empty line / Enter | Ignored |
+| Ctrl-D or Ctrl-C | Exits the shell |
+
+All keywords are **case-insensitive** — `INSERT`, `insert`, and `Insert` all work.
+
+---
+
+## Your First Queries
+
+```sql
+-- Create a collection and insert a document
+INSERT INTO COLLECTION notes VALUES {'text': 'Qdrant is a vector database', 'author': 'alice', 'year': 2024}
+
+-- Search for similar documents
+SEARCH notes SIMILAR TO 'vector storage engines' LIMIT 3
+
+-- Filter results
+SEARCH notes SIMILAR TO 'vector databases' LIMIT 5 WHERE year >= 2023
+
+-- List all collections
+SHOW COLLECTIONS
+```
+
+---
+
+## Next Steps
+
+- [INSERT / INSERT BULK](insert.md) — adding documents
+- [SEARCH / RECOMMEND / Hybrid / RERANK](search.md) — querying
+- [WHERE Filters](filters.md) — payload filtering
+- [Collections & Quantization](collections.md) — managing collections
+- [Scripts: EXECUTE / DUMP](scripts.md) — automating with script files
+- [Embedding Models](reference.md#embedding-models) — model reference
diff --git a/docs/index.html b/docs/index.html
new file mode 100644
index 0000000..78cf475
--- /dev/null
+++ b/docs/index.html
@@ -0,0 +1,189 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+  <meta charset="UTF-8" />
+  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+
+  <!-- Primary SEO -->
+  <title>QQL — Qdrant Query Language | SQL-like CLI for Vector Search Engine</title>
+  <meta name="description" content="QQL is a SQL-like query language and CLI for Qdrant vector database. Write INSERT, SEARCH, RECOMMEND, and DELETE statements instead of Python SDK calls. Supports hybrid dense+sparse search, cross-encoder reranking, quantization, WHERE filters, and script execution." />
+  <meta name="keywords" content="qdrant, vector database, vector search, SQL query language, CLI, semantic search, hybrid search, embeddings, RAG, fastembed, NLP, dense sparse search" />
+  <meta name="author" content="Kameshwara Pavan Kumar Mantha" />
+  <link rel="canonical" href="https://pavanjava.github.io/qql/" />
+
+  <!-- Open Graph (link previews on social/Slack/LinkedIn) -->
+  <meta property="og:type" content="website" />
+  <meta property="og:url" content="https://pavanjava.github.io/qql/" />
+  <meta property="og:title" content="QQL — SQL-like Query Language for Qdrant" />
+  <meta property="og:description" content="Write INSERT, SEARCH, HYBRID, and RERANK queries for Qdrant instead of raw Python SDK calls. Open-source CLI + Python library." />
+  <meta property="og:image" content="https://raw.githubusercontent.com/pavanjava/qql/main/resources/qql-og.png" />
+
+  <!-- Twitter Card -->
+  <meta name="twitter:card" content="summary_large_image" />
+  <meta name="twitter:title" content="QQL — SQL-like Query Language for Qdrant" />
+  <meta name="twitter:description" content="Write INSERT, SEARCH, HYBRID, and RERANK queries for Qdrant instead of raw Python SDK calls." />
+
+  <!-- JSON-LD structured data -->
+  <script type="application/ld+json">
+  {
+    "@context": "https://schema.org",
+    "@type": "SoftwareApplication",
+    "name": "QQL — Qdrant Query Language",
+    "description": "A SQL-like query language and CLI for Qdrant vector database. Supports INSERT, SEARCH, RECOMMEND, hybrid dense+sparse search, cross-encoder reranking, quantization, WHERE filters, script execution, and collection dump/restore.",
+    "applicationCategory": "DeveloperApplication",
+    "operatingSystem": "Linux, macOS, Windows",
+    "programmingLanguage": "Python",
+    "url": "https://github.com/pavanjava/qql",
+    "downloadUrl": "https://pypi.org/project/qql-cli/",
+    "author": {
+      "@type": "Person",
+      "name": "Kameshwara Pavan Kumar Mantha"
+    },
+    "license": "https://opensource.org/licenses/MIT",
+    "keywords": "qdrant, vector database, vector search, SQL, CLI, semantic search, hybrid search, RAG, embeddings, fastembed"
+  }
+  </script>
+
+  <style>
+    *, *::before, *::after { box-sizing: border-box; }
+    body {
+      font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Helvetica, Arial, sans-serif;
+      margin: 0; padding: 0;
+      background: #0d1117; color: #e6edf3;
+      line-height: 1.6;
+    }
+    .hero {
+      max-width: 860px; margin: 0 auto;
+      padding: 80px 24px 40px;
+      text-align: center;
+    }
+    .hero h1 { font-size: 2.8rem; margin: 0 0 12px; color: #f0f6fc; }
+    .hero p.tagline { font-size: 1.2rem; color: #8b949e; margin: 0 0 32px; }
+    .badges { display: flex; gap: 8px; justify-content: center; flex-wrap: wrap; margin-bottom: 40px; }
+    .badges a img { height: 20px; }
+    pre {
+      background: #161b22; border: 1px solid #30363d; border-radius: 8px;
+      padding: 20px; text-align: left; overflow-x: auto;
+      font-size: 0.85rem; line-height: 1.5;
+    }
+    code { color: #79c0ff; }
+    .kw { color: #ff7b72; }
+    .str { color: #a5d6ff; }
+    .cmt { color: #8b949e; font-style: italic; }
+    .ok { color: #3fb950; }
+    .links {
+      max-width: 860px; margin: 0 auto; padding: 0 24px 40px;
+    }
+    .links h2 { color: #f0f6fc; border-bottom: 1px solid #30363d; padding-bottom: 8px; }
+    .grid {
+      display: grid;
+      grid-template-columns: repeat(auto-fill, minmax(220px, 1fr));
+      gap: 16px; margin-top: 16px;
+    }
+    .card {
+      background: #161b22; border: 1px solid #30363d; border-radius: 8px;
+      padding: 16px 20px; text-decoration: none; color: inherit;
+      transition: border-color 0.15s, transform 0.1s;
+    }
+    .card:hover { border-color: #58a6ff; transform: translateY(-2px); }
+    .card h3 { margin: 0 0 6px; font-size: 1rem; color: #58a6ff; }
+    .card p { margin: 0; font-size: 0.85rem; color: #8b949e; }
+    .cta {
+      max-width: 860px; margin: 0 auto; padding: 20px 24px 80px;
+      text-align: center;
+    }
+    .btn {
+      display: inline-block; padding: 12px 28px; border-radius: 6px;
+      font-size: 1rem; font-weight: 600; text-decoration: none; margin: 6px;
+    }
+    .btn-primary { background: #238636; color: #fff; }
+    .btn-primary:hover { background: #2ea043; }
+    .btn-secondary { background: #21262d; color: #e6edf3; border: 1px solid #30363d; }
+    .btn-secondary:hover { background: #30363d; }
+    footer { text-align: center; padding: 20px; color: #8b949e; font-size: 0.8rem;
+             border-top: 1px solid #21262d; }
+  </style>
+</head>
+<body>
+
+<div class="hero">
+  <h1>QQL</h1>
+  <p class="tagline">SQL-like query language and CLI for <a href="https://qdrant.tech" style="color:#58a6ff;">Qdrant</a> vector search engine</p>
+
+  <div class="badges">
+    <a href="https://pypi.org/project/qql-cli/"><img src="https://img.shields.io/pypi/v/qql-cli?color=blue&label=PyPI" alt="PyPI version" /></a>
+    <a href="https://pypi.org/project/qql-cli/"><img src="https://img.shields.io/pypi/pyversions/qql-cli" alt="Python versions" /></a>
+    <a href="https://github.com/pavanjava/qql/blob/main/LICENSE"><img src="https://img.shields.io/badge/license-MIT-green" alt="MIT License" /></a>
+    <a href="https://github.com/pavanjava/qql/actions"><img src="https://img.shields.io/badge/tests-375%20passing-brightgreen" alt="375 tests" /></a>
+  </div>
+
+  <pre><span class="cmt"># Install</span>
+<span class="ok">$</span> pip install qql-cli
+
+<span class="cmt"># Connect to Qdrant</span>
+<span class="ok">$</span> qql connect --url http://localhost:6333
+
+<span class="cmt"># Insert a document — text is embedded automatically</span>
+<span class="ok">qql&gt;</span> <span class="kw">INSERT INTO COLLECTION</span> notes <span class="kw">VALUES</span> {<span class="str">'text'</span>: <span class="str">'Qdrant is a vector database'</span>, <span class="str">'year'</span>: 2024}
+<span class="ok">✓ Inserted 1 point [3f2e1a4b-...]</span>
+
+<span class="cmt"># Search with a WHERE filter</span>
+<span class="ok">qql&gt;</span> <span class="kw">SEARCH</span> notes <span class="kw">SIMILAR TO</span> <span class="str">'vector storage engines'</span> <span class="kw">LIMIT</span> 5 <span class="kw">WHERE</span> year &gt;= 2023
+<span class="ok">✓ Found 1 result(s)</span>
+
+<span class="cmt"># Hybrid dense+sparse search with cross-encoder reranking</span>
+<span class="ok">qql&gt;</span> <span class="kw">SEARCH</span> notes <span class="kw">SIMILAR TO</span> <span class="str">'vector databases'</span> <span class="kw">LIMIT</span> 5 <span class="kw">USING HYBRID RERANK</span>
+<span class="ok">✓ Found 1 result(s) (hybrid, reranked)</span></pre>
+</div>
+
+<div class="links">
+  <h2>Documentation</h2>
+  <div class="grid">
+    <a class="card" href="getting-started">
+      <h3>Getting Started</h3>
+      <p>Installation, connecting to Qdrant, your first queries</p>
+    </a>
+    <a class="card" href="insert">
+      <h3>INSERT / INSERT BULK</h3>
+      <p>Adding documents, batch inserts, payload types</p>
+    </a>
+    <a class="card" href="search">
+      <h3>SEARCH / RECOMMEND</h3>
+      <p>Semantic search, hybrid search, reranking, recommendations</p>
+    </a>
+    <a class="card" href="filters">
+      <h3>WHERE Filters</h3>
+      <p>Full SQL-style filter operators — range, IN, MATCH, AND/OR/NOT</p>
+    </a>
+    <a class="card" href="collections">
+      <h3>Collections &amp; Quantization</h3>
+      <p>CREATE, DROP, quantize (scalar/binary/product), payload indexes</p>
+    </a>
+    <a class="card" href="scripts">
+      <h3>Scripts: EXECUTE / DUMP</h3>
+      <p>Run script files, export collections for backup/migration</p>
+    </a>
+    <a class="card" href="programmatic">
+      <h3>Programmatic Usage</h3>
+      <p>Use QQL as a Python library without the CLI</p>
+    </a>
+    <a class="card" href="reference">
+      <h3>Reference</h3>
+      <p>Embedding models, config file, project structure, error guide</p>
+    </a>
+  </div>
+</div>
+
+<div class="cta">
+  <a class="btn btn-primary" href="https://github.com/pavanjava/qql">View on GitHub</a>
+  <a class="btn btn-secondary" href="https://pypi.org/project/qql-cli/">PyPI Package</a>
+  <a class="btn btn-secondary" href="getting-started">Get Started →</a>
+</div>
+
+<footer>
+  QQL is open-source software released under the MIT License.<br/>
+  Built on <a href="https://qdrant.tech" style="color:#58a6ff;">Qdrant</a> and <a href="https://github.com/qdrant/fastembed" style="color:#58a6ff;">Fastembed</a>.
+</footer>
+
+</body>
+</html>
diff --git a/docs/insert.md b/docs/insert.md
new file mode 100644
index 0000000..4f33e07
--- /dev/null
+++ b/docs/insert.md
@@ -0,0 +1,156 @@
+# INSERT — Adding Documents to Qdrant
+
+QQL provides two insert statements: `INSERT` for single documents and `INSERT BULK` for batch inserts.
+
+---
+
+## INSERT — add a point
+
+Inserts a new document into a collection. The `text` field is **mandatory** — it is automatically vectorized and stored as the point's vector. Every field except `id` (including `text` itself) is also stored as searchable payload (metadata).
+
+If the collection does not exist yet, it is **created automatically** with the correct vector dimensions.
+
+If you include an `id` field in `VALUES`, QQL uses it as the Qdrant point ID. Supported explicit IDs are unsigned integers or UUID strings. If you omit `id`, QQL generates a UUID automatically.
+
+**Syntax:**
+```
+INSERT INTO COLLECTION <collection_name> VALUES {<dict>}
+INSERT INTO COLLECTION <collection_name> VALUES {<dict>} USING MODEL '<model_name>'
+INSERT INTO COLLECTION <collection_name> VALUES {<dict>} USING HYBRID
+INSERT INTO COLLECTION <collection_name> VALUES {<dict>} USING HYBRID DENSE MODEL '<model>' SPARSE MODEL '<model>'
+```
+
+**Examples:**
+
+Minimal insert (text only):
+```sql
+INSERT INTO COLLECTION articles VALUES {'text': 'Qdrant supports cosine similarity search'}
+```
+
+Insert with metadata:
+```sql
+INSERT INTO COLLECTION articles VALUES {
+  'id': 1001,
+  'text': 'Neural networks learn representations from data',
+  'author': 'alice',
+  'category': 'ml',
+  'year': 2024,
+  'published': true
+}
+```
+
+Insert with a specific embedding model:
+```sql
+INSERT INTO COLLECTION articles VALUES {'text': 'hello world'} USING MODEL 'BAAI/bge-small-en-v1.5'
+```
+
+Insert into a hybrid collection (dense + sparse BM25 vectors):
+```sql
+INSERT INTO COLLECTION articles VALUES {'text': 'Attention is all you need'} USING HYBRID
+```
+
+Insert with custom models for both dense and sparse:
+```sql
+INSERT INTO COLLECTION articles VALUES {'text': 'hello world'}
+  USING HYBRID DENSE MODEL 'BAAI/bge-base-en-v1.5' SPARSE MODEL 'prithivida/Splade_PP_en_v1'
+```
+
+**What happens internally:**
+1. The `text` value is embedded into a dense vector using the configured model.
+2. In hybrid mode, a sparse BM25 vector is also generated.
+3. If `id` is provided, it is used as the point ID; otherwise a UUID is auto-generated.
+4. All fields except `id` are stored in the payload.
+5. The point is upserted into Qdrant.
+
+**Rules:**
+- `text` is always required. Omitting it raises an error.
+- `id`, when provided, must be an unsigned integer or UUID string.
+- If the collection already exists with a different vector size (from a different model), an error is raised with a clear message.
+- Hybrid inserts require a hybrid collection, which is either created explicitly with `CREATE COLLECTION ... HYBRID` or auto-created on the first `USING HYBRID` insert. Hybrid mode is also **auto-detected**: if you omit `USING HYBRID` but the target collection is already a hybrid collection, QQL uses the hybrid insert path automatically.
+
+---
+
+## INSERT BULK — batch insert multiple points
+
+Inserts multiple documents in a single statement. Each item in the array must contain a `"text"` key. All items are embedded and upserted to Qdrant in **one batched call**, which is significantly faster than issuing one `INSERT` per record.
+
+If the collection does not exist yet, it is **created automatically** on the first bulk insert.
+
+Each record may optionally include an `id` field. This is the preferred way to keep seed data deterministic and to make follow-up operations like `RECOMMEND` or `DELETE` reproducible.
+
+**Syntax:**
+```
+INSERT BULK INTO COLLECTION <collection_name> VALUES [<dict>, <dict>, ...]
+INSERT BULK INTO COLLECTION <collection_name> VALUES [<dict>, ...] USING MODEL '<model_name>'
+INSERT BULK INTO COLLECTION <collection_name> VALUES [<dict>, ...] USING HYBRID
+INSERT BULK INTO COLLECTION <collection_name> VALUES [<dict>, ...] USING HYBRID DENSE MODEL '<model>' SPARSE MODEL '<model>'
+```
+
+**Examples:**
+
+Minimal bulk insert (text only):
+```sql
+INSERT BULK INTO COLLECTION articles VALUES [
+  {'text': 'Qdrant supports cosine similarity search'},
+  {'text': 'Sparse BM25 vectors enable keyword retrieval'},
+  {'text': 'Hybrid search combines dense and sparse results via RRF'}
+]
+```
+
+Bulk insert with metadata:
+```sql
+INSERT BULK INTO COLLECTION articles VALUES [
+  {'id': 1001, 'text': 'Attention is all you need', 'author': 'vaswani', 'year': 2017},
+  {'id': 1002, 'text': 'BERT: Pre-training of deep bidirectional transformers', 'author': 'devlin', 'year': 2018},
+  {'id': 1003, 'text': 'Language models are few-shot learners', 'author': 'brown', 'year': 2020}
+]
+```
+
+Bulk insert into a hybrid collection:
+```sql
+INSERT BULK INTO COLLECTION articles VALUES [
+  {'text': 'Dense retrieval with FAISS', 'domain': 'ir'},
+  {'text': 'Sparse retrieval with BM25', 'domain': 'ir'}
+] USING HYBRID
+```
+
+**Rules:**
+- Every dict in the array must contain a `"text"` key. Missing `text` on any item raises an error with the offending index.
+- An empty array `[]` raises an error.
+- `id`, when provided, must be an unsigned integer or UUID string.
+- Supports all the same `USING` clauses as single `INSERT`.
+
+---
+
+## Value Types in Payload Dicts
+
+The `VALUES` dictionary (and nested dicts) supports these types:
+
+| Type | Example | Notes |
+|---|---|---|
+| String | `'hello'` or `"hello"` | Single or double quotes |
+| Integer | `42`, `-7` | Whole numbers, negative allowed |
+| Float | `3.14`, `-0.5` | Decimal numbers |
+| Boolean | `true`, `false` | Case-insensitive |
+| Null | `null` | Case-insensitive |
+| Nested dict | `{'key': 'val'}` | Arbitrary nesting |
+| List | `['a', 'b', 1]` | Mixed types allowed |
+
+**Example using every type:**
+```sql
+INSERT INTO COLLECTION demo VALUES {
+  'text':    'example document',
+  'count':   42,
+  'score':   0.95,
+  'active':  true,
+  'deleted': false,
+  'ref':     null,
+  'meta':    {'source': 'web', 'lang': 'en'},
+  'tags':    ['ai', 'nlp', 'search']
+}
+```
+
+Trailing commas in dicts and lists are allowed:
+```sql
+INSERT INTO COLLECTION demo VALUES {'text': 'hi', 'x': 1,}
+```
diff --git a/docs/programmatic.md b/docs/programmatic.md
new file mode 100644
index 0000000..48d1cba
--- /dev/null
+++ b/docs/programmatic.md
@@ -0,0 +1,120 @@
+# Programmatic Usage
+
+QQL can be used as a Python library without the CLI.
+
+---
+
+## `run_query()` — high-level API
+
+```python
+from qql import run_query
+
+# Insert a document (dense-only)
+result = run_query(
+    "INSERT INTO COLLECTION notes VALUES {'text': 'hello world', 'author': 'alice', 'year': 2024}",
+    url="http://localhost:6333",
+)
+print(result.message)   # "Inserted 1 point [<id>]"
+print(result.data)      # {"id": 1001 or "<uuid>", "collection": "notes"}
+
+# Insert with hybrid vectors
+result = run_query(
+    "INSERT INTO COLLECTION notes VALUES {'text': 'hello world'} USING HYBRID",
+    url="http://localhost:6333",
+)
+print(result.message)   # "Inserted 1 point [<id>] (hybrid)"
+
+# Dense search with WHERE filter
+result = run_query(
+    "SEARCH notes SIMILAR TO 'hello' LIMIT 5 WHERE year >= 2023 AND author != 'bot'",
+    url="http://localhost:6333",
+)
+for hit in result.data:
+    print(hit["score"], hit["payload"])
+
+# Hybrid search with WHERE filter
+result = run_query(
+    "SEARCH notes SIMILAR TO 'hello' LIMIT 5 USING HYBRID WHERE year >= 2023",
+    url="http://localhost:6333",
+)
+for hit in result.data:
+    print(hit["score"], hit["payload"])
+
+# Bulk insert (all records embedded and upserted in one call)
+result = run_query(
+    """INSERT BULK INTO COLLECTION notes VALUES [
+      {'id': 1, 'text': 'first document', 'year': 2023},
+      {'id': 2, 'text': 'second document', 'year': 2024}
+    ]""",
+    url="http://localhost:6333",
+)
+print(result.message)   # "Inserted 2 points"
+
+# Recommend similar points using known IDs as positive examples
+result = run_query(
+    "RECOMMEND FROM notes POSITIVE IDS (1, 2) NEGATIVE IDS (3) LIMIT 5",
+    url="http://localhost:6333",
+)
+for hit in result.data:
+    print(hit["score"], hit["payload"])
+
+# Delete by filter
+result = run_query(
+    "DELETE FROM notes WHERE year < 2023",
+    url="http://localhost:6333",
+)
+print(result.message)   # "Deleted N point(s)"
+```
+
+---
+
+## Low-level pipeline API
+
+For more control, use the pipeline directly:
+
+```python
+from qdrant_client import QdrantClient
+from qql.lexer import Lexer
+from qql.parser import Parser
+from qql.executor import Executor
+from qql.config import QQLConfig
+
+client = QdrantClient(url="http://localhost:6333")
+config = QQLConfig(url="http://localhost:6333")
+executor = Executor(client, config)
+
+query = "SEARCH articles SIMILAR TO 'deep learning' LIMIT 10 USING HYBRID WHERE category = 'cv'"
+tokens = Lexer().tokenize(query)
+node = Parser(tokens).parse()
+result = executor.execute(node)
+
+for hit in result.data:
+    print(hit["score"], hit["payload"])
+```
+
+---
+
+## ExecutionResult
+
+All operations return an `ExecutionResult`:
+
+```python
+@dataclass
+class ExecutionResult:
+    success: bool       # True if operation succeeded
+    message: str        # Human-readable summary
+    data: Any           # Operation-specific payload (see below)
+```
+
+| Operation | `result.data` type |
+|---|---|
+| INSERT (dense) | `{"id": int \| "<uuid>", "collection": "<name>"}` |
+| INSERT (hybrid) | `{"id": int \| "<uuid>", "collection": "<name>"}` |
+| INSERT BULK | `None` (count in `result.message`) |
+| SEARCH | `[{"id": str, "score": float, "payload": dict}, ...]` |
+| RECOMMEND | `[{"id": str, "score": float, "payload": dict}, ...]` |
+| SHOW COLLECTIONS | `["name1", "name2", ...]` |
+| CREATE COLLECTION | `None` |
+| CREATE INDEX | `None` |
+| DROP COLLECTION | `None` |
+| DELETE | `None` |
diff --git a/docs/reference.md b/docs/reference.md
new file mode 100644
index 0000000..cf054d5
--- /dev/null
+++ b/docs/reference.md
@@ -0,0 +1,182 @@
+# Reference — Models, Config, Project Structure, Errors
+
+---
+
+## Embedding Models
+
+QQL uses [Fastembed](https://github.com/qdrant/fastembed) to convert text into vectors locally — no external API call is needed.
+
+### Dense embedding (default)
+
+```
+sentence-transformers/all-MiniLM-L6-v2
+```
+
+- Vector dimensions: **384**
+- Size: ~90 MB (downloaded on first use, cached locally)
+- Good balance of speed and quality for English text
+
+### Sparse embedding (hybrid mode default)
+
+```
+Qdrant/bm25
+```
+
+- Classic BM25 with IDF weighting
+- Indices and values are generated as a sparse vector; no fixed dimensions
+- Uses asymmetric encoding: `embed()` for documents, `query_embed()` for queries
+
+### Specifying models
+
+```sql
+-- Dense only with custom model
+INSERT INTO COLLECTION docs VALUES {'text': 'hello'} USING MODEL 'BAAI/bge-small-en-v1.5'
+SEARCH docs SIMILAR TO 'hello' LIMIT 5 USING MODEL 'BAAI/bge-small-en-v1.5'
+
+-- Hybrid with custom dense model
+SEARCH docs SIMILAR TO 'hello' LIMIT 5 USING HYBRID DENSE MODEL 'BAAI/bge-base-en-v1.5'
+
+-- Hybrid with both custom
+SEARCH docs SIMILAR TO 'hello' LIMIT 5
+  USING HYBRID DENSE MODEL 'BAAI/bge-base-en-v1.5' SPARSE MODEL 'prithivida/Splade_PP_en_v1'
+```
+
+### Commonly available dense models (Fastembed)
+
+| Model | Dimensions | Notes |
+|---|---|---|
+| `sentence-transformers/all-MiniLM-L6-v2` | 384 | Default. Fast, good general quality |
+| `BAAI/bge-small-en-v1.5` | 384 | Strong English retrieval |
+| `BAAI/bge-base-en-v1.5` | 768 | Higher quality, larger size |
+| `BAAI/bge-large-en-v1.5` | 1024 | Best quality, slowest |
+| `sentence-transformers/all-mpnet-base-v2` | 768 | Strong semantic similarity |
+
+### Commonly available sparse models (Fastembed)
+
+| Model | Notes |
+|---|---|
+| `Qdrant/bm25` | Default sparse model. Classic BM25 + IDF |
+| `prithivida/Splade_PP_en_v1` | SPLADE++ — strong keyword + semantic overlap |
+| `Qdrant/Unicoil` | UniCOIL sparse encoder |
+
+### Cross-encoder reranking (RERANK default)
+
+```
+cross-encoder/ms-marco-MiniLM-L-6-v2
+```
+
+| Model | Notes |
+|---|---|
+| `cross-encoder/ms-marco-MiniLM-L-6-v2` | Default. Fast passage reranker |
+| `cross-encoder/ms-marco-MiniLM-L-12-v2` | Larger, higher quality |
+| `BAAI/bge-reranker-base` | Strong general-purpose reranker |
+| `BAAI/bge-reranker-large` | Highest quality, slower |
+
+> Models are downloaded automatically on first use and cached by Fastembed.
+
+### Model consistency rule
+
+Every collection is created with a fixed vector size determined by the model used on first INSERT. If you try to INSERT using a different model that produces different dimensions, QQL raises:
+
+```
+Error: Vector dimension mismatch: collection 'docs' expects 384 dims,
+but model produces 768 dims. Specify a compatible model with USING MODEL '<model>'.
+```
+
+---
+
+## Value Types in Dictionaries
+
+| Type | Example | Notes |
+|---|---|---|
+| String | `'hello'` or `"hello"` | Single or double quotes |
+| Integer | `42`, `-7` | Whole numbers, negative allowed |
+| Float | `3.14`, `-0.5` | Decimal numbers |
+| Boolean | `true`, `false` | Case-insensitive |
+| Null | `null` | Case-insensitive |
+| Nested dict | `{'key': 'val'}` | Arbitrary nesting |
+| List | `['a', 'b', 1]` | Mixed types allowed |
+
+Trailing commas in dicts and lists are allowed.
+
+---
+
+## Configuration File
+
+The connection config is stored at `~/.qql/config.json`:
+
+```json
+{
+  "url": "http://localhost:6333",
+  "secret": null,
+  "default_model": "sentence-transformers/all-MiniLM-L6-v2"
+}
+```
+
+| Field | Description |
+|---|---|
+| `url` | Qdrant instance URL |
+| `secret` | API key (null if not required) |
+| `default_model` | Dense embedding model used when no `USING MODEL` clause is given |
+
+You can edit this file directly to change the default model without reconnecting.
+
+---
+
+## Project Structure
+
+```
+qql/
+├── pyproject.toml          # Package config; installs the `qql` CLI command
+├── src/
+│   └── qql/
+│       ├── __init__.py     # Public API: run_query()
+│       ├── cli.py          # CLI entry point: connect, disconnect, execute, dump, REPL
+│       ├── config.py       # QQLConfig dataclass + ~/.qql/config.json I/O
+│       ├── exceptions.py   # QQLError, QQLSyntaxError, QQLRuntimeError
+│       ├── lexer.py        # Tokenizer: string → List[Token]
+│       ├── ast_nodes.py    # Frozen dataclasses for each statement and filter type
+│       ├── parser.py       # Recursive descent parser: tokens → AST node
+│       ├── embedder.py     # Embedder (dense) + SparseEmbedder (BM25) + CrossEncoderEmbedder (rerank)
+│       ├── executor.py     # AST node → Qdrant client call + filter + hybrid search
+│       ├── script.py       # Script runner: parse and execute .qql files statement by statement
+│       └── dumper.py       # Collection exporter: scroll all points → .qql INSERT BULK script
+└── tests/
+    ├── test_lexer.py       # Tokenizer unit tests
+    ├── test_parser.py      # Parser unit tests
+    ├── test_executor.py    # Executor unit tests (mocked Qdrant client)
+    ├── test_script.py      # Script runner unit tests
+    └── test_dumper.py      # Dumper unit tests
+```
+
+---
+
+## Running Tests
+
+Tests do not require a running Qdrant instance — the Qdrant client is mocked.
+
+```bash
+pytest tests/ -v
+```
+
+Expected output: **375 tests passing**.
+
+---
+
+## Error Reference
+
+| Error | Cause | Fix |
+|---|---|---|
+| `Not connected. Run: qql connect --url <url>` | No `~/.qql/config.json` found | Run `qql connect --url <url>` first |
+| `Connection failed: ...` | Qdrant unreachable at given URL | Check that Qdrant is running and the URL is correct |
+| `INSERT requires a 'text' field in VALUES` | `text` key missing from the VALUES dict | Add `'text': '...'` to your dict |
+| `Vector dimension mismatch: collection '...' expects X dims, but model produces Y dims` | Model used in INSERT differs from the one used to create the collection | Use `USING MODEL` to specify the same model the collection was created with |
+| `Collection '...' does not exist` | SEARCH / DROP / DELETE on a non-existent collection | Check name spelling or run `SHOW COLLECTIONS` |
+| `Unexpected token '...'; expected a QQL statement keyword` | Unrecognized statement | Check the query syntax; QQL does not support SQL SELECT |
+| `Unterminated string literal (at position N)` | A string is missing its closing quote | Close the string with a matching `'` or `"` |
+| `Unexpected character '@' (at position N)` | A character not part of QQL syntax | Remove or quote the offending character |
+| `Expected a filter operator after field '...'` | Unknown operator in WHERE clause | Use one of: `=`, `!=`, `>`, `>=`, `<`, `<=`, `IN`, `NOT IN`, `BETWEEN`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `MATCH` |
+| `Expected ')' ...` | Unclosed parenthesis in WHERE clause | Add the missing `)` to close the group |
+| `Qdrant error during SEARCH: ...` | Hybrid search on a non-hybrid collection, or wrong vector names | Ensure the collection was created with `HYBRID` before using `USING HYBRID` in INSERT/SEARCH |
+| `Unknown index type '...'` | Invalid schema type in CREATE INDEX | Use one of: `keyword`, `integer`, `float`, `bool`, `text`, `geo`, `datetime` |
+| `Qdrant error during CREATE INDEX: ...` | Qdrant rejected the index creation | Check field name and collection state |
diff --git a/docs/robots.txt b/docs/robots.txt
new file mode 100644
index 0000000..19e85db
--- /dev/null
+++ b/docs/robots.txt
@@ -0,0 +1,4 @@
+User-agent: *
+Allow: /
+
+Sitemap: https://pavanjava.github.io/qql/sitemap.xml
diff --git a/docs/scripts.md b/docs/scripts.md
new file mode 100644
index 0000000..6fa75f1
--- /dev/null
+++ b/docs/scripts.md
@@ -0,0 +1,159 @@
+# Script Files — EXECUTE and DUMP
+
+QQL supports reading from and writing to `.qql` script files, making it easy to automate bulk operations, seed databases, and back up collections.
+
+---
+
+## EXECUTE — run a .qql script file
+
+Execute a file containing multiple QQL statements in sequence. Each statement is parsed and executed in order. `--` comments are stripped before parsing.
+
+**CLI usage:**
+```bash
+qql execute /path/to/script.qql
+
+# Stop on first error instead of continuing through all statements
+qql execute /path/to/script.qql --stop-on-error
+```
+
+**In-shell usage (inside the QQL REPL):**
+```
+qql> EXECUTE /path/to/script.qql
+qql> \e /path/to/script.qql
+```
+
+**Script format:**
+
+```sql
+-- This is a comment — the entire line is ignored
+-- ============================================================
+--  QQL Script — populate articles collection
+-- ============================================================
+
+-- Step 1: create the collection
+CREATE COLLECTION articles
+
+-- Step 2: bulk insert records
+INSERT BULK INTO COLLECTION articles VALUES [
+  {'text': 'Neural networks learn representations', 'year': 2023},
+  {'text': 'Attention mechanisms in transformers',  'year': 2024}
+]
+
+-- Step 3: verify
+SHOW COLLECTIONS
+```
+
+**Rules:**
+- `--` to end-of-line is a comment and is ignored (inline or full-line)
+- Statements can span multiple lines (e.g. `INSERT BULK ... VALUES [...]`)
+- `RECOMMEND` statements work in `.qql` files the same way they do in the REPL
+- Blank lines between statements are ignored
+- By default all statements run even if one fails; use `--stop-on-error` to halt early
+
+**Included examples:**
+- [`resources/sample.qql`](../resources/sample.qql) seeds the demo medical dataset
+- [`resources/sample_v2.qql`](../resources/sample_v2.qql) is a compact end-to-end example with explicit IDs and runnable `RECOMMEND` statements
+
+**Example output:**
+```
+Executing: /path/to/script.qql
+
+[1/3] CREATE COLLECTION articles
+  ✓ Collection 'articles' created (384-dimensional vectors, cosine distance)
+[2/3] INSERT BULK INTO COLLECTION articles VALUES [ …
+  ✓ Inserted 2 points
+[3/3] SHOW COLLECTIONS
+  ✓ 1 collection(s) found
+
+Done. 3/3 statement(s) succeeded.
+```
+
+---
+
+## DUMP COLLECTION — export collection to a .qql script file
+
+Export the points in a collection to a `.qql` script file. The generated file is valid QQL that re-creates the collection and re-inserts all payload data. Points are written in batches of 50 as `INSERT BULK` statements; points without a `'text'` payload field are skipped (see the notes below).
+
+> **Scope of a dump:** The generated script preserves collection topology (dense vs hybrid) and all point payloads. It does **not** preserve quantization config, pinned model / vector dimensions, or payload indexes — those must be re-applied manually after import if needed.
+
+**CLI usage:**
+```bash
+qql dump <collection_name> <output.qql>
+```
+
+**In-shell usage (inside the QQL REPL):**
+```
+qql> DUMP COLLECTION <name> <output.qql>
+qql> DUMP <name> <output.qql>
+```
+
+Both forms are equivalent. The shorter `DUMP <name> <file>` form is a convenience shorthand.
+
+**Example:**
+```bash
+qql dump medical_records /tmp/medical_records.qql
+```
+
+```
+Dumping: 'medical_records'  →  /tmp/medical_records.qql
+
+  Collection type : hybrid (dense + sparse)
+  Points          : 41
+  Batches         : 1  (50 points/batch)
+
+  [1/1] wrote 41 point(s)
+
+Done. 41 point(s) written.
+```
+
+**Generated file structure:**
+```sql
+-- ============================================================
+-- QQL Dump — collection: medical_records
+-- Generated : 2026-04-19 14:32:11
+-- Points    : 41
+-- Type      : hybrid (dense + sparse)
+-- Note      : Re-importing re-embeds all text using the
+--             configured model (see: qql connect).
+-- ============================================================
+
+CREATE COLLECTION medical_records HYBRID
+
+-- Batch 1 / 1  (records 1–41)
+INSERT BULK INTO COLLECTION medical_records VALUES [
+  {
+    'text': 'Alzheimers disease is characterized by...',
+    'title': 'Alzheimers Disease Overview',
+    'department': 'neurology',
+    'year': 2023,
+    'peer_reviewed': true
+  },
+  ...
+] USING HYBRID
+
+-- ============================================================
+-- End of dump
+-- Written : 41
+-- Skipped : 0  (no 'text' field)
+-- ============================================================
+```
+
+**Round-trip workflow — data migration / partial restore:**
+```bash
+# 1. Dump the collection
+qql dump medical_records backup.qql
+
+# 2. Drop it (run this inside the QQL REPL, not the system shell)
+qql> DROP COLLECTION medical_records
+
+# 3. Restore from the dump
+qql execute backup.qql
+```
+
+**Rules and notes:**
+- Points without a `'text'` payload field are **skipped** (counted in the footer comment).
+- Hybrid collections produce `CREATE COLLECTION <name> HYBRID` and `INSERT BULK ... USING HYBRID` statements.
+- Dense collections produce plain `CREATE COLLECTION <name>` and `INSERT BULK` statements.
+- All payload value types are preserved: strings, integers, floats, booleans (`true`/`false`), `null`, lists, and nested dicts.
+- Re-importing re-embeds all text using your currently configured model — use the same model as the original collection to preserve semantic accuracy.
+- Parent directories of the output path are created automatically.
diff --git a/docs/search.md b/docs/search.md
new file mode 100644
index 0000000..55d4e10
--- /dev/null
+++ b/docs/search.md
@@ -0,0 +1,293 @@
+# SEARCH, RECOMMEND, Hybrid Search & Reranking
+
+---
+
+## SEARCH — find similar points
+
+Performs a **semantic similarity search**: your query text is embedded with the same model used during insert, then Qdrant finds the nearest vectors by cosine distance.
+
+An optional `WHERE` clause filters the candidate set **before** similarity ranking so you only get results that match both the semantic query and the payload conditions.
+
+**Syntax:**
+```
+SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n>
+SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n> USING MODEL '<model_name>'
+SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n> [USING MODEL '<model>'] WHERE <filter>
+SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n> USING HYBRID
+SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n> USING HYBRID [DENSE MODEL '<model>'] [SPARSE MODEL '<model>'] [WHERE <filter>]
+SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n> USING SPARSE [MODEL '<sparse_model>']
+SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n> EXACT
+SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n> [USING ...] [WHERE <filter>] [RERANK] WITH { hnsw_ef: <n>, exact: true|false, acorn: true|false }
+SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n> [USING ...] [WHERE <filter>] RERANK [MODEL '<reranker_model>']
+```
+
+**Examples:**
+
+Basic search, return top 5 results:
+```sql
+SEARCH articles SIMILAR TO 'machine learning algorithms' LIMIT 5
+```
+
+Search only papers published after 2020:
+```sql
+SEARCH articles SIMILAR TO 'deep learning' LIMIT 10 WHERE year > 2020
+```
+
+Hybrid search (combines dense semantic + sparse BM25 keyword retrieval via RRF):
+```sql
+SEARCH articles SIMILAR TO 'attention mechanism' LIMIT 10 USING HYBRID
+```
+
+Sparse-only search (queries only the `sparse` named vector — useful for pure keyword retrieval):
+```sql
+SEARCH medical_knowledge SIMILAR TO 'beta blocker contraindications' LIMIT 5 USING SPARSE
+```
+
+> **Sparse scores are unbounded dot products.** Unlike dense cosine similarity (which is bounded between −1 and 1), sparse vector scores are raw dot products that can exceed 1.0 — scores like 8.3 or 14.5 are perfectly normal and expected. Do not compare sparse scores to dense cosine scores; they are on different scales.
+
+Exact search for recall debugging:
+```sql
+SEARCH articles SIMILAR TO 'attention mechanism' LIMIT 10 EXACT
+```
+
+Search with query-time HNSW tuning:
+```sql
+SEARCH articles SIMILAR TO 'attention mechanism' LIMIT 10 WITH { hnsw_ef: 128 }
+```
+
+**Output:**
+
+Results are displayed as a table with three columns:
+
+```
+ Score  │ ID                                   │ Payload
+────────┼──────────────────────────────────────┼──────────────────────────────────
+ 0.9241 │ 3f2e1a4b-...                          │ {'text': 'Neural networks...', 'author': 'alice'}
+ 0.8817 │ 7a1b2c3d-...                          │ {'text': 'Attention is all...', 'tags': [...]}
+```
+
+**Important:** Use the same model for SEARCH as you used for INSERT. Mixing models produces meaningless scores because the vectors live in different spaces.
+
+---
+
+## Query-Time Search Params (`EXACT`, `WITH`)
+
+Use these when you want to debug retrieval quality or tune recall without changing collection-level settings.
+
+| Syntax | Effect |
+|---|---|
+| `EXACT` | Shorthand for exact KNN search (`exact=true`) |
+| `WITH { hnsw_ef: 128 }` | Increase HNSW exploration at query time |
+| `WITH { exact: true }` | Force exact KNN explicitly |
+| `WITH { acorn: true }` | Enable ACORN for filtered queries |
+
+- `EXACT` can appear after `LIMIT` or after `RERANK`
+- `WITH { ... }` can appear after `WHERE` and/or `RERANK`
+- Supported `WITH` keys are only `hnsw_ef`, `exact`, and `acorn`
+
+```sql
+-- Exact KNN baseline
+SEARCH articles SIMILAR TO 'programming language' LIMIT 5 EXACT
+
+-- Raise HNSW ef at query time
+SEARCH articles SIMILAR TO 'transformers' LIMIT 10 WITH { hnsw_ef: 256 }
+
+-- Filtered search with ACORN
+SEARCH articles SIMILAR TO 'RAG' LIMIT 10 WHERE tag = 'li' WITH { acorn: true }
+```
+
+---
+
+## Hybrid Search (USING HYBRID)
+
+Hybrid search combines **dense semantic vectors** and **sparse BM25 keyword vectors** in a single query and merges the results with Qdrant's **Reciprocal Rank Fusion (RRF)** algorithm. This typically outperforms either method alone.
+
+### How it works internally
+
+1. Both a dense vector (`TextEmbedding`) and a sparse BM25 vector (`SparseTextEmbedding`) are generated from your query text.
+2. Qdrant fetches the top candidates from each index independently (`prefetch limit = LIMIT × 4`).
+3. The two result lists are merged using RRF — a rank-based fusion that does not require score normalization.
+4. The final top-N results are returned.
+
+### Step 1: Create a hybrid collection
+
+```sql
+-- Shorthand (backward compatible)
+CREATE COLLECTION articles HYBRID
+
+-- USING form — allows specifying a dense model
+CREATE COLLECTION articles USING HYBRID
+CREATE COLLECTION articles USING HYBRID DENSE MODEL 'BAAI/bge-base-en-v1.5'
+```
+
+### Step 2: Insert with hybrid vectors
+
+```sql
+INSERT INTO COLLECTION articles VALUES {
+  'text': 'Attention is all you need',
+  'author': 'Vaswani et al.',
+  'year': 2017
+} USING HYBRID
+```
+
+### Step 3: Search with hybrid retrieval
+
+```sql
+-- Basic hybrid search
+SEARCH articles SIMILAR TO 'transformer architecture' LIMIT 10 USING HYBRID
+
+-- Hybrid search with a WHERE filter
+SEARCH articles SIMILAR TO 'attention' LIMIT 10 USING HYBRID WHERE year >= 2017
+
+-- Hybrid with custom dense model
+SEARCH articles SIMILAR TO 'embeddings' LIMIT 5
+  USING HYBRID DENSE MODEL 'BAAI/bge-base-en-v1.5'
+
+-- Hybrid with both custom models
+SEARCH articles SIMILAR TO 'sparse retrieval' LIMIT 5
+  USING HYBRID DENSE MODEL 'BAAI/bge-base-en-v1.5' SPARSE MODEL 'prithivida/Splade_PP_en_v1'
+```
+
+### Model defaults in hybrid mode
+
+| Argument | Default |
+|---|---|
+| Dense model | configured default (`sentence-transformers/all-MiniLM-L6-v2`) |
+| Sparse model | `Qdrant/bm25` |
+
+### Dense vs. hybrid — when to use which
+
+| Situation | Recommendation |
+|---|---|
+| Semantic similarity (paraphrasing, synonyms) | Dense only |
+| Exact keyword matching (product codes, names) | Hybrid or sparse-only (`USING SPARSE`) |
+| General-purpose retrieval (unknown query distribution) | Hybrid |
+| Low latency / small collection | Dense only |
+
+---
+
+## RECOMMEND — retrieve by example IDs
+
+Performs a Qdrant recommendation query using existing point IDs as positive and optional negative examples. Qdrant uses those examples to retrieve nearby points, and QQL automatically excludes the seed IDs from the results.
+
+**Syntax:**
+```sql
+RECOMMEND FROM <collection_name> POSITIVE IDS (<id>, ...) LIMIT <n>
+RECOMMEND FROM <collection_name> POSITIVE IDS (<id>, ...) NEGATIVE IDS (<id>, ...) LIMIT <n>
+RECOMMEND FROM <collection_name> POSITIVE IDS (<id>, ...) STRATEGY '<strategy>' LIMIT <n>
+RECOMMEND FROM <collection_name> POSITIVE IDS (<id>, ...) LIMIT <n> WHERE <filter>
+RECOMMEND FROM <collection_name> POSITIVE IDS (<id>, ...) LIMIT <n> OFFSET <n>
+RECOMMEND FROM <collection_name> POSITIVE IDS (<id>, ...) LIMIT <n> SCORE THRESHOLD <f>
+RECOMMEND FROM <collection_name> POSITIVE IDS (<id>, ...) LIMIT <n> WITH { exact: true, hnsw_ef: <n> }
+RECOMMEND FROM <collection_name> POSITIVE IDS (<id>, ...) LOOKUP FROM <collection> LIMIT <n>
+RECOMMEND FROM <collection_name> POSITIVE IDS (<id>, ...) LOOKUP FROM <collection> VECTOR '<name>' LIMIT <n>
+RECOMMEND FROM <collection_name> POSITIVE IDS (<id>, ...) USING '<vector_name>' LIMIT <n>
+```
+
+**Examples:**
+
+Recommend more results like two known articles:
+```sql
+RECOMMEND FROM articles POSITIVE IDS (1001, 1002) LIMIT 5
+```
+
+Recommend similar results while steering away from one bad example:
+```sql
+RECOMMEND FROM articles POSITIVE IDS (1001, 1002) NEGATIVE IDS (1009) LIMIT 5
+```
+
+Use Qdrant's `best_score` recommendation strategy:
+```sql
+RECOMMEND FROM articles POSITIVE IDS (1001) STRATEGY 'best_score' LIMIT 10
+```
+
+Recommend only within a filtered subset:
+```sql
+RECOMMEND FROM articles POSITIVE IDS (1001) LIMIT 5 WHERE year >= 2020 AND status = 'published'
+```
+
+Cross-collection recommend (look up example IDs from another collection):
+```sql
+RECOMMEND FROM target_collection
+  POSITIVE IDS ('a')
+  LOOKUP FROM source_collection VECTOR 'dense'
+  LIMIT 5
+```
+
+Full-featured recommend:
+```sql
+RECOMMEND FROM articles
+  POSITIVE IDS (1001, 1002)
+  NEGATIVE IDS (1009)
+  STRATEGY 'best_score'
+  LOOKUP FROM other_collection VECTOR 'dense'
+  USING 'dense'
+  LIMIT 10
+  OFFSET 5
+  SCORE THRESHOLD 0.5
+  WHERE year >= 2020
+  WITH { exact: true }
+```
+
+**Supported strategies:** `average_vector`, `best_score`, `sum_scores`
+
+**Clause order:** `POSITIVE IDS` → `NEGATIVE IDS` → `STRATEGY` → `LOOKUP FROM` → `USING` → `LIMIT` → `OFFSET` → `SCORE THRESHOLD` → `WHERE` → `WITH`
+
+---
+
+## Cross-Encoder Reranking (RERANK)
+
+Appending `RERANK` to any SEARCH statement activates a **second-pass relevance scoring** step using a cross-encoder model. Cross-encoders process the **(query, document)** pair jointly, producing a more accurate relevance score at the cost of extra compute.
+
+### How it works internally
+
+1. Qdrant executes the normal dense or hybrid search, but fetches `LIMIT × 4` candidates.
+2. Each candidate's `payload["text"]` is paired with the original query text.
+3. The cross-encoder scores all (query, document) pairs in one batch.
+4. Results are sorted **descending by cross-encoder score** and sliced to `LIMIT`.
+5. The `score` column reflects the cross-encoder relevance score (raw logits — higher is more relevant).
+
+**Syntax:**
+```
+SEARCH <name> SIMILAR TO '<query>' LIMIT <n> RERANK
+SEARCH <name> SIMILAR TO '<query>' LIMIT <n> RERANK MODEL '<cross_encoder_model>'
+SEARCH ... LIMIT n [USING ...] [WHERE ...] RERANK [MODEL '...']
+```
+
+**Examples:**
+
+Dense search + rerank (default cross-encoder):
+```sql
+SEARCH articles SIMILAR TO 'machine learning for healthcare' LIMIT 5 RERANK
+```
+
+Hybrid search + rerank (best of all three worlds):
+```sql
+SEARCH articles SIMILAR TO 'attention mechanism in transformers' LIMIT 10 USING HYBRID RERANK
+```
+
+Custom cross-encoder model:
+```sql
+SEARCH articles SIMILAR TO 'semantic search' LIMIT 5
+  RERANK MODEL 'cross-encoder/ms-marco-MiniLM-L-6-v2'
+```
+
+**Default cross-encoder model:** `cross-encoder/ms-marco-MiniLM-L-6-v2`
+
+| Model | Notes |
+|---|---|
+| `cross-encoder/ms-marco-MiniLM-L-6-v2` | Default. Fast and accurate for passage reranking |
+| `cross-encoder/ms-marco-MiniLM-L-12-v2` | Larger, higher quality, slower |
+| `BAAI/bge-reranker-base` | BGE reranker, strong general-purpose performance |
+| `BAAI/bge-reranker-large` | Highest quality BGE reranker, slower |
+
+### When to use RERANK
+
+| Situation | Recommendation |
+|---|---|
+| High-precision retrieval (legal, medical, research) | Add `RERANK` |
+| Small LIMIT (top-3 or top-5 results) | Very effective |
+| Low latency required | Skip `RERANK` (adds ~100–500 ms per batch) |
+| Large collections with keyword-heavy queries | `USING HYBRID RERANK` |
+
+> **Note on scores:** After reranking, the `score` column shows the cross-encoder's raw logit (can be any real number, unbounded). Do not compare reranked scores to non-reranked cosine similarity scores.
diff --git a/docs/sitemap.xml b/docs/sitemap.xml
new file mode 100644
index 0000000..48606b2
--- /dev/null
+++ b/docs/sitemap.xml
@@ -0,0 +1,58 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
+
+  <url>
+    <loc>https://pavanjava.github.io/qql/</loc>
+    <changefreq>monthly</changefreq>
+    <priority>1.0</priority>
+  </url>
+
+  <url>
+    <loc>https://pavanjava.github.io/qql/getting-started</loc>
+    <changefreq>monthly</changefreq>
+    <priority>0.9</priority>
+  </url>
+
+  <url>
+    <loc>https://pavanjava.github.io/qql/insert</loc>
+    <changefreq>monthly</changefreq>
+    <priority>0.8</priority>
+  </url>
+
+  <url>
+    <loc>https://pavanjava.github.io/qql/search</loc>
+    <changefreq>monthly</changefreq>
+    <priority>0.8</priority>
+  </url>
+
+  <url>
+    <loc>https://pavanjava.github.io/qql/filters</loc>
+    <changefreq>monthly</changefreq>
+    <priority>0.8</priority>
+  </url>
+
+  <url>
+    <loc>https://pavanjava.github.io/qql/collections</loc>
+    <changefreq>monthly</changefreq>
+    <priority>0.8</priority>
+  </url>
+
+  <url>
+    <loc>https://pavanjava.github.io/qql/scripts</loc>
+    <changefreq>monthly</changefreq>
+    <priority>0.7</priority>
+  </url>
+
+  <url>
+    <loc>https://pavanjava.github.io/qql/programmatic</loc>
+    <changefreq>monthly</changefreq>
+    <priority>0.7</priority>
+  </url>
+
+  <url>
+    <loc>https://pavanjava.github.io/qql/reference</loc>
+    <changefreq>monthly</changefreq>
+    <priority>0.7</priority>
+  </url>
+
+</urlset>
diff --git a/pyproject.toml b/pyproject.toml
index 9ee49af..9aec66b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,7 +1,7 @@
 [project]
 name = "qql-cli"
-version = "1.4.0"
-description = "A SQL-like query language CLI wrapper for Qdrant vector database"
+version = "2.0.0"
+description = "QQL is a SQL-like query language and CLI for Qdrant vector database. Write INSERT, SEARCH, RECOMMEND, DELETE, and CREATE COLLECTION statements instead of Python SDK calls. Supports hybrid dense+sparse vector search, cross-encoder reranking, quantization (scalar, binary, product), WHERE clause filters, script execution, and collection dump/restore."
 readme = "README.md"
 license = { file = "LICENSE" }
 requires-python = ">=3.12"
@@ -11,10 +11,16 @@ authors = [
 keywords = [
     "qdrant",
     "vector-database",
+    "vector-search",
     "cli",
     "query-language",
+    "sql",
     "embeddings",
     "semantic-search",
+    "hybrid-search",
+    "rag",
+    "nlp",
+    "fastembed",
 ]
 classifiers = [
     "Development Status :: 3 - Alpha",
@@ -27,6 +33,8 @@ classifiers = [
     "Programming Language :: Python :: 3.13",
     "Topic :: Database :: Front-Ends",
     "Topic :: Scientific/Engineering :: Artificial Intelligence",
+    "Topic :: Utilities",
+    "Topic :: Text Processing :: Indexing",
 ]
 dependencies = [
     "qdrant-client[fastembed]>=1.13.0",
@@ -36,8 +44,9 @@ dependencies = [
 ]
 
 [project.urls]
-Homepage = "https://github.com/pavanjava/qql"
-Repository = "https://github.com/pavanjava/qql"
+Homepage      = "https://github.com/pavanjava/qql"
+Repository    = "https://github.com/pavanjava/qql"
+Documentation = "https://pavanjava.github.io/qql"
 "Bug Tracker" = "https://github.com/pavanjava/qql/issues"
 
 [project.scripts]