diff --git a/README.md b/README.md
index e7f3af2..4dc5663 100644
--- a/README.md
+++ b/README.md
@@ -5,7 +5,7 @@
 [![PyPI version](https://img.shields.io/pypi/v/qql-cli?color=blue&label=PyPI)](https://pypi.org/project/qql-cli/)
 [![Python 3.12+](https://img.shields.io/pypi/pyversions/qql-cli)](https://pypi.org/project/qql-cli/)
 [![MIT License](https://img.shields.io/badge/license-MIT-green)](LICENSE)
-[![Tests](https://img.shields.io/badge/tests-500%20passing-brightgreen)](tests/)
+[![Tests](https://img.shields.io/badge/tests-549%20passing-brightgreen)](tests/)
 
 Write `INSERT`, `SELECT`, `SEARCH`, `SCROLL`, `RECOMMEND`, `UPDATE`, `DELETE`, and `CREATE COLLECTION` statements instead of Python SDK calls. Supports hybrid dense+sparse vector search, grouped search (GROUP BY), cross-encoder reranking, quantization (scalar, turbo, binary, product), SQL-style `WHERE` filters, script execution, and collection dump/restore.
 
@@ -50,6 +50,18 @@ Your query string
 
 When you run `INSERT`, the `text` field is automatically converted into a dense vector using [Fastembed](https://github.com/qdrant/fastembed). In **hybrid mode** (`USING HYBRID`), a sparse BM25 vector is also generated alongside the dense vector, and searches use Qdrant's Reciprocal Rank Fusion (RRF) by default to merge the results of both retrieval methods. You can switch hybrid search to DBSF with `FUSION 'dbsf'`.
 
+QQL also exposes a **programmatic API** for use inside Python applications — no CLI required:
+
+```python
+from qql import Connection
+
+with Connection("http://localhost:6333") as conn:
+    conn.run_query("INSERT INTO COLLECTION notes VALUES {'text': 'Qdrant is fast'}")
+    result = conn.run_query("SEARCH notes SIMILAR TO 'vector database' LIMIT 5")
+    for hit in result.data:
+        print(hit["score"], hit["payload"])
+```
+
 ---
 
 ## Installation
@@ -86,7 +98,7 @@ Full documentation lives in the [`docs/`](docs/) folder and at **[pavanjava.gith
 | [WHERE Filters](docs/filters.md) | Full SQL-style filter operators |
 | [Collections & Quantization](docs/collections.md) | SHOW, CREATE, DROP, QUANTIZE (scalar/turbo/binary/product), CREATE INDEX, UPDATE VECTOR, UPDATE PAYLOAD |
 | [Scripts: EXECUTE / DUMP](docs/scripts.md) | Script files, collection backup/restore |
-| [Programmatic Usage](docs/programmatic.md) | Use QQL as a Python library |
+| [Programmatic Usage](docs/programmatic.md) | Use QQL as a Python library via `Connection` or `run_query()` |
 | [Reference: Models / Config / Errors](docs/reference.md) | Embedding models, config file, error reference |
 
 ---
@@ -166,7 +178,7 @@ Tests do not require a running Qdrant instance — the Qdrant client is mocked.
 pytest tests/ -v
 ```
 
-Expected: **500 tests passing**.
+Expected: **549 tests passing**.
 
 ---
 
diff --git a/docs/programmatic.md b/docs/programmatic.md
index ad85fa4..1d0e6b6 100644
--- a/docs/programmatic.md
+++ b/docs/programmatic.md
@@ -4,98 +4,203 @@ QQL can be used as a Python library without the CLI.
 
 ---
 
-## `run_query()` — high-level API
+## `Connection` — Primary API
+
+`Connection` is the recommended way to use QQL programmatically. It creates
+one Qdrant client up front and reuses it for every `run_query()` call, which
+is more efficient than the legacy `run_query()` function: that function
+creates (and closes) a new client on every invocation.
+
+### Basic usage
 
 ```python
-from qql import run_query
+from qql import Connection
+
+conn = Connection("http://localhost:6333")
 
 # Insert a document (dense-only)
-result = run_query(
-    "INSERT INTO COLLECTION notes VALUES {'text': 'hello world', 'author': 'alice', 'year': 2024}",
-    url="http://localhost:6333",
+result = conn.run_query(
+    "INSERT INTO COLLECTION notes VALUES {'text': 'hello world', 'author': 'alice', 'year': 2024}"
 )
 print(result.message)   # "Inserted 1 point [<id>]"
-print(result.data)      # {"id": 1001 or "<uuid>", "collection": "notes"}
+print(result.data)      # {"id": "<uuid>", "collection": "notes"}
 
-# Insert with hybrid vectors
-result = run_query(
-    "INSERT INTO COLLECTION notes VALUES {'text': 'hello world'} USING HYBRID",
-    url="http://localhost:6333",
-)
-print(result.message)   # "Inserted 1 point [<id>] (hybrid)"
-
-# Dense search with WHERE filter
-result = run_query(
-    "SEARCH notes SIMILAR TO 'hello' LIMIT 5 WHERE year >= 2023 AND author != 'bot'",
-    url="http://localhost:6333",
+# Search
+result = conn.run_query(
+    "SEARCH notes SIMILAR TO 'hello' LIMIT 5 WHERE year >= 2023"
 )
 for hit in result.data:
     print(hit["score"], hit["payload"])
 
-# Hybrid search with WHERE filter
-result = run_query(
-    "SEARCH notes SIMILAR TO 'hello' LIMIT 5 USING HYBRID WHERE year >= 2023",
-    url="http://localhost:6333",
-)
-for hit in result.data:
-    print(hit["score"], hit["payload"])
+conn.close()
+```
 
-# Scroll / pagination
-result = run_query(
-    "SCROLL FROM notes LIMIT 2",
-    url="http://localhost:6333",
-)
-for point in result.data["points"]:
-    print(point["id"], point["payload"])
-print(result.data["next_offset"])
+### Context manager (preferred)
 
-# Bulk insert (all records embedded and upserted in one call)
-result = run_query(
-    """INSERT BULK INTO COLLECTION notes VALUES [
-      {'id': 1, 'text': 'first document', 'year': 2023},
-      {'id': 2, 'text': 'second document', 'year': 2024}
-    ]""",
-    url="http://localhost:6333",
-)
-print(result.message)   # "Inserted 2 points"
+The context manager calls `conn.close()` on exit, so the HTTP connection pool
+is released even if the body raises an exception:
 
-# Recommend similar points using known IDs as positive examples
-result = run_query(
-    "RECOMMEND FROM notes POSITIVE IDS (1, 2) NEGATIVE IDS (3) LIMIT 5",
-    url="http://localhost:6333",
-)
-for hit in result.data:
-    print(hit["score"], hit["payload"])
+```python
+from qql import Connection
 
-# Retrieve a point by ID
-result = run_query(
-    "SELECT * FROM notes WHERE id = 1",
-    url="http://localhost:6333",
-)
-print(result.data)      # {"id": "1", "payload": {...}}
+with Connection("http://localhost:6333") as conn:
+    # All queries share the same connection
+    conn.run_query(
+        "INSERT INTO COLLECTION notes VALUES {'text': 'hello world'} USING HYBRID"
+    )
+    result = conn.run_query(
+        "SEARCH notes SIMILAR TO 'hello' LIMIT 5 USING HYBRID WHERE year >= 2023"
+    )
+    for hit in result.data:
+        print(hit["score"], hit["payload"])
+```
+
+### Qdrant Cloud
+
+```python
+from qql import Connection
+
+with Connection("https://<your-cluster>.qdrant.io", secret="<your-api-key>") as conn:
+    result = conn.run_query("SHOW COLLECTIONS")
+    print(result.data)
+```
+
+### Custom embedding model
 
-# Delete by filter
+```python
+from qql import Connection
+
+with Connection(
+    "http://localhost:6333",
+    default_model="BAAI/bge-base-en-v1.5",
+) as conn:
+    conn.run_query(
+        "INSERT INTO COLLECTION articles VALUES {'text': 'Attention is all you need'}"
+    )
+```
+
+### All statement examples
+
+```python
+from qql import Connection
+
+with Connection("http://localhost:6333") as conn:
+
+    # Hybrid insert
+    conn.run_query(
+        "INSERT INTO COLLECTION notes VALUES {'text': 'hello world'} USING HYBRID"
+    )
+
+    # Dense search with WHERE filter
+    result = conn.run_query(
+        "SEARCH notes SIMILAR TO 'hello' LIMIT 5 WHERE year >= 2023 AND author != 'bot'"
+    )
+    for hit in result.data:
+        print(hit["score"], hit["payload"])
+
+    # Hybrid search
+    result = conn.run_query(
+        "SEARCH notes SIMILAR TO 'hello' LIMIT 5 USING HYBRID WHERE year >= 2023"
+    )
+
+    # Scroll / pagination
+    result = conn.run_query("SCROLL FROM notes LIMIT 2")
+    for point in result.data["points"]:
+        print(point["id"], point["payload"])
+    next_cursor = result.data["next_offset"]   # str | int | None
+
+    # Continue pagination
+    if next_cursor is not None:
+        result = conn.run_query(f"SCROLL FROM notes AFTER '{next_cursor}' LIMIT 2")
+
+    # Bulk insert
+    result = conn.run_query(
+        """INSERT BULK INTO COLLECTION notes VALUES [
+          {'id': 1, 'text': 'first document', 'year': 2023},
+          {'id': 2, 'text': 'second document', 'year': 2024}
+        ]"""
+    )
+    print(result.message)   # "Inserted 2 points"
+
+    # Recommend similar points
+    result = conn.run_query(
+        "RECOMMEND FROM notes POSITIVE IDS (1, 2) NEGATIVE IDS (3) LIMIT 5"
+    )
+    for hit in result.data:
+        print(hit["score"], hit["payload"])
+
+    # Retrieve a point by ID
+    result = conn.run_query("SELECT * FROM notes WHERE id = 1")
+    print(result.data)      # {"id": "1", "payload": {...}}
+
+    # Delete by filter
+    conn.run_query("DELETE FROM notes WHERE year < 2023")
+
+    # Inspect collection diagnostics
+    result = conn.run_query("SHOW COLLECTION notes")
+    print(result.data["topology"])         # "dense" or "hybrid"
+    print(result.data["vectors"])          # {"": {...}} or {"dense": {...}}
+    print(result.data["payload_schema"])   # field index info, or None
+```
+
+### `Connection` parameters
+
+| Parameter | Type | Default | Description |
+|---|---|---|---|
+| `url` | `str` | `"http://localhost:6333"` | Qdrant instance URL |
+| `secret` | `str \| None` | `None` | API key; `None` for unauthenticated |
+| `default_model` | `str \| None` | `None` → `sentence-transformers/all-MiniLM-L6-v2` | Dense embedding model used when no `USING MODEL` clause is given |
+
+### Power-user: `executor` property
+
+For low-level access to the pipeline, use `conn.executor` directly:
+
+```python
+from qql import Connection
+from qql.lexer import Lexer
+from qql.parser import Parser
+
+with Connection("http://localhost:6333") as conn:
+    tokens = Lexer().tokenize("SEARCH docs SIMILAR TO 'hello' LIMIT 5")
+    node = Parser(tokens).parse()
+    result = conn.executor.execute(node)
+```
+
+---
+
+## `run_query()` — Legacy one-shot API
+
+> **Note:** `run_query()` is kept for backward compatibility. It creates a new
+> `Connection` (and therefore a new `QdrantClient`) on every call. For
+> workloads that issue more than one query, use `Connection` instead.
+
+```python
+from qql import run_query
+
+# Insert a document
 result = run_query(
-    "DELETE FROM notes WHERE year < 2023",
+    "INSERT INTO COLLECTION notes VALUES {'text': 'hello world', 'author': 'alice', 'year': 2024}",
     url="http://localhost:6333",
 )
-print(result.message)   # "Deleted N point(s)"
+print(result.message)
 
-# Inspect collection diagnostics
+# Search
 result = run_query(
-    "SHOW COLLECTION notes",
+    "SEARCH notes SIMILAR TO 'hello' LIMIT 5 WHERE year >= 2023",
     url="http://localhost:6333",
 )
-print(result.data["topology"])         # "dense" or "hybrid"
-print(result.data["vectors"])          # {"": {...}} or {"dense": {...}, ...}
-print(result.data["payload_schema"])   # {"field": {"type": "keyword", ...}, ...} or None
+for hit in result.data:
+    print(hit["score"], hit["payload"])
 ```
 
+`run_query()` accepts the same `url`, `secret`, and `default_model` parameters
+as `Connection.__init__()`.
+
 ---
 
 ## Low-level pipeline API
 
-For more control, use the pipeline directly:
+For full control, use the Lexer → Parser → Executor pipeline directly:
 
 ```python
 from qdrant_client import QdrantClient
@@ -117,9 +222,12 @@ for hit in result.data:
     print(hit["score"], hit["payload"])
 ```
 
+This is equivalent to what `Connection` does internally; running the pipeline
+yourself gives you full control over the client lifecycle and config.
+
 ---
 
-## ExecutionResult
+## `ExecutionResult`
 
 All operations return an `ExecutionResult`:
 
diff --git a/docs/reference.md b/docs/reference.md
index 7944386..1d7377a 100644
--- a/docs/reference.md
+++ b/docs/reference.md
@@ -133,9 +133,10 @@ qql/
 ├── pyproject.toml          # Package config; installs the `qql` CLI command
 ├── src/
 │   └── qql/
-│       ├── __init__.py     # Public API: run_query()
+│       ├── __init__.py     # Public API: Connection, run_query()
 │       ├── cli.py          # CLI entry point: connect, disconnect, execute, dump, REPL
 │       ├── config.py       # QQLConfig dataclass + ~/.qql/config.json I/O
+│       ├── connection.py   # Connection class — stateful programmatic API
 │       ├── exceptions.py   # QQLError, QQLSyntaxError, QQLRuntimeError
 │       ├── lexer.py        # Tokenizer: string → List[Token]
 │       ├── ast_nodes.py    # Frozen dataclasses for each statement and filter type
@@ -148,6 +149,7 @@ qql/
     ├── test_lexer.py       # Tokenizer unit tests
     ├── test_parser.py      # Parser unit tests
     ├── test_executor.py    # Executor unit tests (mocked Qdrant client)
+    ├── test_connection.py  # Connection class unit tests (mocked Qdrant client)
     ├── test_script.py      # Script runner unit tests
     └── test_dumper.py      # Dumper unit tests
 ```
@@ -162,7 +164,7 @@ Tests do not require a running Qdrant instance — the Qdrant client is mocked.
 pytest tests/ -v
 ```
 
-Expected output: **500 tests passing**.
+Expected output: **549 tests passing**.
 
 ---
 
diff --git a/src/qql/__init__.py b/src/qql/__init__.py
index 567f036..c71f152 100644
--- a/src/qql/__init__.py
+++ b/src/qql/__init__.py
@@ -6,6 +6,7 @@
     __version__ = "0.0.0+unknown"
 
 from .config import DEFAULT_MODEL, QQLConfig, load_config
+from .connection import Connection
 from .exceptions import QQLError, QQLRuntimeError, QQLSyntaxError
 from .executor import ExecutionResult, Executor
 from .lexer import Lexer
@@ -13,6 +14,7 @@
 
 __all__ = [
     "__version__",
+    "Connection",
     "QQLConfig",
     "QQLError",
     "QQLRuntimeError",
@@ -32,15 +34,17 @@ def run_query(
     secret: str | None = None,
     default_model: str | None = None,
 ) -> ExecutionResult:
-    """Convenience function for programmatic use."""
-    from qdrant_client import QdrantClient
-
-    cfg = QQLConfig(
-        url=url,
-        secret=secret,
-        default_model=default_model or DEFAULT_MODEL,
-    )
-    client = QdrantClient(url=url, api_key=secret)
-    tokens = Lexer().tokenize(query)
-    node = Parser(tokens).parse()
-    return Executor(client, cfg).execute(node)
+    """One-shot convenience function kept for backward compatibility.
+
+    Creates a :class:`Connection`, runs one query, closes the connection, and
+    returns the result.  The underlying ``QdrantClient`` is always released —
+    even if the query raises — so repeated calls do not leak resources.
+
+    For workloads that issue multiple queries, prefer :class:`Connection`
+    directly — it reuses a single client across all calls::
+
+        with Connection(url, secret=secret) as conn:
+            result = conn.run_query(query)
+    """
+    with Connection(url=url, secret=secret, default_model=default_model) as conn:
+        return conn.run_query(query)
diff --git a/src/qql/connection.py b/src/qql/connection.py
new file mode 100644
index 0000000..e51f19f
--- /dev/null
+++ b/src/qql/connection.py
@@ -0,0 +1,132 @@
+from __future__ import annotations
+
+from .config import DEFAULT_MODEL, QQLConfig
+from .executor import Executor, ExecutionResult
+from .lexer import Lexer
+from .parser import Parser
+
+
+class Connection:
+    """Stateful connection to a Qdrant instance.
+
+    Builds one ``QdrantClient`` and one ``Executor`` at construction time and
+    reuses them for every :meth:`run_query` call — more efficient than the
+    standalone :func:`run_query` function, which creates a fresh client on
+    every invocation.
+
+    **Basic usage**::
+
+        conn = Connection("http://localhost:6333", secret="my-key")
+        result = conn.run_query(
+            "INSERT INTO COLLECTION docs VALUES {'text': 'hello world'}"
+        )
+        result = conn.run_query("SEARCH docs SIMILAR TO 'hello' LIMIT 5")
+        conn.close()
+
+    **Context manager (preferred)** — the HTTP connection pool is always
+    released, even if ``run_query`` raises::
+
+        with Connection("http://localhost:6333") as conn:
+            result = conn.run_query("SHOW COLLECTIONS")
+            print(result.data)
+
+    **Qdrant Cloud**::
+
+        with Connection("https://<cluster>.qdrant.io", secret="<api-key>") as conn:
+            result = conn.run_query("SHOW COLLECTIONS")
+
+    **Custom embedding model**::
+
+        with Connection(
+            "http://localhost:6333",
+            default_model="BAAI/bge-base-en-v1.5",
+        ) as conn:
+            result = conn.run_query(
+                "INSERT INTO COLLECTION docs VALUES {'text': 'hello'}"
+            )
+    """
+
+    def __init__(
+        self,
+        url: str = "http://localhost:6333",
+        secret: str | None = None,
+        default_model: str | None = None,
+    ) -> None:
+        """Create a connection to a Qdrant instance.
+
+        Args:
+            url: Base URL of the Qdrant instance (default: ``http://localhost:6333``).
+            secret: API key for authenticated instances; ``None`` for unauthenticated.
+            default_model: Dense embedding model used when no ``USING MODEL`` clause
+                is specified.  Defaults to
+                ``sentence-transformers/all-MiniLM-L6-v2``.
+        """
+        from qdrant_client import QdrantClient
+
+        self._config = QQLConfig(
+            url=url,
+            secret=secret,
+            default_model=default_model or DEFAULT_MODEL,
+        )
+        self._client = QdrantClient(url=url, api_key=secret)
+        self._executor = Executor(self._client, self._config)
+
+    # ── Public API ────────────────────────────────────────────────────────
+
+    def run_query(self, query: str) -> ExecutionResult:
+        """Parse and execute a single QQL statement.
+
+        Args:
+            query: A QQL query string, e.g.
+                ``"SEARCH docs SIMILAR TO 'hello' LIMIT 5"``.
+
+        Returns:
+            An :class:`~qql.ExecutionResult` with ``success``, ``message``,
+            and ``data`` fields.
+
+        Raises:
+            QQLSyntaxError: The query string could not be parsed.
+            QQLRuntimeError: The query parsed correctly but Qdrant rejected it.
+        """
+        tokens = Lexer().tokenize(query)
+        node = Parser(tokens).parse()
+        return self._executor.execute(node)
+
+    def close(self) -> None:
+        """Close the underlying Qdrant HTTP connection pool.
+
+        Call this explicitly when not using the context-manager form, or let
+        the ``with`` statement handle it automatically.
+        """
+        self._client.close()
+
+    # ── Context manager ───────────────────────────────────────────────────
+
+    def __enter__(self) -> Connection:
+        return self
+
+    def __exit__(self, *_: object) -> None:
+        self.close()
+
+    # ── Power-user properties ─────────────────────────────────────────────
+
+    @property
+    def config(self) -> QQLConfig:
+        """The :class:`~qql.QQLConfig` in use (url, secret, default_model)."""
+        return self._config
+
+    @property
+    def executor(self) -> Executor:
+        """Direct access to the :class:`~qql.Executor` for low-level use.
+
+        Example — execute a pre-built AST node, bypassing :meth:`run_query`::
+
+            from qql.lexer import Lexer
+            from qql.parser import Parser
+
+            conn = Connection("http://localhost:6333")
+            tokens = Lexer().tokenize("SHOW COLLECTIONS")
+            node = Parser(tokens).parse()
+            result = conn.executor.execute(node)
+        """
+        return self._executor
diff --git a/tests/test_connection.py b/tests/test_connection.py
new file mode 100644
index 0000000..c209698
--- /dev/null
+++ b/tests/test_connection.py
@@ -0,0 +1,193 @@
+"""Tests for the Connection class (src/qql/connection.py).
+
+All tests mock QdrantClient so no live Qdrant instance is required.
+"""
+import pytest
+
+from qql import Connection, QQLConfig, Executor, ExecutionResult, run_query
+from qql.exceptions import QQLSyntaxError
+
+
+# ── TestConnectionInit ────────────────────────────────────────────────────────
+
+class TestConnectionInit:
+    """Connection.__init__ stores config and wires up the executor."""
+
+    def test_default_url_and_no_secret(self, mocker):
+        mocker.patch("qdrant_client.QdrantClient")
+        conn = Connection()
+        assert conn.config.url == "http://localhost:6333"
+        assert conn.config.secret is None
+
+    def test_custom_url_and_secret_passed_to_qdrant_client(self, mocker):
+        mock_client_cls = mocker.patch("qdrant_client.QdrantClient")
+        Connection("https://cloud.example.io", secret="s3cr3t")
+        mock_client_cls.assert_called_once_with(
+            url="https://cloud.example.io", api_key="s3cr3t"
+        )
+
+    def test_custom_default_model_stored_in_config(self, mocker):
+        mocker.patch("qdrant_client.QdrantClient")
+        conn = Connection("http://localhost:6333", default_model="BAAI/bge-small-en-v1.5")
+        assert conn.config.default_model == "BAAI/bge-small-en-v1.5"
+
+    def test_config_and_executor_properties_return_correct_types(self, mocker):
+        mocker.patch("qdrant_client.QdrantClient")
+        conn = Connection("http://localhost:6333")
+        assert isinstance(conn.config, QQLConfig)
+        assert isinstance(conn.executor, Executor)
+
+
+# ── TestConnectionRunQuery ────────────────────────────────────────────────────
+
+class TestConnectionRunQuery:
+    """Connection.run_query() pipes through the Lexer → Parser → Executor."""
+
+    def test_run_query_calls_executor_execute(self, mocker):
+        mocker.patch("qdrant_client.QdrantClient")
+        mock_executor = mocker.MagicMock()
+        mock_executor.execute.return_value = ExecutionResult(
+            success=True, message="ok", data=[]
+        )
+        mocker.patch("qql.connection.Executor", return_value=mock_executor)
+
+        conn = Connection("http://localhost:6333")
+        conn.run_query("SHOW COLLECTIONS")
+        mock_executor.execute.assert_called_once()
+
+    def test_executor_instance_reused_across_queries(self, mocker):
+        """Executor() is constructed once; run_query() never re-instantiates it."""
+        mocker.patch("qdrant_client.QdrantClient")
+        mock_executor = mocker.MagicMock()
+        mock_executor.execute.return_value = ExecutionResult(
+            success=True, message="ok", data=[]
+        )
+        executor_cls = mocker.patch("qql.connection.Executor", return_value=mock_executor)
+
+        conn = Connection("http://localhost:6333")
+        conn.run_query("SHOW COLLECTIONS")
+        conn.run_query("SHOW COLLECTIONS")
+        conn.run_query("SHOW COLLECTIONS")
+
+        # Executor constructor called exactly once, not once per query
+        executor_cls.assert_called_once()
+        # But execute() called three times
+        assert mock_executor.execute.call_count == 3
+
+    def test_invalid_query_raises_qql_syntax_error(self, mocker):
+        mocker.patch("qdrant_client.QdrantClient")
+        conn = Connection("http://localhost:6333")
+        with pytest.raises(QQLSyntaxError):
+            conn.run_query("TOTALLY INVALID QUERY GIBBERISH")
+
+    def test_run_query_returns_execution_result(self, mocker):
+        mocker.patch("qdrant_client.QdrantClient")
+        mock_executor = mocker.MagicMock()
+        mock_executor.execute.return_value = ExecutionResult(
+            success=True, message="1 collection(s) found", data=["docs"]
+        )
+        mocker.patch("qql.connection.Executor", return_value=mock_executor)
+
+        conn = Connection("http://localhost:6333")
+        result = conn.run_query("SHOW COLLECTIONS")
+        assert isinstance(result, ExecutionResult)
+        assert result.success is True
+
+
+# ── TestConnectionLifecycle ───────────────────────────────────────────────────
+
+class TestConnectionLifecycle:
+    """Connection.close() and the context-manager protocol."""
+
+    def test_close_calls_client_close(self, mocker):
+        mock_client = mocker.MagicMock()
+        mocker.patch("qdrant_client.QdrantClient", return_value=mock_client)
+        conn = Connection("http://localhost:6333")
+        conn.close()
+        mock_client.close.assert_called_once()
+
+    def test_context_manager_enter_returns_self(self, mocker):
+        mock_client = mocker.MagicMock()
+        mocker.patch("qdrant_client.QdrantClient", return_value=mock_client)
+        conn = Connection("http://localhost:6333")
+        assert conn.__enter__() is conn
+
+    def test_context_manager_exit_calls_close(self, mocker):
+        mock_client = mocker.MagicMock()
+        mocker.patch("qdrant_client.QdrantClient", return_value=mock_client)
+        with Connection("http://localhost:6333"):
+            pass
+        mock_client.close.assert_called_once()
+
+    def test_context_manager_closes_even_when_body_raises(self, mocker):
+        mock_client = mocker.MagicMock()
+        mocker.patch("qdrant_client.QdrantClient", return_value=mock_client)
+        with pytest.raises(ValueError):
+            with Connection("http://localhost:6333"):
+                raise ValueError("simulated error inside with-block")
+        # close() must still have been called
+        mock_client.close.assert_called_once()
+
+
+# ── TestRunQueryBackwardCompat ────────────────────────────────────────────────
+
+class TestRunQueryBackwardCompat:
+    """Standalone run_query() keeps working with the same signature and semantics."""
+
+    def test_run_query_still_callable_with_same_signature(self, mocker):
+        mocker.patch("qdrant_client.QdrantClient")
+        mock_executor = mocker.MagicMock()
+        mock_executor.execute.return_value = ExecutionResult(
+            success=True, message="ok", data=[]
+        )
+        mocker.patch("qql.connection.Executor", return_value=mock_executor)
+        # Must not raise; same kwargs as before the refactor
+        run_query("SHOW COLLECTIONS", url="http://localhost:6333", secret=None)
+
+    def test_run_query_delegates_to_connection(self, mocker):
+        """run_query() must delegate to Connection, not re-implement the pipeline."""
+        conn_instance = mocker.MagicMock()
+        conn_instance.run_query.return_value = ExecutionResult(
+            success=True, message="ok", data=[]
+        )
+        # Context-manager protocol: __enter__ returns the mock, __exit__ is a no-op
+        conn_instance.__enter__ = mocker.MagicMock(return_value=conn_instance)
+        conn_instance.__exit__ = mocker.MagicMock(return_value=False)
+        conn_cls = mocker.patch("qql.Connection", return_value=conn_instance)
+        run_query("SHOW COLLECTIONS", url="http://localhost:6333")
+        conn_cls.assert_called_once_with(
+            url="http://localhost:6333", secret=None, default_model=None
+        )
+        conn_instance.run_query.assert_called_once_with("SHOW COLLECTIONS")
+
+    def test_run_query_closes_connection_after_query(self, mocker):
+        """run_query() must call close() — it must not leak the QdrantClient."""
+        mock_client = mocker.MagicMock()
+        mocker.patch("qdrant_client.QdrantClient", return_value=mock_client)
+        mock_executor = mocker.MagicMock()
+        mock_executor.execute.return_value = ExecutionResult(
+            success=True, message="ok", data=[]
+        )
+        mocker.patch("qql.connection.Executor", return_value=mock_executor)
+        run_query("SHOW COLLECTIONS", url="http://localhost:6333")
+        # close() must have been called exactly once
+        mock_client.close.assert_called_once()
+
+    def test_run_query_closes_connection_even_when_query_raises(self, mocker):
+        """run_query() must call close() even if the query throws."""
+        mock_client = mocker.MagicMock()
+        mocker.patch("qdrant_client.QdrantClient", return_value=mock_client)
+        # Make the query raise a runtime error (e.g. collection not found)
+        from qql.exceptions import QQLRuntimeError
+        mock_executor = mocker.MagicMock()
+        mock_executor.execute.side_effect = QQLRuntimeError("collection 'x' does not exist")
+        mocker.patch("qql.connection.Executor", return_value=mock_executor)
+        with pytest.raises(QQLRuntimeError):
+            run_query("SEARCH x SIMILAR TO 'q' LIMIT 5", url="http://localhost:6333")
+        # close() still called
+        mock_client.close.assert_called_once()
+
+    def test_run_query_invalid_syntax_still_raises(self, mocker):
+        mocker.patch("qdrant_client.QdrantClient")
+        with pytest.raises(QQLSyntaxError):
+            run_query("TOTALLY INVALID", url="http://localhost:6333")